Java Tutorial/I18N/Collator
Версия от 17:44, 31 мая 2010; (обсуждение)
Содержание
- 1 Check Equality for two strings with Collator
- 2 CollationKey and Collator.getInstance
- 3 Collator based compare
- 4 Compare accented letters
- 5 Comparing Strings in a Locale-Independent Way
- 6 International friendly string comparison with case-order
- 7 Sort string array with Collator
- 8 Sort strings using Collator class
- 9 Use RuleBasedCollator
Check Equality for two strings with Collator
import java.text.Collator;
import java.util.Locale;
/**
 * Demonstrates testing two strings for equality with a locale-specific
 * {@link Collator} whose strength has been lowered to SECONDARY.
 */
public class Main {
  /**
   * Compares a lower-case and an upper-case e-acute with a French collator and
   * prints {@code s1 = s2} when the collator reports them as equal.
   *
   * @param args unused
   */
  public static void main(String args[]) {
    // Unicode escapes keep the sample independent of the source-file encoding.
    String s1 = "\u00E9"; // lower-case e-acute
    // NOTE(review): the published sample contained an undecodable character here.
    // Upper-case e-acute is assumed, because at SECONDARY strength only a case
    // difference still compares as equal, matching the documented output.
    String s2 = "\u00C9"; // upper-case e-acute
    Collator frCollator = Collator.getInstance(Locale.FRANCE);
    // SECONDARY: base letters and accents are significant, case is not.
    frCollator.setStrength(Collator.SECONDARY);
    if (frCollator.compare(s1, s2) == 0) {
      System.out.println("s1 = s2");
    }
  }
}
//s1 = s2
CollationKey and Collator.getInstance
/*
* Copyright (c) 1995 - 2008 Sun Microsystems, Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of Sun Microsystems nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
import java.text.CollationKey;
import java.text.Collator;
import java.util.Locale;
/**
 * Sorts a list of words by first converting each word into a
 * {@link CollationKey} and then comparing the keys with one another.
 */
public class KeysDemo {
  /**
   * Sorts the given keys in place into ascending order, as defined by
   * {@link CollationKey#compareTo(CollationKey)}.
   *
   * @param keys the keys to sort; all keys are expected to come from the same
   *             collator
   */
  public static void sortArray(CollationKey[] keys) {
    CollationKey tmp;
    for (int i = 0; i < keys.length; i++) {
      for (int j = i + 1; j < keys.length; j++) {
        // Compare the keys
        if (keys[i].compareTo(keys[j]) > 0) {
          // Swap keys[i] and keys[j]
          tmp = keys[i];
          keys[i] = keys[j];
          keys[j] = tmp;
        }
      }
    }
  }

  /**
   * Prints the source string of every key, one per line, in array order.
   *
   * @param keys the keys whose source strings are printed
   */
  static void displayWords(CollationKey[] keys) {
    for (int i = 0; i < keys.length; i++) {
      System.out.println(keys[i].getSourceString());
    }
  }

  /**
   * Builds collation keys for a fixed word list with an en_US collator, sorts
   * them, and prints the words in sorted order.
   *
   * @param args unused
   */
  static public void main(String[] args) {
    Collator enUSCollator = Collator.getInstance(new Locale("en", "US"));
    String[] words = { "peach", "apricot", "grape", "lemon" };
    CollationKey[] keys = new CollationKey[words.length];
    for (int k = 0; k < keys.length; k++) {
      keys[k] = enUSCollator.getCollationKey(words[k]);
    }
    sortArray(keys);
    displayWords(keys);
  }
}
Collator based compare
/*
* Copyright (c) 1995 - 2008 Sun Microsystems, Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of Sun Microsystems nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
import java.text.Collator;
import java.util.Locale;
/**
 * Shows how the result of {@link Collator#compare(String, String)} depends on
 * the locale of the collator by sorting the same words with a French and an
 * English collator.
 */
public class CollatorDemo {
  /**
   * Sorts the words in place into ascending order according to the collator.
   *
   * @param collator the collator that defines the ordering
   * @param words    the array to sort
   */
  public static void sortStrings(Collator collator, String[] words) {
    String tmp;
    for (int i = 0; i < words.length; i++) {
      for (int j = i + 1; j < words.length; j++) {
        // Compare elements of the array two at a time.
        if (collator.compare(words[i], words[j]) > 0) {
          // Swap words[i] and words[j]
          tmp = words[i];
          words[i] = words[j];
          words[j] = tmp;
        }
      }
    }
  }

  /**
   * Prints every word on its own line, in array order.
   *
   * @param words the words to print
   */
  public static void printStrings(String[] words) {
    for (int i = 0; i < words.length; i++) {
      System.out.println(words[i]);
    }
  }

  /**
   * Prints the raw result of three comparisons made with an en_US collator:
   * a smaller-vs-larger pair, an identical pair and a larger-vs-smaller pair.
   */
  public static void testCompare() {
    Collator myCollator = Collator.getInstance(new Locale("en", "US"));
    System.out.println(myCollator.compare("abc", "def"));
    System.out.println(myCollator.compare("rtf", "rtf"));
    System.out.println(myCollator.compare("xyz", "abc"));
  }

  /**
   * Runs {@link #testCompare()}, then sorts and prints the same four words
   * first with a fr_FR collator and then with an en_US collator.
   *
   * @param args unused
   */
  static public void main(String[] args) {
    testCompare();
    System.out.println();
    Collator fr_FRCollator = Collator.getInstance(new Locale("fr", "FR"));
    Collator en_USCollator = Collator.getInstance(new Locale("en", "US"));
    String eWithCircumflex = "\u00EA";
    String eWithAcute = "\u00E9";
    String peachfr = "p" + eWithAcute + "ch" + eWithAcute;
    String sinfr = "p" + eWithCircumflex + "che";
    String[] words = { peachfr, sinfr, "peach", "sin" };
    sortStrings(fr_FRCollator, words);
    System.out.println("Locale: fr_FR");
    printStrings(words);
    System.out.println();
    sortStrings(en_USCollator, words);
    System.out.println("Locale: en_US");
    printStrings(words);
  }
}
Compare accented letters
import java.text.Collator;
import java.util.Locale;
/**
 * Demonstrates ordering an accented letter relative to a plain letter with a
 * French {@link Collator}.
 */
public class Main {
  /**
   * Compares "é" with "f" and prints {@code s1 < s2} when the collator orders
   * the accented letter first.
   *
   * @param args unused
   */
  public static void main(String args[]) {
    String s1 = "é";
    String s2 = "f";
    Collator frCollator = Collator.getInstance(Locale.FRANCE);
    // CANONICAL_DECOMPOSITION is a decomposition mode, not a strength, so it
    // belongs in setDecomposition(); the original passed it to setStrength().
    frCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
    if (frCollator.compare(s1, s2) < 0) {
      System.out.println("s1 < s2");
    }
  }
}
//s1 < s2
Comparing Strings in a Locale-Independent Way
import java.text.Collator;
import java.util.Locale;
/**
 * Skeleton showing how to branch on the three possible outcomes of
 * {@link Collator#compare(String, String)}.
 */
public class Main {
  /**
   * Compares two (empty) strings with a collator for {@link Locale#CANADA} and
   * branches on whether the result is negative, positive or zero.
   *
   * @param argv unused
   * @throws Exception declared by the sample; nothing in the body throws a
   *                   checked exception
   */
  public static void main(String[] argv) throws Exception {
    Collator collator = Collator.getInstance(Locale.CANADA);
    String aString1 = "";
    String aString2 = "";
    int compare = collator.compare(aString1, aString2);
    if (compare < 0) {
      // aString1 < aString2
    } else if (compare > 0) {
      // aString1 > aString2
      // (any positive value means "greater"; testing "> 1" would miss 1)
    } else {
      // aString1 = aString2
    }
  }
}
International friendly string comparison with case-order
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: StringComparable.java 468655 2006-10-28 07:12:06Z minchau $
*/
import java.util.Vector;
import java.text.Collator;
import java.text.RuleBasedCollator;
import java.text.CollationElementIterator;
import java.util.Locale;
import java.text.CollationKey;
/**
 * International friendly string comparison with case-order.
 *
 * <p>An instance wraps one string together with the locale, collator and
 * case-order ("upper-first" or anything else, treated as lower-first) used to
 * compare it against other {@code StringComparable} instances.
 *
 * <p>Comparison temporarily changes the strength and decomposition mode of the
 * collator that was passed in and restores them afterwards.
 *
 * @author Igor Hersht, igorh@ca.ibm.com
 */
public class StringComparable implements Comparable {
  public final static int UNKNOWN_CASE = -1;
  public final static int UPPER_CASE = 1;
  public final static int LOWER_CASE = 2;

  private String m_text;
  private Locale m_locale;
  private RuleBasedCollator m_collator;
  private String m_caseOrder;
  private int m_mask = 0xFFFFFFFF;

  /**
   * Creates a comparable wrapper around {@code text}.
   *
   * @param text      the string this object represents
   * @param locale    locale used for upper/lower-casing while looking for case
   *                  differences
   * @param collator  collator used for comparison; it is cast to
   *                  {@link RuleBasedCollator}, so any other kind fails with a
   *                  {@link ClassCastException}
   * @param caseOrder "upper-first" to sort upper case before lower case; any
   *                  other value is treated as lower-first
   */
  public StringComparable(final String text, final Locale locale, final Collator collator, final String caseOrder) {
    m_text = text;
    m_locale = locale;
    m_collator = (RuleBasedCollator) collator;
    m_caseOrder = caseOrder;
    m_mask = getMask(m_collator.getStrength());
  }

  /**
   * Returns an object suitable for comparing {@code text}: a plain collation
   * key when no case-order is requested, otherwise a {@code StringComparable}.
   *
   * @param text      the string to compare
   * @param locale    locale passed on to the {@code StringComparable}
   * @param collator  collator to use; cast to {@link RuleBasedCollator}
   * @param caseOrder the case-order, or {@code null}/empty for none
   * @return a {@link CollationKey} or a {@code StringComparable}
   */
  public final static Comparable getComparator(final String text, final Locale locale, final Collator collator, final String caseOrder) {
    if ((caseOrder == null) || (caseOrder.length() == 0)) { // no case-order specified
      return ((RuleBasedCollator) collator).getCollationKey(text);
    } else {
      return new StringComparable(text, locale, collator, caseOrder);
    }
  }

  /** Returns the wrapped text. */
  @Override
  public final String toString() {
    return m_text;
  }

  /**
   * Compares this text with the text of another {@code StringComparable}.
   *
   * <p>Differences more significant than case (checked at no more than
   * SECONDARY strength) win first, then the case-order decides, and finally
   * the collator at its own strength settles anything left.
   *
   * @param o the other object; must be a {@code StringComparable}
   * @return a negative, zero or positive value
   * @throws ClassCastException if {@code o} is not a {@code StringComparable}
   */
  @Override
  public int compareTo(Object o) {
    final String pattern = ((StringComparable) o).toString();
    if (m_text.equals(pattern)) { // Code-point equals
      return 0;
    }
    final int savedStrength = m_collator.getStrength();
    int comp = 0;
    // Is there difference more significant than case-order?
    if (((savedStrength == Collator.PRIMARY) || (savedStrength == Collator.SECONDARY))) {
      comp = m_collator.compare(m_text, pattern);
    } else { // more than SECONDARY
      // Lower the strength temporarily so that case is ignored.
      m_collator.setStrength(Collator.SECONDARY);
      comp = m_collator.compare(m_text, pattern);
      m_collator.setStrength(savedStrength);
    }
    if (comp != 0) { // Difference more significant than case-order
      return comp;
    }
    // No difference more significant than case-order.
    // Find case difference
    comp = getCaseDiff(m_text, pattern);
    if (comp != 0) {
      return comp;
    } else { // No case differences. Less significant difference could exist
      return m_collator.compare(m_text, pattern);
    }
  }

  /**
   * Orders two strings by their first case difference, honouring the
   * configured case-order.
   *
   * @return -1 or 1 when a case difference was found, 0 when there is none
   */
  private final int getCaseDiff(final String text, final String pattern) {
    final int savedStrength = m_collator.getStrength();
    final int savedDecomposition = m_collator.getDecomposition();
    m_collator.setStrength(Collator.TERTIARY); // not to ignore case
    m_collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); // corresponds NFD
    final int diff[] = getFirstCaseDiff(text, pattern, m_locale);
    m_collator.setStrength(savedStrength); // restore
    m_collator.setDecomposition(savedDecomposition); // restore
    if (diff != null) {
      if ((m_caseOrder).equals("upper-first")) {
        if (diff[0] == UPPER_CASE) {
          return -1;
        } else {
          return 1;
        }
      } else { // lower-first
        if (diff[0] == LOWER_CASE) {
          return -1;
        } else {
          return 1;
        }
      }
    } else { // No case differences
      return 0;
    }
  }

  /**
   * Walks both strings collation element by collation element and returns the
   * case of the first pair of substrings that differ only by case.
   *
   * @return a two-element array {@code {caseOfText, caseOfPattern}} holding
   *         {@link #UPPER_CASE}/{@link #LOWER_CASE}, or {@code null} when
   *         either string is exhausted before such a pair is found
   */
  private final int[] getFirstCaseDiff(final String text, final String pattern, final Locale locale) {
    final CollationElementIterator targIter = m_collator.getCollationElementIterator(text);
    final CollationElementIterator patIter = m_collator.getCollationElementIterator(pattern);
    int startTarg = -1;
    int endTarg = -1;
    int startPatt = -1;
    int endPatt = -1;
    final int done = getElement(CollationElementIterator.NULLORDER);
    int patternElement = 0, targetElement = 0;
    boolean getPattern = true, getTarget = true;
    while (true) {
      if (getPattern) {
        startPatt = patIter.getOffset();
        patternElement = getElement(patIter.next());
        endPatt = patIter.getOffset();
      }
      if ((getTarget)) {
        startTarg = targIter.getOffset();
        targetElement = getElement(targIter.next());
        endTarg = targIter.getOffset();
      }
      getTarget = getPattern = true;
      if ((patternElement == done) || (targetElement == done)) {
        return null;
      } else if (targetElement == 0) {
        // Masked target element is zero: advance only the target next time.
        getPattern = false;
      } else if (patternElement == 0) {
        // Masked pattern element is zero: advance only the pattern next time.
        getTarget = false;
      } else if (targetElement != patternElement) { // mismatch
        if ((startPatt < endPatt) && (startTarg < endTarg)) {
          final String subText = text.substring(startTarg, endTarg);
          final String subPatt = pattern.substring(startPatt, endPatt);
          final String subTextUp = subText.toUpperCase(locale);
          final String subPattUp = subPatt.toUpperCase(locale);
          if (m_collator.compare(subTextUp, subPattUp) != 0) { // not case difference
            continue;
          }
          int diff[] = { UNKNOWN_CASE, UNKNOWN_CASE };
          if (m_collator.compare(subText, subTextUp) == 0) {
            diff[0] = UPPER_CASE;
          } else if (m_collator.compare(subText, subText.toLowerCase(locale)) == 0) {
            diff[0] = LOWER_CASE;
          }
          if (m_collator.compare(subPatt, subPattUp) == 0) {
            diff[1] = UPPER_CASE;
          } else if (m_collator.compare(subPatt, subPatt.toLowerCase(locale)) == 0) {
            diff[1] = LOWER_CASE;
          }
          if (((diff[0] == UPPER_CASE) && (diff[1] == LOWER_CASE)) ||
              ((diff[0] == LOWER_CASE) && (diff[1] == UPPER_CASE))) {
            return diff;
          } else { // not case diff
            continue;
          }
        } else {
          continue;
        }
      }
    }
  }

  // Return a mask for the part of the order we're interested in
  private static final int getMask(final int strength) {
    switch (strength) {
      case Collator.PRIMARY:
        return 0xFFFF0000;
      case Collator.SECONDARY:
        return 0xFFFFFF00;
      default:
        return 0xFFFFFFFF;
    }
  }

  // get collation element with given strength
  // from the element with max strength
  private final int getElement(int maxStrengthElement) {
    return (maxStrengthElement & m_mask);
  }
} // StringComparable
Sort string array with Collator
import java.io.BufferedWriter;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.text.Collator;
/**
 * Prints an array of strings, sorts it with the default-locale
 * {@link Collator} and prints it again.
 */
public class Main {
  /**
   * Writes the words before and after sorting to standard output through a
   * writer that encodes with the "Cp850" charset.
   *
   * @param args unused
   * @throws Exception if the charset is unsupported or writing fails
   */
  public static void main(String args[]) throws Exception {
    String[] words = { "é", "e", "a", "c" };
    Writer w = new BufferedWriter(new OutputStreamWriter(System.out, "Cp850"));
    // Loop bounds follow the array instead of a hard-coded element count.
    for (int i = 0; i < words.length; i++) {
      w.write(words[i] + " ");
    }
    sortArray(Collator.getInstance(), words);
    for (int i = 0; i < words.length; i++) {
      w.write(words[i] + " ");
    }
    w.flush();
    w.close();
  }

  /**
   * Sorts the array in place into ascending order according to the collator.
   *
   * @param collator the collator that defines the ordering
   * @param strArray the array to sort
   */
  public static void sortArray(Collator collator, String[] strArray) {
    String tmp;
    if (strArray.length < 2) {
      return; // nothing to order
    }
    for (int i = 0; i < strArray.length; i++) {
      for (int j = i + 1; j < strArray.length; j++) {
        if (collator.compare(strArray[i], strArray[j]) > 0) {
          tmp = strArray[i];
          strArray[i] = strArray[j];
          strArray[j] = tmp;
        }
      }
    }
  }
}
Sort strings using Collator class
import java.text.Collator;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
/**
 * Sorts a list of strings with {@link Collections#sort} using a US-locale
 * {@link Collator} as the comparator.
 */
public class Main {
  /**
   * Fills a list with five sample strings, sorts it with the collator and
   * prints each entry prefixed with "Fruit = ".
   *
   * @param args unused
   */
  public static void main(String[] args) {
    List<String> fruits = new ArrayList<String>();
    Collections.addAll(fruits, "A", "é", "C", "D", "A");

    // A Collator is itself a Comparator, so it can be handed to sort() directly.
    Collections.sort(fruits, Collator.getInstance(Locale.US));

    for (String fruit : fruits) {
      System.out.println("Fruit = " + fruit);
    }
  }
}
Use RuleBasedCollator
/*
* Copyright (c) 1995 - 2008 Sun Microsystems, Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of Sun Microsystems nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
import java.text.Collator;
import java.text.ParseException;
import java.text.RuleBasedCollator;
/**
 * Sorts the same words with two hand-written {@link RuleBasedCollator}s: one
 * with plain English letter order and one with traditional Spanish order, in
 * which "ch" and "ll" are treated as separate letters.
 */
public class RulesDemo {
  /**
   * Sorts the words in place into ascending order according to the collator.
   *
   * @param collator the collator that defines the ordering
   * @param words    the array to sort
   */
  public static void sortStrings(Collator collator, String[] words) {
    String tmp;
    for (int i = 0; i < words.length; i++) {
      for (int j = i + 1; j < words.length; j++) {
        // Compare elements of the words array
        if (collator.compare(words[i], words[j]) > 0) {
          // Swap words[i] and words[j]
          tmp = words[i];
          words[i] = words[j];
          words[j] = tmp;
        }
      }
    }
  }

  /**
   * Prints every word on its own line, in array order.
   *
   * @param words the words to print
   */
  public static void printStrings(String[] words) {
    for (int i = 0; i < words.length; i++) {
      System.out.println(words[i]);
    }
  }

  /**
   * Builds the English and traditional-Spanish rule strings, then sorts and
   * prints the sample words with each collator in turn. Prints a message
   * instead if either rule string cannot be parsed.
   *
   * @param args unused
   */
  static public void main(String[] args) {
    String englishRules = ("< a,A < b,B < c,C < d,D < e,E < f,F "
        + "< g,G < h,H < i,I < j,J < k,K < l,L "
        + "< m,M < n,N < o,O < p,P < q,Q < r,R "
        + "< s,S < t,T < u,U < v,V < w,W < x,X " + "< y,Y < z,Z");
    String smallnTilde = "\u00F1";
    String capitalNTilde = "\u00D1";
    // Same as the English rules, plus "ch" after c, "ll" after l and n-tilde after n.
    String traditionalSpanishRules = ("< a,A < b,B < c,C "
        + "< ch, cH, Ch, CH " + "< d,D < e,E < f,F "
        + "< g,G < h,H < i,I < j,J < k,K < l,L " + "< ll, lL, Ll, LL "
        + "< m,M < n,N " + "< " + smallnTilde + "," + capitalNTilde + " "
        + "< o,O < p,P < q,Q < r,R " + "< s,S < t,T < u,U < v,V < w,W < x,X " + "< y,Y < z,Z");
    String[] words = { "luz", "curioso", "llama", "chalina" };
    try {
      RuleBasedCollator enCollator = new RuleBasedCollator(englishRules);
      RuleBasedCollator spCollator = new RuleBasedCollator(
          traditionalSpanishRules);
      sortStrings(enCollator, words);
      printStrings(words);
      System.out.println();
      sortStrings(spCollator, words);
      printStrings(words);
    } catch (ParseException pe) {
      System.out.println("Parse exception for rules");
    }
  }
}