Java Tutorial/I18N/Collator

Материал из Java эксперт
Версия от 05:02, 1 июня 2010; Admin (обсуждение | вклад) (1 версия)
(разн.) ← Предыдущая | Текущая версия (разн.) | Следующая → (разн.)
Перейти к: навигация, поиск

Check Equality for two strings with Collator

import java.text.Collator;
import java.util.Locale;
/**
 * Shows that a French collator set to SECONDARY strength reports two strings
 * that differ only in letter case as equal.
 */
public class Main {
  /**
   * Compares a lower-case and an upper-case "e with acute accent" and prints
   * {@code s1 = s2} when the collator considers them equal.
   *
   * @param args ignored
   */
  public static void main(String args[]) {
    // Unicode escapes keep the accented letters intact whatever the
    // source-file encoding is.
    String s1 = "\u00E9"; // lower-case e with acute accent
    String s2 = "\u00C9"; // upper-case E with acute accent
    Collator frCollator = Collator.getInstance(Locale.FRANCE);
    // SECONDARY strength distinguishes accents but ignores case differences.
    frCollator.setStrength(Collator.SECONDARY);
    if (frCollator.compare(s1, s2) == 0) {
      System.out.println("s1 = s2");
    }
  }
}
//s1 = s2





CollationKey and Collator.getInstance

/*
 * Copyright (c) 1995 - 2008 Sun Microsystems, Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   - Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *   - Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *
 *   - Neither the name of Sun Microsystems nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
import java.text.CollationKey;
import java.text.Collator;
import java.util.Locale;
/**
 * Sorts a list of words by converting each one to a {@link CollationKey}
 * once and then comparing the keys instead of the original strings.
 */
public class KeysDemo {
  /**
   * Sorts the given keys in place into ascending collation order using a
   * simple exchange sort.
   *
   * @param keys the collation keys to sort; reordered in place
   */
  public static void sortArray(CollationKey[] keys) {
    CollationKey tmp;
    for (int i = 0; i < keys.length; i++) {
      for (int j = i + 1; j < keys.length; j++) {
        // Compare the keys
        if (keys[i].compareTo(keys[j]) > 0) {
          // Swap keys[i] and keys[j]
          tmp = keys[i];
          keys[i] = keys[j];
          keys[j] = tmp;
        }
      }
    }
  }

  /**
   * Prints the source string of every key, one per line, in array order.
   *
   * @param keys the keys whose source strings are printed
   */
  static void displayWords(CollationKey[] keys) {
    for (int i = 0; i < keys.length; i++) {
      System.out.println(keys[i].getSourceString());
    }
  }

  /**
   * Builds collation keys for a fixed word list with an en_US collator,
   * sorts them and prints the result.
   *
   * @param args ignored
   */
  static public void main(String[] args) {
    Collator enUSCollator = Collator.getInstance(Locale.US);
    String[] words = { "peach", "apricot", "grape", "lemon" };
    CollationKey[] keys = new CollationKey[words.length];
    for (int k = 0; k < keys.length; k++) {
      keys[k] = enUSCollator.getCollationKey(words[k]);
    }
    sortArray(keys);
    displayWords(keys);
  }
}





Collator based compare

/*
 * Copyright (c) 1995 - 2008 Sun Microsystems, Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   - Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *   - Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *
 *   - Neither the name of Sun Microsystems nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
import java.text.Collator;
import java.util.Locale;
/**
 * Demonstrates locale-sensitive string comparison and sorting with
 * {@link Collator}, contrasting the fr_FR and en_US orderings of the same
 * word list.
 */
public class CollatorDemo {
  /**
   * Sorts the words in place into the ascending order defined by the
   * collator, using a simple exchange sort.
   *
   * @param collator the collator that defines the ordering
   * @param words the strings to sort; reordered in place
   */
  public static void sortStrings(Collator collator, String[] words) {
    String tmp;
    for (int i = 0; i < words.length; i++) {
      for (int j = i + 1; j < words.length; j++) {
        // Compare elements of the array two at a time.
        if (collator.compare(words[i], words[j]) > 0) {
          // Swap words[i] and words[j]
          tmp = words[i];
          words[i] = words[j];
          words[j] = tmp;
        }
      }
    }
  }

  /**
   * Prints each word on its own line, in array order.
   *
   * @param words the strings to print
   */
  public static void printStrings(String[] words) {
    for (int i = 0; i < words.length; i++) {
      System.out.println(words[i]);
    }
  }

  /**
   * Prints the raw result of three en_US comparisons: a "less than" pair,
   * an equal pair and a "greater than" pair.
   */
  public static void testCompare() {
    Collator myCollator = Collator.getInstance(Locale.US);
    System.out.println(myCollator.compare("abc", "def"));
    System.out.println(myCollator.compare("rtf", "rtf"));
    System.out.println(myCollator.compare("xyz", "abc"));
  }

  /**
   * Runs {@link #testCompare()}, then sorts and prints the same four words
   * first with a fr_FR collator and then with an en_US collator.
   *
   * @param args ignored
   */
  static public void main(String[] args) {
    testCompare();
    System.out.println();
    Collator fr_FRCollator = Collator.getInstance(new Locale("fr", "FR"));
    Collator en_USCollator = Collator.getInstance(Locale.US);
    // Unicode escapes keep the accented letters independent of file encoding.
    String eWithCircumflex = "\u00EA";
    String eWithAcute = "\u00E9";
    String peachfr = "p" + eWithAcute + "ch" + eWithAcute;
    String sinfr = "p" + eWithCircumflex + "che";
    String[] words = { peachfr, sinfr, "peach", "sin" };
    sortStrings(fr_FRCollator, words);
    System.out.println("Locale: fr_FR");
    printStrings(words);
    System.out.println();
    sortStrings(en_USCollator, words);
    System.out.println("Locale: en_US");
    printStrings(words);
  }
}





Compare accented letters

import java.text.Collator;
import java.util.Locale;
/**
 * Shows that a French collator orders an accented "e" before the letter
 * "f", i.e. the accent does not push the letter past the rest of the
 * alphabet as a raw code-point comparison would.
 */
public class Main {
  /**
   * Compares "e with acute accent" against "f" and prints {@code s1 < s2}
   * when the collator orders the accented letter first.
   *
   * @param args ignored
   */
  public static void main(String args[]) {
    // The real accented letter, written as a Unicode escape so the example
    // does not depend on the source-file encoding.
    String s1 = "\u00E9";
    String s2 = "f";
    Collator frCollator = Collator.getInstance(Locale.FRANCE);
    // CANONICAL_DECOMPOSITION is a decomposition mode, not a strength, so it
    // belongs in setDecomposition rather than setStrength.
    frCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
    if (frCollator.compare(s1, s2) < 0) {
        System.out.println("s1 < s2");
    }
  }
}
//s1 < s2





Comparing Strings in a Locale-Independent Way

import java.text.Collator;
import java.util.Locale;
/**
 * Skeleton showing how to interpret the three-way result of
 * {@link Collator#compare(String, String)}.
 */
public class Main {
  /**
   * Compares two (empty) sample strings with a collator for
   * {@link Locale#CANADA} and branches on the sign of the result.
   *
   * @param argv ignored
   * @throws Exception declared by the original example; nothing in the body
   *         throws a checked exception
   */
  public static void main(String[] argv) throws Exception {
    Collator collator = Collator.getInstance(Locale.CANADA);
    String aString1 = "";
    String aString2 = "";
    int compare = collator.compare(aString1, aString2);
    if (compare < 0) {
      // aString1 < aString2
    } else if (compare > 0) {
      // aString1 > aString2 (any positive value, including 1, means "greater")
    } else {
      // aString1 = aString2
    }
  }
}





International friendly string comparison with case-order

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the  "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * $Id: StringComparable.java 468655 2006-10-28 07:12:06Z minchau $
 */

import java.util.Vector;
import java.text.Collator;
import java.text.RuleBasedCollator;
import java.text.CollationElementIterator;
import java.util.Locale;
import java.text.CollationKey;

/**
 * International friendly string comparison with case-order.
 *
 * <p>An instance wraps a text together with a locale, a collator and a
 * case-order preference ({@code "upper-first"}; any other value is treated
 * as lower-first). Comparison first looks for a difference more significant
 * than case, then for the first pure case difference, and finally falls back
 * to the collator's own result.
 *
 * <p>{@link #compareTo(Object)} temporarily changes the strength and
 * decomposition mode of the collator it was given and restores them
 * afterwards, so the collator must not be used concurrently by other code
 * while a comparison is running.
 *
 * @author Igor Hersht, igorh@ca.ibm.com
 */
public class StringComparable implements Comparable  {

     public final static int UNKNOWN_CASE = -1;
     public final static int UPPER_CASE = 1;
     public final static int LOWER_CASE = 2;

     private  String m_text;
     private  Locale m_locale;
     private RuleBasedCollator m_collator;
     private String m_caseOrder;
     private int m_mask = 0xFFFFFFFF;

    /**
     * Creates a comparable wrapper around {@code text}.
     *
     * @param text the string to compare
     * @param locale the locale used for the case conversions performed while
     *        looking for case differences
     * @param collator the collator to compare with; it is cast to
     *        {@link RuleBasedCollator}
     * @param caseOrder {@code "upper-first"} to order upper case before lower
     *        case; any other value orders lower case first
     * @throws ClassCastException if {@code collator} is not a
     *         {@link RuleBasedCollator}
     */
    public StringComparable(final String text, final Locale locale, final Collator collator, final String caseOrder){
         m_text =  text;
         m_locale = locale;
         m_collator = (RuleBasedCollator)collator;
         m_caseOrder = caseOrder;
         // Collation elements are masked down to the collator's current strength.
         m_mask = getMask(m_collator.getStrength());
    }

   /**
    * Returns an object suitable for comparing {@code text}: a plain collation
    * key when no case-order is requested, otherwise a
    * {@code StringComparable}.
    *
    * @param text the string to compare
    * @param locale the locale used for case conversions
    * @param collator the collator to compare with; it is cast to
    *        {@link RuleBasedCollator}
    * @param caseOrder the case-order preference; {@code null} or empty means
    *        no case-order was specified
    * @return a {@link CollationKey} or a {@code StringComparable}
    */
   public final static Comparable getComparator( final String text, final Locale locale, final Collator collator, final String caseOrder){
       if((caseOrder == null) ||(caseOrder.length() == 0)){// no case-order specified
            return  ((RuleBasedCollator)collator).getCollationKey(text);
       }else{
            return new StringComparable(text, locale, collator, caseOrder);
       }
   }

   /**
    * Returns the wrapped text.
    *
    * @return the text given to the constructor
    */
   public final String toString(){return m_text;}

   /**
    * Compares this text with the text of another {@code StringComparable}.
    *
    * @param o the other {@code StringComparable}
    * @return zero, a negative or a positive value when this text is equal to,
    *         ordered before or ordered after the other text
    * @throws ClassCastException if {@code o} is not a {@code StringComparable}
    */
   public int compareTo(Object o) {
   final String pattern = ((StringComparable)o).toString();
   if(m_text.equals(pattern)){//Code-point equals
      return 0;
   }
   final int savedStrength = m_collator.getStrength();
   int comp = 0;
      // Is there difference more significant than case-order?
     if(((savedStrength == Collator.PRIMARY) || (savedStrength == Collator.SECONDARY))){
         comp = m_collator.compare(m_text, pattern );
     }else{// more than SECONDARY
         // Compare at SECONDARY so that case differences are ignored here,
         // then put the caller's strength back.
         m_collator.setStrength(Collator.SECONDARY);
         comp = m_collator.compare(m_text, pattern );
         m_collator.setStrength(savedStrength);
     }
     if(comp != 0){//Difference more significant than case-order
        return comp ;
     }

      // No difference more significant than case-order.
      // Find case difference
       comp = getCaseDiff(m_text, pattern);
       if(comp != 0){
           return comp;
       }else{// No case differences. Less significant difference could exist
            return m_collator.compare(m_text, pattern );
       }
  }


  /**
   * Translates the first case difference between the two strings into an
   * ordering according to the configured case-order.
   *
   * @param text this object's text
   * @param pattern the text being compared against
   * @return -1 or 1 when a case difference was found, 0 when there is none
   */
  private final int getCaseDiff (final String text, final String pattern){
     final int savedStrength = m_collator.getStrength();
     final int savedDecomposition = m_collator.getDecomposition();
     m_collator.setStrength(Collator.TERTIARY);// not to ignore case
     m_collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION );// corresponds NDF

    final int diff[] =getFirstCaseDiff (text, pattern, m_locale);
    m_collator.setStrength(savedStrength);// restore
    m_collator.setDecomposition(savedDecomposition); //restore
    if(diff != null){
       if((m_caseOrder).equals("upper-first")){
            if(diff[0] == UPPER_CASE){
                return -1;
            }else{
                return 1;
            }
       }else{// lower-first
            if(diff[0] == LOWER_CASE){
                return -1;
            }else{
                return 1;
            }
       }
   }else{// No case differences
        return 0;
   }

  }


  /**
   * Walks the collation elements of both strings in parallel and reports the
   * first position where they differ only by case.
   *
   * @param text this object's text
   * @param pattern the text being compared against
   * @param locale the locale used for upper/lower-case conversion
   * @return a two-element array holding the case ({@link #UPPER_CASE} or
   *         {@link #LOWER_CASE}) of the differing piece of {@code text} and of
   *         {@code pattern}, or {@code null} when either string is exhausted
   *         before such a difference is found
   */
  private final int[] getFirstCaseDiff(final String text, final String pattern, final Locale locale){

        final CollationElementIterator targIter = m_collator.getCollationElementIterator(text);
        final CollationElementIterator patIter = m_collator.getCollationElementIterator(pattern);
        int startTarg = -1;
        int endTarg = -1;
        int startPatt = -1;
        int endPatt = -1;
        final int done = getElement(CollationElementIterator.NULLORDER);
        int patternElement = 0, targetElement = 0;
        boolean getPattern = true, getTarget = true;

        while (true) {
            if (getPattern){
                 startPatt = patIter.getOffset();
                 patternElement = getElement(patIter.next());
                 endPatt = patIter.getOffset();
            }
            if ((getTarget)){
                 startTarg  = targIter.getOffset();
                 targetElement   = getElement(targIter.next());
                 endTarg  = targIter.getOffset();
            }
            getTarget = getPattern = true;
            if ((patternElement == done) ||( targetElement == done)) {
                return null;
            } else if (targetElement == 0) {
              // Masked element is zero on the target side: advance only the
              // target on the next pass.
              getPattern = false;
            } else if (patternElement == 0) {
              // Same for the pattern side.
              getTarget = false;
            } else if (targetElement != patternElement) {// mismatch
                if((startPatt < endPatt) && (startTarg < endTarg)){
                    final String  subText = text.substring(startTarg, endTarg);
                    final String  subPatt = pattern.substring(startPatt, endPatt);
                    final String  subTextUp = subText.toUpperCase(locale);
                    final String  subPattUp = subPatt.toUpperCase(locale);
                    if(m_collator.compare(subTextUp, subPattUp) != 0){ // not case difference
                        continue;
                    }

                    int diff[] = {UNKNOWN_CASE, UNKNOWN_CASE};
                    if(m_collator.compare(subText, subTextUp) == 0){
                        diff[0] = UPPER_CASE;
                    }else if(m_collator.compare(subText, subText.toLowerCase(locale)) == 0){
                       diff[0] = LOWER_CASE;
                    }
                    if(m_collator.compare(subPatt, subPattUp) == 0){
                        diff[1] = UPPER_CASE;
                    }else if(m_collator.compare(subPatt, subPatt.toLowerCase(locale)) == 0){
                       diff[1] = LOWER_CASE;
                    }

                    if(((diff[0] == UPPER_CASE) && ( diff[1] == LOWER_CASE)) ||
                       ((diff[0] == LOWER_CASE) && ( diff[1] == UPPER_CASE))){
                        return diff;
                    }else{// not case diff
                      continue;
                    }
                }else{
                    continue;
                }

           }
        }

  }


 // Return a mask for the part of the order we're interested in
    private static final int getMask(final int strength) {
        switch (strength) {
            case Collator.PRIMARY:
                return 0xFFFF0000;
            case Collator.SECONDARY:
                return 0xFFFFFF00;
            default:
                return 0xFFFFFFFF;
        }
    }
    //get collation element with given strength
    // from the element with max strength
  private final int getElement(int maxStrengthElement){

    return (maxStrengthElement & m_mask);
  }
}//StringComparable





Sort string array with Collator

import java.io.BufferedWriter;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.text.Collator;
/**
 * Prints a small word list before and after sorting it with the default
 * locale's {@link Collator}, writing through a Cp850-encoded writer.
 */
public class Main {
  /**
   * Writes the words, sorts them with the default collator and writes them
   * again on the same line.
   *
   * @param args ignored
   * @throws Exception if the "Cp850" charset is unsupported or writing fails
   */
  public static void main(String args[]) throws Exception {
    // The first word is the accented letter itself, written as a Unicode
    // escape so it survives any source-file encoding.
    String[] words = { "\u00E9", "e", "a", "c" };
    Writer w = new BufferedWriter(new OutputStreamWriter(System.out, "Cp850"));
    // Loop over the actual array length rather than a hard-coded count.
    for (int i = 0; i < words.length; i++) {
      w.write(words[i] + " ");
    }
    sortArray(Collator.getInstance(), words);
    for (int i = 0; i < words.length; i++) {
      w.write(words[i] + " ");
    }
    w.flush();
    w.close();
  }

  /**
   * Sorts the array in place into the ascending order defined by the
   * collator, using a simple exchange sort.
   *
   * @param collator the collator that defines the ordering
   * @param strArray the strings to sort; reordered in place
   */
  public static void sortArray(Collator collator, String[] strArray) {
    String tmp;
    if (strArray.length == 1)
      return;
    for (int i = 0; i < strArray.length; i++) {
      for (int j = i + 1; j < strArray.length; j++) {
        if (collator.compare(strArray[i], strArray[j]) > 0) {
          tmp = strArray[i];
          strArray[i] = strArray[j];
          strArray[j] = tmp;
        }
      }
    }
  }
}





Sort strings using Collator class

import java.text.Collator;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
/**
 * Sorts a list of strings, one of them accented, with a US-English
 * {@link Collator} passed to {@link Collections#sort(List, java.util.Comparator)}.
 */
public class Main {
  /**
   * Builds the list, sorts it with the collator and prints every element
   * prefixed with {@code "Fruit = "}.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    List<String> fruits = new ArrayList<String>();
    fruits.add("A");
    // The accented letter itself (e with acute accent), as a Unicode escape
    // so the example does not depend on the source-file encoding.
    fruits.add("\u00E9");
    fruits.add("C");
    fruits.add("D");
    fruits.add("A");
    Collator collator = Collator.getInstance(Locale.US);
    Collections.sort(fruits, collator);
    for (String fruit : fruits) {
      System.out.println("Fruit = " + fruit);
    }
  }
}





Use RuleBasedCollator

/*
 * Copyright (c) 1995 - 2008 Sun Microsystems, Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   - Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *   - Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *
 *   - Neither the name of Sun Microsystems nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
import java.text.Collator;
import java.text.ParseException;
import java.text.RuleBasedCollator;
/**
 * Demonstrates custom orderings built with {@link RuleBasedCollator}: the
 * same words are sorted with simple English rules and with traditional
 * Spanish rules in which "ch" and "ll" are listed as separate entries.
 */
public class RulesDemo {
  /**
   * Sorts the words in place into the ascending order defined by the
   * collator, using a simple exchange sort.
   *
   * @param collator the collator that defines the ordering
   * @param words the strings to sort; reordered in place
   */
  public static void sortStrings(Collator collator, String[] words) {
    String tmp;
    for (int i = 0; i < words.length; i++) {
      for (int j = i + 1; j < words.length; j++) {
        // Compare elements of the words array
        if (collator.compare(words[i], words[j]) > 0) {
          // Swap words[i] and words[j]
          tmp = words[i];
          words[i] = words[j];
          words[j] = tmp;
        }
      }
    }
  }

  /**
   * Prints each word on its own line, in array order.
   *
   * @param words the strings to print
   */
  public static void printStrings(String[] words) {
    for (int i = 0; i < words.length; i++) {
      System.out.println(words[i]);
    }
  }

  /**
   * Builds the English and traditional Spanish rule strings, then sorts and
   * prints the same word list with each resulting collator.
   *
   * @param args ignored
   */
  static public void main(String[] args) {
    String englishRules = ("< a,A < b,B < c,C < d,D < e,E < f,F "
        + "< g,G < h,H < i,I < j,J < k,K < l,L "
        + "< m,M < n,N < o,O < p,P < q,Q < r,R "
        + "< s,S < t,T < u,U < v,V < w,W < x,X " + "< y,Y < z,Z");
    // n with tilde, lower and upper case, as Unicode escapes.
    String smallnTilde = "\u00F1";
    String capitalNTilde = "\u00D1";
    String traditionalSpanishRules = ("< a,A < b,B < c,C "
        + "< ch, cH, Ch, CH " + "< d,D < e,E < f,F "
        + "< g,G < h,H < i,I < j,J < k,K < l,L " + "< ll, lL, Ll, LL "
        + "< m,M < n,N " + "< " + smallnTilde + "," + capitalNTilde + " "
        + "< o,O < p,P < q,Q < r,R " + "< s,S < t,T < u,U < v,V < w,W < x,X " + "< y,Y < z,Z");
    String[] words = { "luz", "curioso", "llama", "chalina" };
    try {
      RuleBasedCollator enCollator = new RuleBasedCollator(englishRules);
      RuleBasedCollator spCollator = new RuleBasedCollator(
          traditionalSpanishRules);
      sortStrings(enCollator, words);
      printStrings(words);
      System.out.println();
      sortStrings(spCollator, words);
      printStrings(words);
    } catch (ParseException pe) {
      System.out.println("Parse exception for rules");
    }
  }
}