Java Tutorial/I18N/BreakIterator

Материал из Java эксперт
Перейти к: навигация, поиск

Determining Potential Line Breaks in a Unicode String

   <source lang="java">

import java.text.BreakIterator;
import java.util.Locale;

/**
 * Demo: lists the potential line-break offsets of a short two-line string.
 */
public class Main {

  /**
   * Walks a line-instance {@link BreakIterator} (Canadian locale) over the
   * sample text and prints each boundary offset on its own line, starting
   * with the offset returned by {@code first()} and stopping when the
   * iterator reports {@link BreakIterator#DONE}.
   *
   * @param argv ignored
   */
  public static void main(String[] argv) throws Exception {
    BreakIterator lineBreaks = BreakIterator.getLineInstance(Locale.CANADA);
    lineBreaks.setText("line1\nline2");

    int offset = lineBreaks.first();
    while (offset != BreakIterator.DONE) {
      System.out.println(offset);
      offset = lineBreaks.next();
    }
  }
} // </source>





Determining the Character Boundaries in a Unicode String

   <source lang="java">

import java.text.BreakIterator;
import java.util.Locale;

/**
 * Demo: lists the character boundaries of a short string.
 */
public class Main {

  /**
   * Walks a character-instance {@link BreakIterator} (Canadian locale) over
   * the sample text and prints each boundary offset on its own line, starting
   * with the offset returned by {@code first()} and stopping when the
   * iterator reports {@link BreakIterator#DONE}.
   *
   * @param argv ignored
   */
  public static void main(String[] argv) throws Exception {
    BreakIterator charBreaks = BreakIterator.getCharacterInstance(Locale.CANADA);
    charBreaks.setText("aString");

    int offset = charBreaks.first();
    while (offset != BreakIterator.DONE) {
      System.out.println(offset);
      offset = charBreaks.next();
    }
  }
} // </source>





Determining the Sentence Boundaries in a Unicode String

   <source lang="java">

import java.text.BreakIterator;
import java.util.Locale;

/**
 * Demo: lists the sentence boundaries of a short string.
 */
public class Main {

  /**
   * Walks a sentence-instance {@link BreakIterator} (Canadian locale) over
   * the sample text and prints each boundary offset on its own line, starting
   * with the offset returned by {@code first()} and stopping when the
   * iterator reports {@link BreakIterator#DONE}.
   *
   * @param argv ignored
   */
  public static void main(String[] argv) throws Exception {
    BreakIterator sentenceBreaks = BreakIterator.getSentenceInstance(Locale.CANADA);
    sentenceBreaks.setText("this is a test.");

    int offset = sentenceBreaks.first();
    while (offset != BreakIterator.DONE) {
      System.out.println(offset);
      offset = sentenceBreaks.next();
    }
  }
} // </source>





Determining the Word Boundaries in a Unicode String

   <source lang="java">

import java.text.BreakIterator;
import java.util.Locale;

/**
 * Demo: lists the word boundaries of a short string.
 */
public class Main {

  /**
   * Walks a word-instance {@link BreakIterator} (Canadian locale) over the
   * sample text and prints each boundary offset on its own line, starting
   * with the offset returned by {@code first()} and stopping when the
   * iterator reports {@link BreakIterator#DONE}.
   *
   * @param argv ignored
   */
  public static void main(String[] argv) throws Exception {
    BreakIterator wordBreaks = BreakIterator.getWordInstance(Locale.CANADA);
    wordBreaks.setText("a sentence");

    int offset = wordBreaks.first();
    while (offset != BreakIterator.DONE) {
      System.out.println(offset);
      offset = wordBreaks.next();
    }
  }
} // </source>





Word Wrap

   <source lang="java">

/**

* $Revision: 10205 $
* $Date: 2008-04-11 15:48:27 -0700 (Fri, 11 Apr 2008) $
*
* Copyright (C) 2004-2008 Jive Software. All rights reserved.
*
* This software is published under the terms of the GNU Public License (GPL),
* a copy of which is included in this distribution, or a commercial license
* agreement with Jive.
*/

import java.io.UnsupportedEncodingException; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.text.BreakIterator; import java.util.*; import java.util.concurrent.ConcurrentHashMap; /**

* Utility class to perform common String manipulation algorithms.
*/

public class StringUtils {

    // Constants used by escapeHTMLTags (that method is not part of this
    // excerpt). The literals are the HTML entity forms of the characters.
    private static final char[] QUOTE_ENCODE = "&quot;".toCharArray();
    private static final char[] AMP_ENCODE = "&amp;".toCharArray();
    private static final char[] LT_ENCODE = "&lt;".toCharArray();
    private static final char[] GT_ENCODE = "&gt;".toCharArray();

    private StringUtils() {
        // Not instantiable.
    }

    /**
     * Reformats a string so that lines longer than {@code width} are split,
     * preferring a line-break opportunity reported by a locale-specific
     * {@link BreakIterator} and falling back to a hard split at
     * {@code width} characters when no usable opportunity exists.
     * <p>
     * The input is returned unchanged when {@code width} is less than 5 or
     * is greater than or equal to the input's length. A {@code null} input
     * yields the empty string.
     * <p>
     * Please note that this method can be lossy: a whitespace character that
     * sits exactly at the wrap position is replaced by the newline.
     *
     * @param input the String to reformat; may be {@code null}.
     * @param width the maximum length of any one line.
     * @param locale the locale whose line-breaking rules are used; only
     *        consulted when a line actually has to be wrapped.
     * @return a new String reformatted as needed, or {@code ""} if
     *         {@code input} is {@code null}.
     */
    public static String wordWrap(String input, int width, Locale locale) {
        // protect ourselves
        if (input == null) {
            return "";
        }
        if (width < 5 || width >= input.length()) {
            return input;
        }

        StringBuilder buf = new StringBuilder(input);
        // Created on first use and then reused; setText() resets its state.
        BreakIterator breaks = null;
        boolean endOfLine = false;
        int lineStart = 0;
        for (int i = 0; i < buf.length(); i++) {
            if (buf.charAt(i) == '\n') {
                lineStart = i + 1;
                endOfLine = true;
            }

            // Nothing to do until the current line exceeds the width.
            if (i <= lineStart + width - 1) {
                continue;
            }

            if (endOfLine) {
                // NOTE(review): once an explicit newline has been seen, the
                // next over-long line is hard-split at the width without
                // looking for a word break. Kept as in the original; confirm
                // whether this is intended.
                buf.insert(i, '\n');
                lineStart = i + 1;
                endOfLine = false;
                continue;
            }

            if (breaks == null) {
                breaks = BreakIterator.getLineInstance(locale);
            }
            int windowLength = i - lineStart;
            breaks.setText(buf.substring(lineStart, i));
            int end = breaks.last();

            // If the character right after the window isn't whitespace we
            // can't split there (looks bad); search for an earlier break.
            // NOTE(review): preceding(end - 1) also skips a boundary located
            // exactly at end - 1; kept as in the original.
            if (end == windowLength
                    && !Character.isWhitespace(buf.charAt(lineStart + end))) {
                end = breaks.preceding(end - 1);
            }

            if (end != BreakIterator.DONE && end == windowLength) {
                // The character after the window is whitespace: turn it into
                // the newline. The next line starts right after it (the
                // original pointed lineStart at the newline itself, which
                // made the following line one character too long and could
                // produce an empty line).
                buf.setCharAt(lineStart + end, '\n');
                lineStart = lineStart + end + 1;
            }
            else if (end != BreakIterator.DONE && end != 0) {
                // An earlier break opportunity exists: insert a newline there.
                buf.insert(lineStart + end, '\n');
                lineStart = lineStart + end + 1;
            }
            else {
                // No usable break opportunity: hard split at the width.
                buf.insert(i, '\n');
                lineStart = i + 1;
            }
        }
        return buf.toString();
    }

} // </source>





Wrap multi-line strings (and get the individual lines)

   <source lang="java">

/*

* $Id: Utilities.java,v 1.11 2008/10/14 22:31:46 rah003 Exp $
*
* Copyright 2006 Sun Microsystems, Inc., 4150 Network Circle,
* Santa Clara, California 95054, U.S.A. All rights reserved.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
*/

import java.text.BreakIterator; import java.util.StringTokenizer; /**

* Contribution from NetBeans: Issue #319-swingx.
* 
* 
* PENDING: need to reconcile with OS, JVM... added as-is because needed the
* shortcut handling to fix #
* 
* @author apple
*/

public class Utils {

  /**
   * Wrap multi-line strings (and get the individual lines).
   * <p>
   * A piece of text is only run through the break iterator when its length
   * is at least {@code width}; a wrapped line is ended at the last boundary
   * that lies fewer than {@code width} characters from the line start. When
   * not even the first boundary fits, the line is extended to that boundary,
   * so a single unbreakable segment can exceed {@code width}.
   * <p>
   * When {@code removeNewLines} is {@code false} the input is split with a
   * {@link StringTokenizer} on {@code "\n"}, so empty lines in the input do
   * not appear in the result.
   *
   * @param original
   *          the original string to wrap; must not be {@code null}
   * @param width
   *          the maximum width of lines; values below 1 are treated as 1
   * @param breakIterator
   *          breaks original to chars, words, sentences, depending on what
   *          instance you provide; its text is replaced by this method
   * @param removeNewLines
   *          if true, leading and trailing line terminators are removed and
   *          any remaining newline is replaced by a space
   * @return the lines after wrapping
   */
  public static String[] wrapStringToArray(String original, int width, BreakIterator breakIterator,
      boolean removeNewLines) {
    if (original.length() == 0) {
      return new String[] { original };
    }

    String[] workingSet;
    if (removeNewLines) {
      // substitute original newlines with spaces,
      // remove newlines from head and tail
      original = trimString(original);
      original = original.replace('\n', ' ');
      workingSet = new String[] { original };
    } else {
      StringTokenizer tokens = new StringTokenizer(original, "\n"); // NOI18N
      workingSet = new String[tokens.countTokens()];
      for (int i = 0; i < workingSet.length; i++) {
        workingSet[i] = tokens.nextToken();
      }
    }

    if (width < 1) {
      width = 1;
    }
    // Nothing to wrap when the whole text, or every individual line, fits.
    if (original.length() <= width || allShorterThan(workingSet, width)) {
      return workingSet;
    }

    java.util.List<String> lines = new java.util.ArrayList<String>();
    for (String segment : workingSet) {
      if (segment.length() < width) {
        lines.add(segment);
        continue;
      }

      breakIterator.setText(segment);
      int nextStart = breakIterator.next();
      int lineStart = 0; // start of the line currently being built
      do {
        // Last boundary known to fit on this line; equal to lineStart while
        // nothing fits yet.
        int prevStart = lineStart;
        while ((nextStart != BreakIterator.DONE) && ((nextStart - lineStart) < width)) {
          prevStart = nextStart;
          nextStart = breakIterator.next();
        }
        if (nextStart == BreakIterator.DONE) {
          // Every remaining boundary fit: take the rest of the segment.
          nextStart = prevStart = segment.length();
        }
        if (prevStart <= lineStart) {
          // Not even one boundary fits: extend the line to the next boundary.
          // Comparing against lineStart (the original compared against 0)
          // avoids emitting a spurious empty line after a forced break.
          prevStart = nextStart;
        }
        lines.add(segment.substring(lineStart, prevStart));
        lineStart = prevStart;
      } while (lineStart < segment.length());
    }
    return lines.toArray(new String[lines.size()]);
  }

  /** Returns true when every element of {@code parts} is shorter than {@code width}. */
  private static boolean allShorterThan(String[] parts, int width) {
    for (String part : parts) {
      if (part.length() >= width) {
        return false;
      }
    }
    return true;
  }

  /**
   * Removes all leading and trailing {@code '\n'} and {@code '\r'}
   * characters; a string made up only of those characters becomes empty.
   */
  private static String trimString(String s) {
    int start = 0;
    int end = s.length();
    while (start < end && isLineTerminator(s.charAt(start))) {
      start++;
    }
    while (end > start && isLineTerminator(s.charAt(end - 1))) {
      end--;
    }
    return s.substring(start, end);
  }

  /** Returns true for the two characters that trimString strips. */
  private static boolean isLineTerminator(char c) {
    return c == '\n' || c == '\r';
  }

} // </source>