Java Tutorial/I18N/BreakIterator

Материал из Java эксперт
Перейти к: навигация, поиск

Determining Potential Line Breaks in a Unicode String

import java.text.BreakIterator;
import java.util.Locale;
/**
 * Prints every potential line-break offset that the Canadian-locale line
 * {@link BreakIterator} reports for a short two-line sample string.
 */
public class Main {
  /**
   * Walks the line-break boundaries of the sample text from first to last and
   * writes each boundary offset to standard output, one per line.
   *
   * @param argv ignored
   */
  public static void main(String[] argv) throws Exception {
    BreakIterator boundaries = BreakIterator.getLineInstance(Locale.CANADA);
    boundaries.setText("line1\nline2");

    int offset = boundaries.first();
    // next() returns DONE once the last boundary has been passed.
    while (offset != BreakIterator.DONE) {
      System.out.println(offset);
      offset = boundaries.next();
    }
  }
}





Determining the Character Boundaries in a Unicode String

import java.text.BreakIterator;
import java.util.Locale;
/**
 * Prints every character boundary offset that the Canadian-locale character
 * {@link BreakIterator} reports for a short sample string.
 */
public class Main {
  /**
   * Walks the character boundaries of the sample text from first to last and
   * writes each boundary offset to standard output, one per line.
   *
   * @param argv ignored
   */
  public static void main(String[] argv) throws Exception {
    BreakIterator boundaries = BreakIterator.getCharacterInstance(Locale.CANADA);
    boundaries.setText("aString");

    int offset = boundaries.first();
    // next() returns DONE once the last boundary has been passed.
    while (offset != BreakIterator.DONE) {
      System.out.println(offset);
      offset = boundaries.next();
    }
  }
}





Determining the Sentence Boundaries in a Unicode String

import java.text.BreakIterator;
import java.util.Locale;
/**
 * Prints every sentence boundary offset that the Canadian-locale sentence
 * {@link BreakIterator} reports for a short sample string.
 */
public class Main {
  /**
   * Walks the sentence boundaries of the sample text from first to last and
   * writes each boundary offset to standard output, one per line.
   *
   * @param argv ignored
   */
  public static void main(String[] argv) throws Exception {
    BreakIterator boundaries = BreakIterator.getSentenceInstance(Locale.CANADA);
    boundaries.setText("this is a test.");

    int offset = boundaries.first();
    // next() returns DONE once the last boundary has been passed.
    while (offset != BreakIterator.DONE) {
      System.out.println(offset);
      offset = boundaries.next();
    }
  }
}





Determining the Word Boundaries in a Unicode String

import java.text.BreakIterator;
import java.util.Locale;
/**
 * Prints every word boundary offset that the Canadian-locale word
 * {@link BreakIterator} reports for a short sample string.
 */
public class Main {
  /**
   * Walks the word boundaries of the sample text from first to last and
   * writes each boundary offset to standard output, one per line.
   *
   * @param argv ignored
   */
  public static void main(String[] argv) throws Exception {
    BreakIterator boundaries = BreakIterator.getWordInstance(Locale.CANADA);
    boundaries.setText("a sentence");

    int offset = boundaries.first();
    // next() returns DONE once the last boundary has been passed.
    while (offset != BreakIterator.DONE) {
      System.out.println(offset);
      offset = boundaries.next();
    }
  }
}





Word Wrap

/**
 * $Revision: 10205 $
 * $Date: 2008-04-11 15:48:27 -0700 (Fri, 11 Apr 2008) $
 *
 * Copyright (C) 2004-2008 Jive Software. All rights reserved.
 *
 * This software is published under the terms of the GNU Public License (GPL),
 * a copy of which is included in this distribution, or a commercial license
 * agreement with Jive.
 */
import java.io.UnsupportedEncodingException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.BreakIterator;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
/**
 * Utility class to perform common String manipulation algorithms.
 */
public class StringUtils {
    // Constants used by escapeHTMLTags (that method is not part of this excerpt).
    // Each one holds the HTML entity for the character named by the constant; the
    // entity text had been decoded by an HTML round-trip and is restored here.
    private static final char[] QUOTE_ENCODE = "&quot;".toCharArray();
    private static final char[] AMP_ENCODE = "&amp;".toCharArray();
    private static final char[] LT_ENCODE = "&lt;".toCharArray();
    private static final char[] GT_ENCODE = "&gt;".toCharArray();

    private StringUtils() {
        // Not instantiable.
    }

    /**
     * Reformats a string so that lines longer than {@code width} are split.
     * When a line overflows, it is broken at a line-break boundary reported by
     * a {@link BreakIterator} for the overflowing line if one is usable;
     * otherwise a newline is inserted at the overflow position.
     * <p>
     * The input is returned unchanged when {@code width} is less than 5 or when
     * {@code width} is greater than or equal to the input's length. A
     * {@code null} input yields the empty string.
     * <p>
     * Please note that this method can be lossy: a whitespace character sitting
     * exactly at the wrap position is replaced by the newline.
     *
     * @param input the String to reformat.
     * @param width the maximum length of any one line.
     * @param locale the locale used to obtain the line-break iterator.
     * @return a new String reformatted as needed.
     */
    public static String wordWrap(String input, int width, Locale locale) {
        // protect ourselves
        if (input == null) {
            return "";
        }
        if (width < 5 || width >= input.length()) {
            return input;
        }

        StringBuilder buf = new StringBuilder(input);
        // Created on first use only, so inputs that never reach the
        // break-searching branch do not touch the locale at all.
        BreakIterator breaks = null;
        boolean endOfLine = false;
        int lineStart = 0;
        for (int i = 0; i < buf.length(); i++) {
            if (buf.charAt(i) == '\n') {
                lineStart = i + 1;
                endOfLine = true;
            }
            // handle splitting at width character
            if (i > lineStart + width - 1) {
                if (!endOfLine) {
                    int limit = i - lineStart - 1;
                    if (breaks == null) {
                        breaks = BreakIterator.getLineInstance(locale);
                    }
                    breaks.setText(buf.substring(lineStart, i));
                    int end = breaks.last();
                    // if the character at the wrap position isn't whitespace,
                    // we can't split on it (looks bad). Search for a previous
                    // break character
                    if (end == limit + 1) {
                        if (!Character.isWhitespace(buf.charAt(lineStart + end))) {
                            end = breaks.preceding(end - 1);
                        }
                    }
                    // if the character at the wrap position is whitespace,
                    // replace it with a \n
                    if (end != BreakIterator.DONE && end == limit + 1) {
                        buf.replace(lineStart + end, lineStart + end + 1, "\n");
                        // The next line begins AFTER the newline just written,
                        // exactly as in the two insert branches below. Leaving
                        // lineStart on the newline itself made the following
                        // line one character short and could emit a blank line.
                        lineStart = lineStart + end + 1;
                    }
                    // otherwise, just insert a \n
                    else if (end != BreakIterator.DONE && end != 0) {
                        buf.insert(lineStart + end, "\n");
                        lineStart = lineStart + end + 1;
                    }
                    else {
                        // no usable boundary: hard break at the overflow position
                        buf.insert(i, "\n");
                        lineStart = i + 1;
                    }
                }
                else {
                    // a newline from the input was seen earlier: hard break at
                    // the overflow position and resume boundary searching
                    buf.insert(i, "\n");
                    lineStart = i + 1;
                    endOfLine = false;
                }
            }
        }
        return buf.toString();
    }
}





Wrap multi-line strings (and get the individual lines)

/*
 * $Id: Utilities.java,v 1.11 2008/10/14 22:31:46 rah003 Exp $
 *
 * Copyright 2006 Sun Microsystems, Inc., 4150 Network Circle,
 * Santa Clara, California 95054, U.S.A. All rights reserved.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
import java.text.BreakIterator;
import java.util.StringTokenizer;
/**
 * Contribution from NetBeans: Issue #319-swingx.
 * 
 * 
 * PENDING: need to reconcile with OS, JVM... added as-is because needed the
 * shortcut handling to fix #
 * 
 * @author apple
 */
public class Utils {
  /**
   * Wrap multi-line strings (and get the individual lines).
   * <p>
   * When {@code removeNewLines} is {@code false} the input is split on
   * {@code '\n'} with a {@link StringTokenizer}, which does not return empty
   * tokens, so empty lines are dropped. Lines are only re-wrapped when at least
   * one of them is {@code width} characters long or longer.
   * 
   * @param original
   *          the original string to wrap
   * @param width
   *          the maximum width of lines; values below 1 are treated as 1
   * @param breakIterator
   *          breaks original to chars, words, sentences, depending on what
   *          instance you provide. Its text is replaced by this method.
   * @param removeNewLines
   *          if <code>true</code>, leading and trailing line breaks are
   *          stripped and every remaining newline is replaced by a space
   * @return the lines after wrapping
   */
  public static String[] wrapStringToArray(String original, int width, BreakIterator breakIterator,
      boolean removeNewLines) {
    if (original.length() == 0) {
      return new String[] { original };
    }
    String[] workingSet;
    // substitute original newlines with spaces,
    // remove newlines from head and tail
    if (removeNewLines) {
      original = trimString(original);
      original = original.replace('\n', ' ');
      workingSet = new String[] { original };
    } else {
      StringTokenizer tokens = new StringTokenizer(original, "\n"); // NOI18N
      int len = tokens.countTokens();
      workingSet = new String[len];
      for (int i = 0; i < len; i++) {
        workingSet[i] = tokens.nextToken();
      }
    }
    if (width < 1) {
      width = 1;
    }
    if (original.length() <= width) {
      return workingSet;
    }
    if (allShorterThan(workingSet, width)) {
      return workingSet;
    }
    java.util.List<String> lines = new java.util.ArrayList<String>();
    int lineStart = 0; // the position of start of currently processed line in
                        // the current working-set entry
    for (int i = 0; i < workingSet.length; i++) {
      if (workingSet[i].length() < width) {
        lines.add(workingSet[i]);
      } else {
        breakIterator.setText(workingSet[i]);
        int nextStart = breakIterator.next();
        int prevStart = 0;
        do {
          // advance while the next boundary still fits into the current line
          while (((nextStart - lineStart) < width) && (nextStart != BreakIterator.DONE)) {
            prevStart = nextStart;
            nextStart = breakIterator.next();
          }
          if (nextStart == BreakIterator.DONE) {
            // no more boundaries: the rest of the entry becomes the last line
            nextStart = prevStart = workingSet[i].length();
          }
          if (prevStart == 0) {
            // not even one segment fits: emit the over-long segment as it is
            prevStart = nextStart;
          }
          lines.add(workingSet[i].substring(lineStart, prevStart));
          lineStart = prevStart;
          prevStart = 0;
        } while (lineStart < workingSet[i].length());
        lineStart = 0;
      }
    }
    return lines.toArray(new String[lines.size()]);
  }

  /** Returns true when every entry is strictly shorter than the given width. */
  private static boolean allShorterThan(String[] entries, int width) {
    for (int i = 0; i < entries.length; i++) {
      if (entries[i].length() >= width) {
        return false;
      }
    }
    return true;
  }

  /**
   * Strips leading and trailing {@code '\n'} and {@code '\r'} characters.
   * A string made up solely of those characters becomes the empty string.
   */
  private static String trimString(String s) {
    int start = 0;
    int end = s.length();
    while ((start < end) && isLineBreak(s.charAt(start))) {
      start++;
    }
    while ((end > start) && isLineBreak(s.charAt(end - 1))) {
      end--;
    }
    return s.substring(start, end);
  }

  /** Returns true for a line feed or a carriage return. */
  private static boolean isLineBreak(char c) {
    return (c == '\n') || (c == '\r');
  }
}