Java Tutorial/I18N/BreakIterator
Содержание
Determining Potential Line Breaks in a Unicode String
import java.text.BreakIterator;
import java.util.Locale;
public class Main {
    /**
     * Prints, one per line, every line-break boundary offset that the
     * {@link BreakIterator} line instance for {@code Locale.CANADA} reports
     * for the sample text {@code "line1\nline2"}.
     *
     * @param argv ignored
     */
    public static void main(String[] argv) throws Exception {
        BreakIterator lineBreaks = BreakIterator.getLineInstance(Locale.CANADA);
        lineBreaks.setText("line1\nline2");
        // Walk the boundaries from the first one until the iterator is exhausted.
        int boundary = lineBreaks.first();
        while (boundary != BreakIterator.DONE) {
            System.out.println(boundary);
            boundary = lineBreaks.next();
        }
    }
}
Determining the Character Boundaries in a Unicode String
import java.text.BreakIterator;
import java.util.Locale;
public class Main {
    /**
     * Prints, one per line, every character boundary offset that the
     * {@link BreakIterator} character instance for {@code Locale.CANADA}
     * reports for the sample text {@code "aString"}.
     *
     * @param argv ignored
     */
    public static void main(String[] argv) throws Exception {
        BreakIterator characters = BreakIterator.getCharacterInstance(Locale.CANADA);
        characters.setText("aString");
        // Walk the boundaries from the first one until the iterator is exhausted.
        int boundary = characters.first();
        while (boundary != BreakIterator.DONE) {
            System.out.println(boundary);
            boundary = characters.next();
        }
    }
}
Determining the Sentence Boundaries in a Unicode String
import java.text.BreakIterator;
import java.util.Locale;
public class Main {
    /**
     * Prints, one per line, every sentence boundary offset that the
     * {@link BreakIterator} sentence instance for {@code Locale.CANADA}
     * reports for the sample text {@code "this is a test."}.
     *
     * @param argv ignored
     */
    public static void main(String[] argv) throws Exception {
        BreakIterator sentences = BreakIterator.getSentenceInstance(Locale.CANADA);
        sentences.setText("this is a test.");
        // Walk the boundaries from the first one until the iterator is exhausted.
        int boundary = sentences.first();
        while (boundary != BreakIterator.DONE) {
            System.out.println(boundary);
            boundary = sentences.next();
        }
    }
}
Determining the Word Boundaries in a Unicode String
import java.text.BreakIterator;
import java.util.Locale;
public class Main {
    /**
     * Prints, one per line, every word boundary offset that the
     * {@link BreakIterator} word instance for {@code Locale.CANADA} reports
     * for the sample text {@code "a sentence"}.
     *
     * @param argv ignored
     */
    public static void main(String[] argv) throws Exception {
        BreakIterator words = BreakIterator.getWordInstance(Locale.CANADA);
        words.setText("a sentence");
        // Walk the boundaries from the first one until the iterator is exhausted.
        int boundary = words.first();
        while (boundary != BreakIterator.DONE) {
            System.out.println(boundary);
            boundary = words.next();
        }
    }
}
Word Wrap
/**
* $Revision: 10205 $
* $Date: 2008-04-11 15:48:27 -0700 (Fri, 11 Apr 2008) $
*
* Copyright (C) 2004-2008 Jive Software. All rights reserved.
*
* This software is published under the terms of the GNU Public License (GPL),
* a copy of which is included in this distribution, or a commercial license
* agreement with Jive.
*/
import java.io.UnsupportedEncodingException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.BreakIterator;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
/**
* Utility class to perform common String manipulation algorithms.
*/
public class StringUtils {
    // Constants used by escapeHTMLTags.
    // The literals are the HTML entity spellings of the four special characters.
    private static final char[] QUOTE_ENCODE = "&quot;".toCharArray();
    private static final char[] AMP_ENCODE = "&amp;".toCharArray();
    private static final char[] LT_ENCODE = "&lt;".toCharArray();
    private static final char[] GT_ENCODE = "&gt;".toCharArray();

    private StringUtils() {
        // Not instantiable.
    }

    /**
     * Reformats a string so that lines longer than {@code width} characters
     * are split, preferably at a line-break opportunity reported by a
     * locale-sensitive {@link BreakIterator}, otherwise exactly at
     * {@code width} characters.
     * <p>
     * If {@code width} is less than 5, or is greater than or equal to the
     * length of {@code input}, the string is returned unchanged. Existing
     * {@code '\n'} characters in the input start a new line and reset the
     * width count.
     * <p>
     * Please note that this method can be lossy: a whitespace character that
     * sits exactly at the wrap position is replaced by the inserted newline.
     *
     * @param input the String to reformat; {@code null} yields an empty string.
     * @param width the maximum length of any one line.
     * @param locale the locale whose line-breaking rules are used; when
     *      {@code null}, {@link Locale#getDefault()} is used.
     * @return a new String reformatted as needed, or {@code input} itself when
     *      no wrapping is required.
     */
    public static String wordWrap(String input, int width, Locale locale) {
        // protect ourselves
        if (input == null) {
            return "";
        }
        if (width < 5 || width >= input.length()) {
            return input;
        }

        // One iterator is enough; it is re-targeted with setText() per wrap.
        BreakIterator breaks =
                BreakIterator.getLineInstance(locale != null ? locale : Locale.getDefault());
        StringBuilder buf = new StringBuilder(input);
        int lineStart = 0;

        for (int i = 0; i < buf.length(); i++) {
            char current = buf.charAt(i);
            if (current == '\n') {
                // An existing newline starts a fresh line.
                lineStart = i + 1;
                continue;
            }

            int lineLength = i - lineStart;
            if (lineLength < width) {
                continue;
            }

            // The current line already holds `width` characters: break here.
            if (Character.isWhitespace(current)) {
                // The character at the wrap position is whitespace; turn it
                // into the newline. The next line starts right after it.
                buf.setCharAt(i, '\n');
                lineStart = i + 1;
                continue;
            }

            // Otherwise look for the last break opportunity strictly inside
            // the current line so that the word is not split.
            breaks.setText(buf.substring(lineStart, i));
            int end = breaks.preceding(lineLength);
            if (end != BreakIterator.DONE && end != 0) {
                buf.insert(lineStart + end, '\n');
                lineStart = lineStart + end + 1;
            }
            else {
                // No usable break opportunity: hard-break at the width.
                buf.insert(i, '\n');
                lineStart = i + 1;
            }
        }
        return buf.toString();
    }
}
Wrap multi-line strings (and get the individual lines)
/*
* $Id: Utilities.java,v 1.11 2008/10/14 22:31:46 rah003 Exp $
*
* Copyright 2006 Sun Microsystems, Inc., 4150 Network Circle,
* Santa Clara, California 95054, U.S.A. All rights reserved.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
import java.text.BreakIterator;
import java.util.StringTokenizer;
/**
* Contribution from NetBeans: Issue #319-swingx.
*
*
* PENDING: need to reconcile with OS, JVM... added as-is because needed the
* shortcut handling to fix #
*
* @author apple
*/
public class Utils {
    /**
     * Wrap multi-line strings (and get the individual lines).
     * <p>
     * When {@code removeNewLines} is {@code false} the input is first split
     * on {@code '\n'} (empty lines are dropped by the tokenizer) and every
     * piece is wrapped on its own. When it is {@code true}, leading and
     * trailing line terminators are stripped and the remaining {@code '\n'}
     * characters are replaced by spaces before wrapping.
     * <p>
     * A piece that has to be wrapped is cut at boundaries reported by
     * {@code breakIterator} so that each resulting line is shorter than
     * {@code width}; a single unbreakable run that is {@code width} characters
     * or longer is emitted as one over-long line.
     *
     * @param original
     *          the original string to wrap
     * @param width
     *          the maximum width of lines; values below 1 are treated as 1
     * @param breakIterator
     *          breaks original to chars, words, sentences, depending on what
     *          instance you provide. Its text is replaced by this method.
     * @param removeNewLines
     *          if <code>true</code>, any newlines in the original string are
     *          ignored
     * @return the lines after wrapping
     */
    public static String[] wrapStringToArray(String original, int width, BreakIterator breakIterator,
            boolean removeNewLines) {
        if (original.length() == 0) {
            return new String[] { original };
        }

        String[] workingSet;
        if (removeNewLines) {
            // remove newlines from head and tail, then substitute the
            // remaining newlines with spaces
            original = trimString(original);
            original = original.replace("\n", " ");
            workingSet = new String[] { original };
        } else {
            StringTokenizer tokens = new StringTokenizer(original, "\n"); // NOI18N
            int len = tokens.countTokens();
            workingSet = new String[len];
            for (int i = 0; i < len; i++) {
                workingSet[i] = tokens.nextToken();
            }
        }

        if (width < 1) {
            width = 1;
        }
        // Nothing to wrap: either the whole text fits, or every piece does.
        if (original.length() <= width || allShorterThan(workingSet, width)) {
            return workingSet;
        }

        java.util.List<String> lines = new java.util.ArrayList<String>();
        for (String segment : workingSet) {
            if (segment.length() < width) {
                lines.add(segment);
                continue;
            }
            breakIterator.setText(segment);
            int lineStart = 0; // start of the line currently being built, within segment
            int nextStart = breakIterator.next();
            do {
                // Last boundary that still fits on the current line; 0 means "none yet".
                int prevStart = 0;
                while (((nextStart - lineStart) < width) && (nextStart != BreakIterator.DONE)) {
                    prevStart = nextStart;
                    nextStart = breakIterator.next();
                }
                if (nextStart == BreakIterator.DONE) {
                    // Ran out of boundaries: the rest of the segment is the last line.
                    nextStart = prevStart = segment.length();
                }
                if (prevStart == 0) {
                    // Not even one boundary fits: emit the over-long run as is.
                    prevStart = nextStart;
                }
                lines.add(segment.substring(lineStart, prevStart));
                lineStart = prevStart;
            } while (lineStart < segment.length());
        }
        return lines.toArray(new String[lines.size()]);
    }

    /** Returns true when every element of {@code pieces} is shorter than {@code width}. */
    private static boolean allShorterThan(String[] pieces, int width) {
        for (String piece : pieces) {
            if (piece.length() >= width) {
                return false;
            }
        }
        return true;
    }

    /**
     * Strips leading and trailing {@code '\n'} and {@code '\r'} characters.
     * A string made up only of those characters becomes the empty string.
     */
    private static String trimString(String s) {
        int start = 0;
        int end = s.length();
        while (start < end && isLineTerminator(s.charAt(start))) {
            start++;
        }
        while (end > start && isLineTerminator(s.charAt(end - 1))) {
            end--;
        }
        return s.substring(start, end);
    }

    /** Returns true for the two characters that {@code trimString} removes. */
    private static boolean isLineTerminator(char c) {
        return c == '\n' || c == '\r';
    }
}