Java Tutorial/Data Type/String Split
Версия от 17:44, 31 мая 2010; (обсуждение)
Содержание
- 1 Split a String
- 2 Split by dot
- 3 " ".split(" ") generates an ArrayIndexOutOfBoundsException
- 4 Splits a string around matches of the given delimiter character.
- 5 Splits the provided text into an array, separator string specified. Returns a maximum of max substrings.
- 6 Splits the provided text into an array, using whitespace as the separator, preserving all tokens, including empty tokens created by adjacent separators.
- 7 Split string
- 8 Split up a string into multiple strings based on a delimiter
- 9 String.split() is based on regular expression
- 10 String split on multicharacter delimiter
- 11 Using split() with a space can be a problem
Split a String
/**
 * Demonstrates the simplest use of {@link String#split(String)}: breaking a
 * comma-separated list into its items and printing one item per line.
 */
public class Main {
  public static void main(String[] args) {
    String[] elements = "one,two,three,four,five".split(",");
    // Print every piece in the order split() produced it.
    for (String element : elements) {
      System.out.println(element);
    }
  }
}
/*
one
two
three
four
five
*/
Split by dot
/**
 * Splits a string on a literal dot. The argument of split() is a regular
 * expression, where an unescaped "." matches any character, so the dot has
 * to be escaped as "\\.".
 */
public class Main {
  public static void main(String args[]) throws Exception {
    String[] words = "A.BB.CCC".split("\\.");
    for (int index = 0; index < words.length; index++) {
      System.out.println(words[index]);
    }
  }
}
/*
A
BB
CCC
*/
" ".split(" ") generates an ArrayIndexOutOfBoundsException
/**
 * Shows a pitfall of {@link String#split(String)}: splitting " " on " "
 * yields only empty strings, and split() drops trailing empty strings, so the
 * resulting array has length zero. Reading element 0 therefore throws an
 * ArrayIndexOutOfBoundsException.
 */
public class Main {
  public static void main(String args[]) throws Exception {
    String[] pieces = " ".split(" ");
    // pieces is empty here, so this access fails before anything is printed.
    System.out.println(pieces[0]);
  }
}
/*
Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException: 0
at Main.main(Main.java:5)
*/
Splits a string around matches of the given delimiter character.
import java.util.StringTokenizer;
/*
Derby - Class org.apache.derby.iapi.util.PropertyUtil
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to you under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
public class Main {
  /**
   * Splits a string around matches of the given delimiter character.
   *
   * Where applicable, this method can be used as a substitute for
   * <code>String.split(String regex)</code>, which is not available
   * on a JSR169/Java ME platform.
   *
   * The work is delegated to <code>StringTokenizer</code>, so empty tokens
   * (leading, trailing or between adjacent delimiters) are not returned.
   *
   * @param str the string to be split
   * @param delim the delimiter
   * @return the non-empty tokens of <code>str</code>, in order; an empty
   *         array if there are none
   * @throws NullPointerException if str is null
   */
  static public String[] split(String str, char delim)
  {
    if (str == null) {
      throw new NullPointerException("str can't be null");
    }
    // Note the javadoc on StringTokenizer:
    //     StringTokenizer is a legacy class that is retained for
    //     compatibility reasons although its use is discouraged in
    //     new code.
    // In other words, if StringTokenizer is ever removed from the JDK,
    // we need to have a look at String.split() (or java.util.regex)
    // if it is supported on a JSR169/Java ME platform by then.
    StringTokenizer st = new StringTokenizer(str, String.valueOf(delim));
    // countTokens() reports how many tokens remain, so the array can be
    // sized exactly before the tokens are consumed.
    int n = st.countTokens();
    String[] s = new String[n];
    for (int i = 0; i < n; i++) {
      s[i] = st.nextToken();
    }
    return s;
  }
}
Splits the provided text into an array, separator string specified. Returns a maximum of max substrings.
import java.util.ArrayList;
import java.util.List;
public class Main {
  /**
   * Splits the provided text into an array, separator string specified.
   * Returns a maximum of <code>max</code> substrings.
   *
   * The separator(s) will not be included in the returned String array.
   * Adjacent separators are treated as one separator.
   *
   * A <code>null</code> input String returns <code>null</code>.
   * A <code>null</code> (or empty) separator splits on whitespace.
   *
   * <pre>
   * StringUtils.splitByWholeSeparator(null, *, *)               = null
   * StringUtils.splitByWholeSeparator("", *, *)                 = []
   * StringUtils.splitByWholeSeparator("ab de fg", null, 0)      = ["ab", "de", "fg"]
   * StringUtils.splitByWholeSeparator("ab   de fg", null, 0)    = ["ab", "de", "fg"]
   * StringUtils.splitByWholeSeparator("ab:cd:ef", ":", 2)       = ["ab", "cd:ef"]
   * StringUtils.splitByWholeSeparator("ab-!-cd-!-ef", "-!-", 5) = ["ab", "cd", "ef"]
   * StringUtils.splitByWholeSeparator("ab-!-cd-!-ef", "-!-", 2) = ["ab", "cd-!-ef"]
   * </pre>
   *
   * @param str the String to parse, may be null
   * @param separator String containing the String to be used as a delimiter,
   *        <code>null</code> splits on whitespace
   * @param max the maximum number of elements to include in the returned
   *        array. A zero or negative value implies no limit.
   * @return an array of parsed Strings, <code>null</code> if null String was input
   */
  public static String[] splitByWholeSeparator(String str, String separator, int max) {
    return splitByWholeSeparatorWorker(str, separator, max, false);
  }

  /**
   * Performs the logic for the <code>splitByWholeSeparatorPreserveAllTokens</code> methods.
   *
   * @param str the String to parse, may be <code>null</code>
   * @param separator String containing the String to be used as a delimiter,
   *        <code>null</code> splits on whitespace
   * @param max the maximum number of elements to include in the returned
   *        array. A zero or negative value implies no limit.
   * @param preserveAllTokens if <code>true</code>, adjacent separators are
   *        treated as empty token separators; if <code>false</code>, adjacent
   *        separators are treated as one separator.
   * @return an array of parsed Strings, <code>null</code> if null String input
   * @since 2.4
   */
  private static String[] splitByWholeSeparatorWorker(String str, String separator, int max,
      boolean preserveAllTokens) {
    if (str == null) {
      return null;
    }
    int len = str.length();
    if (len == 0) {
      return new String[0];
    }
    if ((separator == null) || ("".equals(separator))) {
      // Split on whitespace.
      return splitWorker(str, null, max, preserveAllTokens);
    }

    int separatorLength = separator.length();
    List<String> substrings = new ArrayList<String>();
    int numberOfSubstrings = 0;
    int beg = 0;
    int end = 0;
    while (end < len) {
      end = str.indexOf(separator, beg);
      if (end > -1) {
        if (end > beg) {
          numberOfSubstrings += 1;
          if (numberOfSubstrings == max) {
            // Limit reached: the remainder becomes the last element.
            end = len;
            substrings.add(str.substring(beg));
          } else {
            // The following is OK, because String.substring( beg, end ) excludes
            // the character at the position "end".
            substrings.add(str.substring(beg, end));
            // Set the starting point for the next search.
            beg = end + separatorLength;
          }
        } else {
          // We found a consecutive occurrence of the separator, so skip it
          // (or record an empty token when all tokens are preserved).
          if (preserveAllTokens) {
            numberOfSubstrings += 1;
            if (numberOfSubstrings == max) {
              end = len;
              substrings.add(str.substring(beg));
            } else {
              substrings.add("");
            }
          }
          beg = end + separatorLength;
        }
      } else {
        // No more separators: String.substring( beg ) goes from "beg" to the
        // end of the String.
        substrings.add(str.substring(beg));
        end = len;
      }
    }
    return substrings.toArray(new String[substrings.size()]);
  }

  /**
   * Performs the logic for the <code>split</code> and
   * <code>splitPreserveAllTokens</code> methods that return a maximum array
   * length.
   *
   * @param str the String to parse, may be <code>null</code>
   * @param separatorChars the characters that act as separators,
   *        <code>null</code> means whitespace
   * @param max the maximum number of elements to include in the
   *        array. A zero or negative value implies no limit.
   * @param preserveAllTokens if <code>true</code>, adjacent separators are
   *        treated as empty token separators; if <code>false</code>, adjacent
   *        separators are treated as one separator.
   * @return an array of parsed Strings, <code>null</code> if null String input
   */
  private static String[] splitWorker(String str, String separatorChars, int max, boolean preserveAllTokens) {
    if (str == null) {
      return null;
    }
    int len = str.length();
    if (len == 0) {
      return new String[0];
    }
    List<String> list = new ArrayList<String>();
    int sizePlus1 = 1;
    int i = 0;
    int start = 0;
    boolean match = false;      // a non-separator character has been seen since the last token
    boolean lastMatch = false;  // the most recent character closed a token
    while (i < len) {
      if (isSeparator(str.charAt(i), separatorChars)) {
        if (match || preserveAllTokens) {
          lastMatch = true;
          if (sizePlus1++ == max) {
            // Limit reached: swallow the rest of the input into this token.
            i = len;
            lastMatch = false;
          }
          list.add(str.substring(start, i));
          match = false;
        }
        start = ++i;
        continue;
      }
      lastMatch = false;
      match = true;
      i++;
    }
    if (match || (preserveAllTokens && lastMatch)) {
      list.add(str.substring(start, i));
    }
    return list.toArray(new String[list.size()]);
  }

  /**
   * Tells whether <code>c</code> is a separator: any whitespace character when
   * <code>separatorChars</code> is <code>null</code>, otherwise any character
   * contained in <code>separatorChars</code>.
   */
  private static boolean isSeparator(char c, String separatorChars) {
    if (separatorChars == null) {
      return Character.isWhitespace(c);
    }
    return separatorChars.indexOf(c) >= 0;
  }
}
Splits the provided text into an array, using whitespace as the separator, preserving all tokens, including empty tokens created by adjacent separators.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.List;
public class Main {
  // -----------------------------------------------------------------------
  /**
   * Splits the provided text into an array, using whitespace as the
   * separator, preserving all tokens, including empty tokens created by
   * adjacent separators. This is an alternative to using StringTokenizer.
   * Whitespace is defined by {@link Character#isWhitespace(char)}.
   *
   * The separator is not included in the returned String array.
   * Adjacent separators are treated as separators for empty tokens.
   * For more control over the split use the StrTokenizer class.
   *
   * A <code>null</code> input String returns <code>null</code>.
   *
   * <pre>
   * StringUtils.splitPreserveAllTokens(null)       = null
   * StringUtils.splitPreserveAllTokens("")         = []
   * StringUtils.splitPreserveAllTokens("abc def")  = ["abc", "def"]
   * StringUtils.splitPreserveAllTokens("abc  def") = ["abc", "", "def"]
   * StringUtils.splitPreserveAllTokens(" abc ")    = ["", "abc", ""]
   * </pre>
   *
   * @param str the String to parse, may be <code>null</code>
   * @return an array of parsed Strings, <code>null</code> if null String input
   * @since 2.1
   */
  public static String[] splitPreserveAllTokens(String str) {
    return splitWorker(str, null, -1, true);
  }

  /**
   * Performs the logic for the <code>split</code> and
   * <code>splitPreserveAllTokens</code> methods that return a maximum array
   * length.
   *
   * @param str the String to parse, may be <code>null</code>
   * @param separatorChars the characters that act as separators,
   *        <code>null</code> means whitespace
   * @param max the maximum number of elements to include in the
   *        array. A zero or negative value implies no limit.
   * @param preserveAllTokens if <code>true</code>, adjacent separators are
   *        treated as empty token separators; if <code>false</code>, adjacent
   *        separators are treated as one separator.
   * @return an array of parsed Strings, <code>null</code> if null String input
   */
  private static String[] splitWorker(String str, String separatorChars, int max, boolean preserveAllTokens) {
    if (str == null) {
      return null;
    }
    int len = str.length();
    if (len == 0) {
      return new String[0];
    }
    List<String> list = new ArrayList<String>();
    int sizePlus1 = 1;
    int i = 0;
    int start = 0;
    boolean match = false;      // a non-separator character has been seen since the last token
    boolean lastMatch = false;  // the most recent character closed a token
    while (i < len) {
      if (isSeparator(str.charAt(i), separatorChars)) {
        if (match || preserveAllTokens) {
          lastMatch = true;
          if (sizePlus1++ == max) {
            // Limit reached: swallow the rest of the input into this token.
            i = len;
            lastMatch = false;
          }
          list.add(str.substring(start, i));
          match = false;
        }
        start = ++i;
        continue;
      }
      lastMatch = false;
      match = true;
      i++;
    }
    // A trailing separator leaves one more (possibly empty) token to emit.
    if (match || (preserveAllTokens && lastMatch)) {
      list.add(str.substring(start, i));
    }
    return list.toArray(new String[list.size()]);
  }

  /**
   * Tells whether <code>c</code> is a separator: any whitespace character when
   * <code>separatorChars</code> is <code>null</code>, otherwise any character
   * contained in <code>separatorChars</code>.
   */
  private static boolean isSeparator(char c, String separatorChars) {
    if (separatorChars == null) {
      return Character.isWhitespace(c);
    }
    return separatorChars.indexOf(c) >= 0;
  }
}
Split string
/*
* Static String formatting and query routines.
* Copyright (C) 2001-2005 Stephen Ostermiller
* http://ostermiller.org/contact.pl?regarding=Java+Utilities
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* See COPYING.TXT for details.
*/
import java.util.HashMap;
import java.util.regex.Pattern;
/**
* Utilities for String formatting, manipulation, and queries.
 * More information about this class is available from ostermiller.org.
*
* @author Stephen Ostermiller http://ostermiller.org/contact.pl?regarding=Java+Utilities
* @since ostermillerutils 1.00.00
*/
public class StringHelper {
  /**
   * Breaks a String into tokens around a literal delimiter.
   *
   * No regular expressions are involved: the delimiter is matched as one
   * literal String (unlike java.util.StringTokenizer, which treats each
   * character of its argument as a separate delimiter).
   *
   * Empty tokens are returned as empty Strings, and the delimiter itself is
   * never part of the result. A null or empty delimiter yields a one-element
   * array holding the original String.
   *
   * StringHelper.split("1-2-3", "-");<br>
   * result: {"1","2","3"}<br>
   * StringHelper.split("-1--2-", "-");<br>
   * result: {"","1","","2",""}<br>
   * StringHelper.split("123", "");<br>
   * result: {"123"}<br>
   * StringHelper.split("1-2---3----4", "--");<br>
   * result: {"1-2","-3","","4"}<br>
   *
   * @param s String to be split.
   * @param delimiter String literal on which to split.
   * @return an array of tokens.
   * @throws NullPointerException if s is null.
   *
   * @since ostermillerutils 1.00.00
   */
  public static String[] split(String s, String delimiter){
    // Dereference s first so a null input fails even when the delimiter is
    // null or empty.
    final int totalLength = s.length();
    final int step = (delimiter == null) ? 0 : delimiter.length();
    if (step == 0){
      // Without a delimiter there is nothing to split on; by definition the
      // whole String is the single token.
      return new String[] {s};
    }

    // Two passes: the first sizes the result so that no intermediate
    // structure has to be grown and copied.
    int tokenCount = 1;
    for (int hit = s.indexOf(delimiter, 0); hit != -1; hit = s.indexOf(delimiter, hit + step)){
      tokenCount++;
    }

    String[] tokens = new String[tokenCount];
    int cursor = 0;
    // Every token except the last one is terminated by a delimiter.
    for (int slot = 0; slot < tokenCount - 1; slot++){
      int hit = s.indexOf(delimiter, cursor);
      tokens[slot] = s.substring(cursor, hit);
      cursor = hit + step;
    }
    tokens[tokenCount - 1] = s.substring(cursor, totalLength);
    return tokens;
  }

  /**
   * Breaks a String into tokens around a literal delimiter, returning the
   * delimiters as tokens too.
   *
   * No regular expressions are involved: the delimiter is matched as one
   * literal String (unlike java.util.StringTokenizer, which treats each
   * character of its argument as a separate delimiter).
   *
   * Empty tokens are returned as empty Strings, and every occurrence of the
   * delimiter appears in the result between the tokens it separates. A null
   * or empty delimiter yields a one-element array holding the original String.
   *
   * StringHelper.splitIncludeDelimiters("1-2-3", "-");<br>
   * result: {"1","-","2","-","3"}<br>
   * StringHelper.splitIncludeDelimiters("-1--2-", "-");<br>
   * result: {"","-","1","-","","-","2","-",""}<br>
   * StringHelper.splitIncludeDelimiters("123", "");<br>
   * result: {"123"}<br>
   * StringHelper.splitIncludeDelimiters("1-2--3---4----5", "--");<br>
   * result: {"1-2","--","3","--","-4","--","","--","5"}<br>
   *
   * @param s String to be split.
   * @param delimiter String literal on which to split.
   * @return an array of tokens.
   * @throws NullPointerException if s is null.
   *
   * @since ostermillerutils 1.05.00
   */
  public static String[] splitIncludeDelimiters(String s, String delimiter){
    // Dereference s first so a null input fails even when the delimiter is
    // null or empty.
    final int totalLength = s.length();
    final int step = (delimiter == null) ? 0 : delimiter.length();
    if (step == 0){
      // Without a delimiter there is nothing to split on; by definition the
      // whole String is the single token.
      return new String[] {s};
    }

    // First pass: count delimiter occurrences to size the result exactly.
    int occurrences = 0;
    for (int hit = s.indexOf(delimiter, 0); hit != -1; hit = s.indexOf(delimiter, hit + step)){
      occurrences++;
    }

    // Each occurrence contributes a token plus the delimiter; one final token
    // follows the last delimiter.
    String[] tokens = new String[2 * occurrences + 1];
    int cursor = 0;
    int slot = 0;
    for (int k = 0; k < occurrences; k++){
      int hit = s.indexOf(delimiter, cursor);
      tokens[slot++] = s.substring(cursor, hit);
      tokens[slot++] = delimiter;
      cursor = hit + step;
    }
    tokens[slot] = s.substring(cursor, totalLength);
    return tokens;
  }
}
Split up a string into multiple strings based on a delimiter
/*
* JBoss, Home of Professional Open Source
* Copyright 2005, JBoss Inc., and individual contributors as indicated
* by the @authors tag. See the copyright.txt in the distribution for a
* full listing of individual contributors.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
public class Main{
  /** An empty string constant */
  public static final String EMPTY = "";

  /////////////////////////////////////////////////////////////////////////
  //                          Splitting Methods                          //
  /////////////////////////////////////////////////////////////////////////

  /**
   * Split up a string into multiple strings based on a delimiter.
   *
   * The delimiter is matched as a literal string of any length; occurrences
   * are consumed left to right without overlapping. Empty elements (leading,
   * trailing or between adjacent delimiters) are kept. When the limit is
   * reached, the last element holds the unsplit remainder of the string.
   * An empty delimiter cannot split anything, so the result is a single
   * element holding the whole string.
   *
   * @param string  String to split up.
   * @param delim   Delimiter.
   * @param limit   Limit the number of strings to split into
   *                (zero or negative for no limit).
   * @return        Array of strings.
   * @throws NullPointerException if string or delim is null.
   */
  public static String[] split(final String string, final String delim,
      final int limit)
  {
    if (string == null)
    {
      throw new NullPointerException("string");
    }
    if (delim == null)
    {
      throw new NullPointerException("delim");
    }

    final int delimLength = delim.length();
    if (delimLength == 0)
    {
      // Searching for "" never terminates in a useful way; treat it as
      // "no delimiter present".
      return new String[] { string };
    }

    // Count non-overlapping delimiters; the number of elements is one more.
    // (count() is not used here because it counts overlapping occurrences.)
    int count = 1;
    for (int idx = string.indexOf(delim); idx != -1;
         idx = string.indexOf(delim, idx + delimLength))
    {
      count++;
    }
    if (limit > 0 && count > limit)
    {
      count = limit;
    }

    String strings[] = new String[count];
    int begin = 0;
    // All elements but the last are terminated by a delimiter that is
    // guaranteed to exist because of the counting pass above.
    for (int i = 0; i < count - 1; i++)
    {
      int end = string.indexOf(delim, begin);
      strings[i] = string.substring(begin, end);
      // skip the whole delimiter, not just its first character
      begin = end + delimLength;
    }
    // the last element takes everything that is left
    strings[count - 1] = string.substring(begin);

    return strings;
  }

  /**
   * Split up a string into multiple strings based on a delimiter.
   *
   * @param string  String to split up.
   * @param delim   Delimiter.
   * @return        Array of strings.
   * @throws NullPointerException if string or delim is null.
   */
  public static String[] split(final String string, final String delim)
  {
    return split(string, delim, -1);
  }

  /////////////////////////////////////////////////////////////////////////
  //                          Counting Methods                           //
  /////////////////////////////////////////////////////////////////////////

  /**
   * Count the number of instances of substring within a string.
   * Overlapping instances are counted individually, e.g. "aa" occurs
   * twice in "aaa". An empty substring is counted as zero occurrences.
   *
   * @param string     String to look for substring in.
   * @param substring  Sub-string to look for.
   * @return           Count of substrings in string.
   * @throws NullPointerException if string or substring is null.
   */
  public static int count(final String string, final String substring)
  {
    // indexOf("") always succeeds, so an empty substring would loop forever.
    if (substring.length() == 0)
    {
      // still dereference string so that null input fails consistently
      string.length();
      return 0;
    }

    int count = 0;
    int idx = 0;

    while ((idx = string.indexOf(substring, idx)) != -1)
    {
      idx++;
      count++;
    }

    return count;
  }

  /**
   * Count the number of instances of character within a string.
   *
   * @param string     String to look for the character in.
   * @param c          Character to look for.
   * @return           Count of occurrences of the character in string.
   */
  public static int count(final String string, final char c)
  {
    return count(string, String.valueOf(c));
  }
}
String.split() is based on regular expression
/**
 * Shows that the argument of {@link String#split(String)} is a regular
 * expression: the character class "[{}]" splits on either brace.
 */
public class Main {
  public static void main(String args[]) throws Exception {
    String[] words = "{A}{this is a test}{1234}".split("[{}]");
    for (int index = 0; index < words.length; index++) {
      System.out.println(words[index]);
    }
  }
}
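/*
Output (split() also returns the empty string found before each "{",
so a blank line is printed ahead of every word):

A

this is a test

1234
*/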
String split on multicharacter delimiter
/**************************************************************************************
* Copyright (c) Jonas Bonér, Alexandre Vasseur. All rights reserved. *
* http://aspectwerkz.codehaus.org *
* ---------------------------------------------------------------------------------- *
* The software in this package is published under the terms of the LGPL license *
* a copy of which has been included with this distribution in the license.txt file. *
**************************************************************************************/
import java.util.List;
import java.util.ArrayList;
/**
* Utility methods for strings.
*
* @author
*/
public class Strings {
  /**
   * String split on multicharacter delimiter. <p/>Written by Tim Quinn (tim.quinn@honeywell.ru)
   *
   * The delimiter is matched literally, left to right. Empty fields
   * (leading, trailing or between adjacent delimiters) are kept.
   *
   * @param stringToSplit the text to split; must not be null
   * @param delimiter the literal delimiter; must not be null unless
   *        stringToSplit is empty
   * @return an empty array if stringToSplit is empty; a one-element array
   *         holding stringToSplit if the delimiter is empty or does not
   *         occur; otherwise the fields between the delimiters
   * @throws NullPointerException if stringToSplit is null, or if delimiter is
   *         null and stringToSplit is not empty
   */
  public static final String[] splitString(String stringToSplit, String delimiter) {
    // return blank array if stringToSplit == ""
    if (stringToSplit.equals("")) {
      return new String[0];
    }
    final int delimiterLength = delimiter.length();
    if (delimiterLength == 0) {
      // An empty delimiter matches everywhere without advancing, which would
      // never terminate; treat it as "no delimiter present".
      return new String[] { stringToSplit };
    }

    List<String> fields = new ArrayList<String>();
    int from = 0;
    int found;
    while ((found = stringToSplit.indexOf(delimiter, from)) != -1) {
      fields.add(stringToSplit.substring(from, found));
      // continue right after the delimiter just consumed
      from = found + delimiterLength;
    }
    // whatever follows the last delimiter (possibly empty) is the final field
    fields.add(stringToSplit.substring(from));
    return fields.toArray(new String[fields.size()]);
  }
}
Using split() with a space can be a problem
/**
 * Splits a sentence on a single space character and prints each piece on
 * its own line.
 */
public class Main {
  public static void main(String args[]) throws Exception {
    String[] words = "A B C".split(" ");
    for (int index = 0; index < words.length; index++) {
      System.out.println(words[index]);
    }
  }
}
/*
A
B
C
*/