Java/Regular Expressions/String Operation
Содержание
- 1 Apply proper uppercase and lowercase on a String
- 2 Calculating Word Frequencies with Regular Expressions
- 3 Create a string search and replace using regex
- 4 Determining If a String Matches a Pattern Exactly
- 5 Extract a substring by matching a regular expression.
- 6 Get all digits from a string
- 7 Get First Found regex
- 8 Get First Not Empty String in a String list
- 9 Get Found regex
- 10 Ignore case differences when searching for or replacing substrings.
- 11 Java Regular Expression :split 2
- 12 Java Regular Expression : Split text
- 13 Match punct
- 14 Match space
- 15 Match string ends
- 16 Match words
- 17 Parse an Apache log file with StringTokenizer
- 18 Print all the strings that match a given pattern from a file
- 19 Quick demo of Regular Expressions substitution
- 20 Regular Expression Replace
- 21 Regular Expression Search and Replace Program
- 22 Regular expression: Split Demo
- 23 Remove trailing white space from a string
- 24 Removing Duplicate Whitespace in a String
- 25 Replacing String Tokenizer
- 26 Searching and Replacing with Nonconstant Values Using a Regular Expression
- 27 Simple split
- 28 Split a String into a Java Array of Strings divided by an Regular Expressions
- 29 Split the supplied content into lines, returning each line as an element in the returned list.
- 30 Split-up string using regular expression
- 31 StringConvenience -- demonstrate java.lang.String convenience routine
- 32 String replace
- 33 String split
- 34 Strip extra spaces in a XML string
- 35 Use Matcher.appendReplacement() to match [a-zA-Z]+[0-9]+
- 36 Use replaceAll() to ignore case when replacing one substring with another
Apply proper uppercase and lowercase on a String
<source lang="java">
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Rewrites a sentence so that every word starts with an upper-case letter
 * and continues in lower case.
 */
public class Main {

  /**
   * Prints the title-cased form of a fixed sample sentence.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    String str = "this is a test";
    StringBuffer sb = new StringBuffer();
    // Group 1 is the first letter of a word, group 2 is the rest of the word.
    Matcher m = Pattern.compile("([a-z])([a-z]*)", Pattern.CASE_INSENSITIVE).matcher(str);
    while (m.find()) {
      m.appendReplacement(sb, m.group(1).toUpperCase() + m.group(2).toLowerCase());
    }
    // appendTail copies whatever follows the last match.
    System.out.println(m.appendTail(sb).toString());
  }
}
// Output: This Is A Test
</source>
Calculating Word Frequencies with Regular Expressions
<source lang="java">
import java.io.FileInputStream;
import java.nio.CharBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Counts how often each word occurs in the file {@code WordCount.java}
 * and prints the frequencies sorted by word.
 */
public class WordCount {

  /**
   * Maps the file into memory, decodes it as ISO-8859-1, splits every line
   * on punctuation/whitespace and prints the resulting word-to-count map.
   *
   * @param args ignored
   * @throws Exception if the file cannot be opened, mapped or decoded
   */
  public static void main(String args[]) throws Exception {
    String filename = "WordCount.java";
    // Map file from filename to byte buffer.
    FileInputStream input = new FileInputStream(filename);
    try {
      FileChannel channel = input.getChannel();
      int fileLength = (int) channel.size();
      MappedByteBuffer buffer = channel.map(FileChannel.MapMode.READ_ONLY, 0, fileLength);
      // Convert to character buffer.
      Charset charset = Charset.forName("ISO-8859-1");
      CharsetDecoder decoder = charset.newDecoder();
      CharBuffer charBuffer = decoder.decode(buffer);
      // One match per line (MULTILINE makes '$' match at every line end).
      Pattern linePattern = Pattern.compile(".*$", Pattern.MULTILINE);
      // A word break is a single punctuation or whitespace character.
      Pattern wordBreakPattern = Pattern.compile("[\\p{Punct}\\s]");
      Matcher lineMatcher = linePattern.matcher(charBuffer);
      // TreeMap keeps the printed result ordered by word.
      Map<String, Integer> map = new TreeMap<String, Integer>();
      while (lineMatcher.find()) {
        CharSequence line = lineMatcher.group();
        for (String word : wordBreakPattern.split(line)) {
          // Adjacent separators produce empty strings; they are not words.
          if (word.length() > 0) {
            Integer frequency = map.get(word);
            map.put(word, frequency == null ? 1 : frequency + 1);
          }
        }
      }
      System.out.println(map);
    } finally {
      // Closing the stream also closes its channel.
      input.close();
    }
  }
}
</source>
Create a string search and replace using regex
<source lang="java">
import java.util.regex.Pattern;
import java.util.regex.Matcher;

/**
 * Replaces every occurrence of a search term in a sentence using a
 * compiled regular expression.
 */
public class Main {

  /**
   * Prints the sample sentence before and after replacing "brown" with "red".
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    String source = "The quick brown fox jumps over the brown lazy dog.";
    String find = "brown";
    String replace = "red";
    Pattern pattern = Pattern.compile(find);
    Matcher matcher = pattern.matcher(source);
    String output = matcher.replaceAll(replace);
    System.out.println("Source = " + source);
    System.out.println("Output = " + output);
  }
}
</source>
Determining If a String Matches a Pattern Exactly
<source lang="java">
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Shows the difference between matching a whole input
 * ({@link Matcher#matches()}) and matching only its beginning
 * ({@link Matcher#lookingAt()}).
 */
public class Main {

  /**
   * Runs the three checks; the results are only stored, nothing is printed.
   *
   * @param argv ignored
   * @throws Exception declared for compatibility; nothing here throws a checked exception
   */
  public static void main(String[] argv) throws Exception {
    // Compile regular expression.
    String patternStr = "b";
    Pattern pattern = Pattern.compile(patternStr);

    // matches() requires the entire input to match: false for "a b c".
    CharSequence inputStr = "a b c";
    Matcher matcher = pattern.matcher(inputStr);
    boolean matchFound = matcher.matches();

    // Try a different input: "b" matches the pattern exactly, so this is true.
    matcher.reset("b");
    matchFound = matcher.matches();

    // lookingAt() only requires a match at the beginning of the input.
    matchFound = matcher.lookingAt();
  }
}
</source>
Extract a substring by matching a regular expression.
<source lang="java">
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Extracts every address of the form {@code word@XYZ.ru} from a
 * multi-line string.
 */
public class Main {

  /**
   * Prints one "Match: ..." line per address found in the sample input.
   *
   * @param args ignored
   */
  public static void main(String args[]) {
    // \b anchors the match on word boundaries; the dot is escaped so it is literal.
    Pattern pat = Pattern.compile("\\b\\w+@XYZ\\.ru\\b");
    Matcher mat = pat.matcher("t@XYZ.ru\n" + "a@XYZ.ru\n" + "n@XYZ.ru");
    while (mat.find()) {
      System.out.println("Match: " + mat.group());
    }
  }
}
/*
Match: t@XYZ.ru
Match: a@XYZ.ru
Match: n@XYZ.ru
*/
</source>
Get all digits from a string
<source lang="java">
/** Strips everything except digits from a sample string. */
public class Main {

  /**
   * Prints the digits contained in the sample text, in their original order.
   *
   * @param argv ignored
   * @throws Exception declared for compatibility; nothing here throws a checked exception
   */
  public static void main(String[] argv) throws Exception {
    String mixed = "abasdfasdf1 2wasdfasdf9_8asdfasdfz asdfasdfyx7";
    // \D matches any non-digit character, so only the digits survive.
    String digitsOnly = mixed.replaceAll("\\D", "");
    System.out.println(digitsOnly);
  }
}
</source>
Get First Found regex
<source lang="java">
/**
* Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/** Helpers for collecting regular-expression matches from a string. */
public class Utils {

  /**
   * Returns the first match of {@code regex} in {@code contents}.
   *
   * @param contents text to search
   * @param regex regular expression to look for
   * @return the first element of {@link #getFound(String, String)}, or
   *     {@code null} when that list is considered empty by {@link #isEmpty(List)}
   */
  public static String getFirstFound(String contents, String regex) {
    List<String> founds = getFound(contents, regex);
    if (isEmpty(founds)) {
      return null;
    }
    return founds.get(0);
  }

  /**
   * Collects all matches of {@code regex} in {@code contents}.
   *
   * <p>If the expression declares at least one capturing group, the text of
   * group 1 is collected for each match; otherwise the whole match is.
   *
   * @param contents text to search
   * @param regex regular expression to look for
   * @return the matches in order of appearance, or {@code null} when either
   *     argument is {@code null} or blank
   */
  public static List<String> getFound(String contents, String regex) {
    if (isEmpty(regex) || isEmpty(contents)) {
      return null;
    }
    List<String> results = new ArrayList<String>();
    // NOTE(review): UNICODE_CASE only affects matching together with
    // CASE_INSENSITIVE, which is not set here.
    Pattern pattern = Pattern.compile(regex, Pattern.UNICODE_CASE);
    Matcher matcher = pattern.matcher(contents);
    while (matcher.find()) {
      if (matcher.groupCount() > 0) {
        results.add(matcher.group(1));
      } else {
        results.add(matcher.group());
      }
    }
    return results;
  }

  /**
   * Tells whether a list carries no usable content.
   *
   * @param list list to inspect, may be {@code null}
   * @return {@code true} for {@code null}, for a list without elements, and
   *     for a single-element list whose only element is {@code null} or blank
   */
  public static boolean isEmpty(List<String> list) {
    if (list == null || list.size() == 0) {
      return true;
    }
    if (list.size() == 1 && isEmpty(list.get(0))) {
      return true;
    }
    return false;
  }

  /**
   * Tells whether a string is {@code null} or contains only whitespace.
   *
   * @param str string to inspect, may be {@code null}
   * @return {@code true} when {@code str} is {@code null} or trims to ""
   */
  public static boolean isEmpty(String str) {
    if (str != null && str.trim().length() > 0) {
      return false;
    }
    return true;
  }
}
</source>
Get First Not Empty String in a String list
<source lang="java">
/**
* Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/** Helpers for picking non-blank strings and collecting regex matches. */
public class Utils {

  /**
   * Returns the first element of {@code list} that is neither {@code null}
   * nor blank.
   *
   * @param list candidates, may be {@code null}
   * @return the first non-blank element, or {@code null} when there is none
   */
  public static String getFirstNotEmpty(List<String> list) {
    if (isEmpty(list)) {
      return null;
    }
    for (String item : list) {
      if (!isEmpty(item)) {
        return item;
      }
    }
    return null;
  }

  /**
   * Collects all matches of {@code regex} in {@code contents}.
   *
   * <p>If the expression declares at least one capturing group, the text of
   * group 1 is collected for each match; otherwise the whole match is.
   *
   * @param contents text to search
   * @param regex regular expression to look for
   * @return the matches in order of appearance, or {@code null} when either
   *     argument is {@code null} or blank
   */
  public static List<String> getFound(String contents, String regex) {
    if (isEmpty(regex) || isEmpty(contents)) {
      return null;
    }
    List<String> results = new ArrayList<String>();
    Pattern pattern = Pattern.compile(regex, Pattern.UNICODE_CASE);
    Matcher matcher = pattern.matcher(contents);
    while (matcher.find()) {
      if (matcher.groupCount() > 0) {
        results.add(matcher.group(1));
      } else {
        results.add(matcher.group());
      }
    }
    return results;
  }

  /**
   * Tells whether a list carries no usable content.
   *
   * @param list list to inspect, may be {@code null}
   * @return {@code true} for {@code null}, for a list without elements, and
   *     for a single-element list whose only element is {@code null} or blank
   */
  public static boolean isEmpty(List<String> list) {
    if (list == null || list.size() == 0) {
      return true;
    }
    if (list.size() == 1 && isEmpty(list.get(0))) {
      return true;
    }
    return false;
  }

  /**
   * Tells whether a string is {@code null} or contains only whitespace.
   *
   * @param str string to inspect, may be {@code null}
   * @return {@code true} when {@code str} is {@code null} or trims to ""
   */
  public static boolean isEmpty(String str) {
    if (str != null && str.trim().length() > 0) {
      return false;
    }
    return true;
  }
}
</source>
Get Found regex
<source lang="java">
/**
* Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/** Helper for collecting regular-expression matches from a string. */
public class Utils {

  /**
   * Collects all matches of {@code regex} in {@code contents}.
   *
   * <p>If the expression declares at least one capturing group, the text of
   * group 1 is collected for each match; otherwise the whole match is.
   *
   * @param contents text to search
   * @param regex regular expression to look for
   * @return the matches in order of appearance, or {@code null} when either
   *     argument is {@code null} or blank
   */
  public static List<String> getFound(String contents, String regex) {
    if (isEmpty(regex) || isEmpty(contents)) {
      return null;
    }
    List<String> results = new ArrayList<String>();
    Pattern pattern = Pattern.compile(regex, Pattern.UNICODE_CASE);
    Matcher matcher = pattern.matcher(contents);
    while (matcher.find()) {
      if (matcher.groupCount() > 0) {
        results.add(matcher.group(1));
      } else {
        results.add(matcher.group());
      }
    }
    return results;
  }

  /**
   * Tells whether a string is {@code null} or contains only whitespace.
   *
   * @param str string to inspect, may be {@code null}
   * @return {@code true} when {@code str} is {@code null} or trims to ""
   */
  public static boolean isEmpty(String str) {
    if (str != null && str.trim().length() > 0) {
      return false;
    }
    return true;
  }
}
</source>
Ignore case differences when searching for or replacing substrings.
<source lang="java">
/** Detects a word inside a string regardless of letter case. */
public class Main {

  /**
   * Prints a confirmation when the sample sentence contains "test" in any
   * mix of upper and lower case.
   *
   * @param args ignored
   */
  public static void main(String args[]) {
    final String sentence = "This is a TEST.";
    // (?i) switches the expression to case-insensitive matching; the
    // surrounding .* are needed because matches() tests the whole string.
    final boolean containsTest = sentence.matches("(?i).*test.*");
    if (containsTest) {
      System.out.println("test is in the string.");
    }
  }
}
</source>
Java Regular Expression :split 2
<source lang="java">
/* From http://java.sun.com/docs/books/tutorial/index.html */ /*
* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * -Redistribution of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * -Redistribution in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * Neither the name of Sun Microsystems, Inc. or the names of contributors may * be used to endorse or promote products derived from this software without * specific prior written permission. * * This software is provided "AS IS," without a warranty of any kind. ALL * EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES, INCLUDING * ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE * OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED. SUN MIDROSYSTEMS, INC. ("SUN") * AND ITS LICENSORS SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY LICENSEE * AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS * DERIVATIVES. IN NO EVENT WILL SUN OR ITS LICENSORS BE LIABLE FOR ANY LOST * REVENUE, PROFIT OR DATA, OR FOR DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL, * INCIDENTAL OR PUNITIVE DAMAGES, HOWEVER CAUSED AND REGARDLESS OF THE THEORY * OF LIABILITY, ARISING OUT OF THE USE OF OR INABILITY TO USE THIS SOFTWARE, * EVEN IF SUN HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. * * You acknowledge that this software is not designed, licensed or intended * for use in the design, construction, operation or maintenance of any * nuclear facility. */
import java.util.regex.Pattern;

/** Splits a string at every digit using a compiled pattern. */
public final class SplitTest2 {

  /** Delimiter expression: a single digit. */
  private static final String REGEX = "\\d";

  /** Sample input whose words are separated by digits. */
  private static final String INPUT = "one9two4three7four1five";

  /**
   * Prints each piece of {@code INPUT} on its own line.
   *
   * @param argv ignored
   */
  public static void main(String[] argv) {
    Pattern p = Pattern.compile(REGEX);
    String[] items = p.split(INPUT);
    for (int i = 0; i < items.length; i++) {
      System.out.println(items[i]);
    }
  }
}
</source>
Java Regular Expression : Split text
<source lang="java">
/* From http://java.sun.com/docs/books/tutorial/index.html */ /*
* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * -Redistribution of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * -Redistribution in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * Neither the name of Sun Microsystems, Inc. or the names of contributors may * be used to endorse or promote products derived from this software without * specific prior written permission. * * This software is provided "AS IS," without a warranty of any kind. ALL * EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES, INCLUDING * ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE * OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED. SUN MIDROSYSTEMS, INC. ("SUN") * AND ITS LICENSORS SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY LICENSEE * AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS * DERIVATIVES. IN NO EVENT WILL SUN OR ITS LICENSORS BE LIABLE FOR ANY LOST * REVENUE, PROFIT OR DATA, OR FOR DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL, * INCIDENTAL OR PUNITIVE DAMAGES, HOWEVER CAUSED AND REGARDLESS OF THE THEORY * OF LIABILITY, ARISING OUT OF THE USE OF OR INABILITY TO USE THIS SOFTWARE, * EVEN IF SUN HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. * * You acknowledge that this software is not designed, licensed or intended * for use in the design, construction, operation or maintenance of any * nuclear facility. */
import java.util.regex.Pattern;

/** Splits a colon-separated string using a compiled pattern. */
public final class SplitTest {

  /** Delimiter expression: a literal colon. */
  private static final String REGEX = ":";

  /** Sample input whose words are separated by colons. */
  private static final String INPUT = "one:two:three:four:five";

  /**
   * Prints each piece of {@code INPUT} on its own line.
   *
   * @param argv ignored
   */
  public static void main(String[] argv) {
    Pattern p = Pattern.compile(REGEX);
    String[] items = p.split(INPUT);
    for (int i = 0; i < items.length; i++) {
      System.out.println(items[i]);
    }
  }
}
</source>
Match punct
<source lang="java">
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/** Checks whether a string starts with a punctuation character. */
public class Main {

  /**
   * Prints whether the sample text has a punctuation character at the
   * position where matching starts.
   *
   * @param argv ignored
   * @throws Exception declared for compatibility; nothing here throws a checked exception
   */
  public static void main(String[] argv) throws Exception {
    // \G anchors the match at the end of the previous match; on a fresh
    // matcher that is the start of the input.
    Pattern punct = Pattern.compile("\\G\\p{Punct}");
    Matcher mat = punct.matcher("this is a test 999");
    System.out.println(mat.find());
  }
}
// Output: false
</source>
Match space
<source lang="java">
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/** Checks whether a string starts with a whitespace character. */
public class Main {

  /**
   * Prints whether the sample text has a whitespace character at the
   * position where matching starts.
   *
   * @param argv ignored
   * @throws Exception declared for compatibility; nothing here throws a checked exception
   */
  public static void main(String[] argv) throws Exception {
    // \G anchors the match at the end of the previous match; on a fresh
    // matcher that is the start of the input.
    Pattern space = Pattern.compile("\\G\\s");
    Matcher mat = space.matcher("this is a test 999");
    System.out.println(mat.find());
  }
}
// Output: false
</source>
Match string ends
<source lang="java">
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/** Checks whether the matching start position is already the end of the input. */
public class Main {

  /**
   * Prints the (empty) match if the end of input coincides with the position
   * where matching starts; for the non-empty sample text nothing is printed.
   *
   * @param argv ignored
   * @throws Exception declared for compatibility; nothing here throws a checked exception
   */
  public static void main(String[] argv) throws Exception {
    // \G = end of the previous match (start of input on a fresh matcher),
    // \z = end of input.
    Pattern end = Pattern.compile("\\G\\z");
    Matcher mat = end.matcher("this is a test 999");
    if (mat.find()) {
      System.out.println(mat.group());
    }
  }
}
</source>
Match words
<source lang="java">
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/** Checks whether a string starts with a word. */
public class Main {

  /**
   * Prints whether the sample text has one or more word characters at the
   * position where matching starts.
   *
   * @param argv ignored
   * @throws Exception declared for compatibility; nothing here throws a checked exception
   */
  public static void main(String[] argv) throws Exception {
    // \G anchors the match at the end of the previous match; on a fresh
    // matcher that is the start of the input.
    Pattern word = Pattern.compile("\\G\\w+");
    Matcher mat = word.matcher("this is a test 999");
    System.out.println(mat.find());
  }
}
// Output: true
</source>
Parse an Apache log file with StringTokenizer
<source lang="java">
/*
* Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002. * All rights reserved. Software written by Ian F. Darwin and others. * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS"" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * Java, the Duke mascot, and all variants of Sun"s Java "steaming coffee * cup" logo are trademarks of Sun Microsystems. Sun"s, and James Gosling"s, * pioneering role in inventing and promulgating (and standardizing) the Java * language and environment is gratefully acknowledged. * * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for * inventing predecessor languages C and C++ is also gratefully acknowledged. */
import java.util.*;

/**
 * Walks through one Apache access-log entry with a {@link StringTokenizer},
 * switching the delimiter between fields.
 */
public class LogStrTok implements LogExample {

  /**
   * Prints the token count followed by the fields pulled from
   * {@code logEntryLine}, one labelled line per field.
   *
   * @param argv ignored
   */
  public static void main(String argv[]) {
    StringTokenizer tokens = new StringTokenizer(logEntryLine);

    // The count is taken with the tokenizer's initial delimiters. It is not
    // compared against NUM_FIELDS: the delimiter changes below, so the
    // tokenizer cannot predict the number of fields.
    int available = tokens.countTokens();
    System.out.println("tokens = " + available);

    String hostname = tokens.nextToken();
    System.out.println("Hostname: " + hostname);

    // Unwanted fields can only be skipped by consuming them: the two "-".
    tokens.nextToken();
    tokens.nextToken();

    String dateTime = tokens.nextToken("]");
    System.out.println("Date/Time: " + dateTime);

    String request = tokens.nextToken("\"");
    System.out.println("Request: " + request);

    // Switch back to a blank delimiter, discarding one token on the way.
    tokens.nextToken(" ");

    String response = tokens.nextToken();
    System.out.println("Response: " + response);

    String byteCount = tokens.nextToken();
    System.out.println("ByteCount: " + byteCount);

    String referer = tokens.nextToken("\"");
    System.out.println("Referer: " + referer);

    // Same delimiter reset as above.
    tokens.nextToken(" ");

    String userAgent = tokens.nextToken("\"");
    System.out.println("User-Agent: " + userAgent);
  }
}

/**
 * Common fields for Apache Log demo.
 */
interface LogExample {

  /** The number of fields that must be found. */
  int NUM_FIELDS = 9;

  /** The sample log entry to be parsed. */
  String logEntryLine = "123.45.67.89 - - [27/Oct/2000:09:27:09 -0400] \"GET /java/javaResources.html HTTP/1.0\" 200 10450 \"-\" \"Mozilla/4.6 [en] (X11; U; OpenBSD 2.8 i386; Nav)\"";
}
</source>
Print all the strings that match a given pattern from a file
<source lang="java">
/*
* Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002. * All rights reserved. Software written by Ian F. Darwin and others. * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS"" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * Java, the Duke mascot, and all variants of Sun"s Java "steaming coffee * cup" logo are trademarks of Sun Microsystems. Sun"s, and James Gosling"s, * pioneering role in inventing and promulgating (and standardizing) the Java * language and environment is gratefully acknowledged. * * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for * inventing predecessor languages C and C++ is also gratefully acknowledged. */
import java.util.regex.*;
import java.io.*;

/**
 * Print all the strings that match a given pattern from a file.
 */
public class ReaderIter {

  /**
   * Reads {@code ReaderIter.java} line by line and prints every run of
   * letters that starts with any letter and continues with one or more
   * lower-case letters.
   *
   * @param args ignored
   * @throws IOException if the file cannot be opened or read
   */
  public static void main(String[] args) throws IOException {
    // The RE pattern.
    Pattern patt = Pattern.compile("[A-Za-z][a-z]+");
    BufferedReader r = new BufferedReader(new FileReader("ReaderIter.java"));
    try {
      // For each line of input, try matching in it.
      String line;
      while ((line = r.readLine()) != null) {
        // For each match in the line, extract and print it.
        Matcher m = patt.matcher(line);
        while (m.find()) {
          // Equivalent to m.group(0); start/end are used here to show how
          // the match offsets map back onto the input line.
          int start = m.start(0);
          int end = m.end(0);
          System.out.println(line.substring(start, end));
        }
      }
    } finally {
      // Release the file handle even when reading fails.
      r.close();
    }
  }
}
</source>
Quick demo of Regular Expressions substitution
<source lang="java">
/*
* Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002. * All rights reserved. Software written by Ian F. Darwin and others. * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS"" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * Java, the Duke mascot, and all variants of Sun"s Java "steaming coffee * cup" logo are trademarks of Sun Microsystems. Sun"s, and James Gosling"s, * pioneering role in inventing and promulgating (and standardizing) the Java * language and environment is gratefully acknowledged. * * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for * inventing predecessor languages C and C++ is also gratefully acknowledged. */
import java.util.regex.*;

/**
 * Quick demo of RE substitution: correct "demon" and other
 * spelling variants to the correct, non-satanic "daemon".
 *
 * @author Ian F. Darwin, http://www.darwinsys.com/
 * @version $Id: ReplaceDemo.java,v 1.5 2004/02/09 03:33:42 ian Exp $
 */
public class ReplaceDemo {

  /**
   * Prints the sample input, then the corrected text produced once with
   * {@code replaceAll} and once with the append methods.
   *
   * @param argv ignored
   */
  public static void main(String[] argv) {
    // Make an RE pattern to match almost any form (deamon, demon, etc.):
    // "d", then 1 or 2 of "a"/"e" in any combination, then "mon".
    String patt = "d[ae]{1,2}mon";

    // A test input.
    String input = "Unix hath demons and deamons in it!";
    System.out.println("Input: " + input);

    // Run it from a RE instance and see that it works.
    Pattern r = Pattern.compile(patt);
    Matcher m = r.matcher(input);
    System.out.println("ReplaceAll: " + m.replaceAll("daemon"));

    // Show the appendReplacement method; reset() rewinds the matcher first.
    m.reset();
    StringBuffer sb = new StringBuffer();
    System.out.print("Append methods: ");
    while (m.find()) {
      // Copies the text before the match, then the word "daemon".
      m.appendReplacement(sb, "daemon");
    }
    m.appendTail(sb); // copy remainder
    System.out.println(sb.toString());
  }
}
</source>
Regular Expression Replace
<source lang="java">
/* From http://java.sun.com/docs/books/tutorial/index.html */ /*
* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * -Redistribution of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * -Redistribution in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * Neither the name of Sun Microsystems, Inc. or the names of contributors may * be used to endorse or promote products derived from this software without * specific prior written permission. * * This software is provided "AS IS," without a warranty of any kind. ALL * EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES, INCLUDING * ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE * OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED. SUN MIDROSYSTEMS, INC. ("SUN") * AND ITS LICENSORS SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY LICENSEE * AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS * DERIVATIVES. IN NO EVENT WILL SUN OR ITS LICENSORS BE LIABLE FOR ANY LOST * REVENUE, PROFIT OR DATA, OR FOR DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL, * INCIDENTAL OR PUNITIVE DAMAGES, HOWEVER CAUSED AND REGARDLESS OF THE THEORY * OF LIABILITY, ARISING OUT OF THE USE OF OR INABILITY TO USE THIS SOFTWARE, * EVEN IF SUN HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. * * You acknowledge that this software is not designed, licensed or intended * for use in the design, construction, operation or maintenance of any * nuclear facility. */
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/** Replaces every match of a pattern using the matcher's append methods. */
public final class RegexTest {

  /** Zero or more "a" followed by one "b". */
  private static final String REGEX = "a*b";

  /** Sample input containing several matches of {@code REGEX}. */
  private static final String INPUT = "aabfooaabfooabfoob";

  /** Text substituted for each match. */
  private static final String REPLACE = "-";

  /**
   * Prints {@code INPUT} with every match of {@code REGEX} replaced by
   * {@code REPLACE}.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    Pattern p = Pattern.compile(REGEX);
    Matcher m = p.matcher(INPUT); // get a matcher object
    StringBuffer sb = new StringBuffer();
    while (m.find()) {
      m.appendReplacement(sb, REPLACE);
    }
    // Copy whatever follows the last match.
    m.appendTail(sb);
    System.out.println(sb.toString());
  }
}
</source>
Regular Expression Search and Replace Program
<source lang="java">
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/** Replaces all occurrences of a pattern in a character sequence. */
public class Main {

  /**
   * Prints the sample input with every "a" replaced by "x".
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    CharSequence inputStr = "a b c a b c";
    String patternStr = "a";
    String replacementStr = "x";
    Pattern pattern = Pattern.compile(patternStr);
    // Replace all occurrences of pattern in input.
    Matcher matcher = pattern.matcher(inputStr);
    String output = matcher.replaceAll(replacementStr);
    System.out.println(output);
  }
}
</source>
Regular expression: Split Demo
<source lang="java">
// : c12:SplitDemo.java
// From "Thinking in Java, 3rd ed." (c) Bruce Eckel 2002
// www.BruceEckel.com. See copyright notice in CopyRight.txt.
import java.util.Arrays;
import java.util.regex.Pattern;

/** Compares {@code Pattern.split} (with and without a limit) and {@code String.split}. */
public class SplitDemo {

  /**
   * Prints three lists: the sample input split on "!!", the same split
   * limited to three pieces, and a sentence split on single blanks.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    String input = "This!!unusual use!!of exclamation!!points";
    System.out.println(Arrays.asList(Pattern.compile("!!").split(input)));
    // Only do the first three: the third piece keeps the rest unsplit.
    System.out.println(Arrays.asList(Pattern.compile("!!").split(input, 3)));
    System.out.println(Arrays.asList("Aha! String has a split() built in!".split(" ")));
  }
} ///:~
</source>
Remove trailing white space from a string
<source lang="java">
/** Removes whitespace from the end of a string. */
public class Main {

  /**
   * Prints the sample text after its trailing whitespace has been removed;
   * leading whitespace is kept.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    final String padded = " a ";
    // \s+$ matches a run of whitespace that reaches the end of the string.
    final String trimmedRight = padded.replaceAll("\\s+$", "");
    System.out.println("Text: " + trimmedRight);
  }
}
</source>
Removing Duplicate Whitespace in a String
<source lang="java">
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/** Collapses runs of whitespace into single blanks. */
public class Main {

  /**
   * Intentionally empty; the snippet only provides
   * {@link #removeDuplicateWhitespace(CharSequence)}.
   *
   * @param argv ignored
   * @throws Exception declared for compatibility; nothing here throws a checked exception
   */
  public static void main(String[] argv) throws Exception {
  }

  /**
   * Replaces every run of one or more whitespace characters with a single
   * blank.
   *
   * @param inputStr text to normalize
   * @return the normalized text
   */
  public static CharSequence removeDuplicateWhitespace(CharSequence inputStr) {
    String patternStr = "\\s+";
    String replaceStr = " ";
    Pattern pattern = Pattern.compile(patternStr);
    Matcher matcher = pattern.matcher(inputStr);
    return matcher.replaceAll(replaceStr);
  }
}
</source>
Replacing String Tokenizer
<source lang="java">
// : c12:ReplacingStringTokenizer.java
// From "Thinking in Java, 3rd ed." (c) Bruce Eckel 2002
// www.BruceEckel.com. See copyright notice in CopyRight.txt.
import java.util.Arrays;
import java.util.StringTokenizer;

/** Shows that {@code String.split} can stand in for a {@link StringTokenizer}. */
public class ReplacingStringTokenizer {

  /**
   * Prints the words of a sample sentence twice: one per line via
   * {@code StringTokenizer}, then as a single list via {@code String.split}.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    String input = "But I'm not dead yet! I feel happy!";
    StringTokenizer stoke = new StringTokenizer(input);
    while (stoke.hasMoreElements()) {
      System.out.println(stoke.nextToken());
    }
    System.out.println(Arrays.asList(input.split(" ")));
  }
} ///:~
</source>
Searching and Replacing with Nonconstant Values Using a Regular Expression
<source lang="java">
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/** Replaces each match with text that is computed from the match itself. */
public class Main {

  /**
   * Prints the sample input with every letters-then-digits token wrapped
   * as {@code found<token>}.
   *
   * @param argv ignored
   * @throws Exception declared for compatibility; nothing here throws a checked exception
   */
  public static void main(String[] argv) throws Exception {
    CharSequence inputStr = "ab12 cd efg34 123";
    String patternStr = "([a-zA-Z]+[0-9]+)";
    Pattern pattern = Pattern.compile(patternStr);
    Matcher matcher = pattern.matcher(inputStr);
    StringBuffer buf = new StringBuffer();
    while (matcher.find()) {
      // The matched text contains only letters and digits, so it is safe to
      // use inside a replacement string without escaping '$' or '\'.
      String replaceStr = matcher.group();
      matcher.appendReplacement(buf, "found<" + replaceStr + ">");
    }
    // Copy whatever follows the last match.
    matcher.appendTail(buf);
    String result = buf.toString();
    System.out.println(result);
  }
}
</source>
Simple split
<source lang="java">
/**
 * Splits a sentence around a set of alternative words and phrases and prints
 * the pieces that remain.
 */
public class Split {
    /**
     * Prints the split pattern, the original statement, and then each token
     * produced by splitting the statement on that pattern.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        final String splitPattern = "compromise|cooperate|concession|"
                + "conciliation|(finding the middle group)|(give and take)";
        final String statement = "I will not compromise. I will not "
                + "cooperate. There will be no concession, no conciliation, no "
                + "finding the middle group, and no give and take.";

        final String[] pieces = statement.split(splitPattern);

        System.out.println("REGEX PATTERN:\n" + splitPattern + "\n");
        System.out.println("STATEMENT:\n" + statement + "\n");
        System.out.println("\nTOKENS");
        for (String piece : pieces) {
            System.out.println(piece);
        }
    }
}
</source>
Split a String into a Java Array of Strings divided by a Regular Expression
<source lang="java">
/*
* Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002. * All rights reserved. Software written by Ian F. Darwin and others. * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * Java, the Duke mascot, and all variants of Sun's Java "steaming coffee * cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's, * pioneering role in inventing and promulgating (and standardizing) the Java * language and environment is gratefully acknowledged. * * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for * inventing predecessor languages C and C++ is also gratefully acknowledged. */
import java.util.regex.*;

/**
 * Split a String into a Java Array of Strings divided by an RE
 */
public class Split {
    /**
     * Splits a sample phrase on the literal text {@code "ian"} and prints each
     * resulting piece, quoted and prefixed with its index.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String[] x = Pattern.compile("ian").split(
                "the darwinian devonian explodian chicken");
        for (int i = 0; i < x.length; i++) {
            // Quote each piece so leading spaces are visible in the output.
            System.out.println(i + " \"" + x[i] + "\"");
        }
    }
}
</source>
Split the supplied content into lines, returning each line as an element in the returned list.
<source lang="java">
import java.util.Arrays; import java.util.Collections; import java.util.List; /*
* JBoss DNA (http://www.jboss.org/dna) * See the COPYRIGHT.txt file distributed with this work for information * regarding copyright ownership. Some portions may be licensed * to Red Hat, Inc. under one or more contributor license agreements.
 * See the AUTHORS.txt file in the distribution for a full listing of
 * individual contributors.
* * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA * is licensed to you under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * JBoss DNA is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this software; if not, write to the Free * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA * 02110-1301 USA, or see the FSF site: http://www.fsf.org. */
/**
 * Utilities for string processing and manipulation.
 */
public class StringUtil {
    /**
     * Split the supplied content into lines, returning each line as an element in the returned list.
     * Lines are separated by a line feed, optionally preceded by a carriage return.
     *
     * @param content the string content that is to be split
     * @return the list of lines; never null but may be an empty (unmodifiable) list if the supplied
     *         content is null or empty
     */
    public static List<String> splitLines( final String content ) {
        final boolean hasContent = content != null && content.length() != 0;
        if (hasContent) {
            // The separator accepts both "\r\n" and "\n" line endings.
            return Arrays.asList(content.split("[\\r]?\\n"));
        }
        return Collections.emptyList();
    }
}
</source>
Split-up string using regular expression
<source lang="java">
import java.util.regex.Pattern;

/**
 * Splits a list of colour names that are separated by commas and/or whitespace.
 */
public class Main {
    /**
     * Splits a sample colour list on runs of commas and whitespace and prints
     * each colour, quoted, on its own line.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // One or more commas or whitespace characters form a single separator.
        String pattern = "[,\\s]+";
        String colours = "Red,White, Blue Green Yellow, Orange";
        Pattern splitter = Pattern.compile(pattern);
        String[] result = splitter.split(colours);
        for (String colour : result) {
            System.out.println("Colour = \"" + colour + "\"");
        }
    }
}
</source>
StringConvenience -- demonstrate java.lang.String convenience routine
<source lang="java">
/*
* Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002. * All rights reserved. Software written by Ian F. Darwin and others. * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * Java, the Duke mascot, and all variants of Sun's Java "steaming coffee * cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's, * pioneering role in inventing and promulgating (and standardizing) the Java * language and environment is gratefully acknowledged. * * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for * inventing predecessor languages C and C++ is also gratefully acknowledged. */
/**
 * StringConvenience -- demonstrate java.lang.String convenience routine
 * @author Ian F. Darwin
 * @version $Id: StringConvenience.java,v 1.2 2004/02/23 02:37:34 ian Exp $
 */
public class StringConvenience {
    /**
     * Tests a sample line against a regular expression with
     * {@code String.matches()} and reports whether the whole line matched.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String[] argv) {
        final String line = "Order QT300. Now!";
        // A 'Q' not followed by 'u', then digits and a literal period, anywhere in the line.
        final String pattern = ".*Q[^u]\\d+\\..*";

        final String report = line.matches(pattern)
                ? line + " matches \"" + pattern + "\""
                : "NO MATCH";
        System.out.println(report);
    }
}
</source>
String replace
<source lang="java">
/**
 * Tightens a wordy paragraph by applying a fixed sequence of phrase replacements.
 */
public class StyleSearchAndReplace {
    /**
     * Applies each search/replace pair to a sample statement, in order, and
     * prints the statement before and after editing.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        final String statement = "The question as to whether the jab is"
                + " superior to the cross has been debated for some time in"
                + " boxing circles. However, it is my opinion that this"
                + " false dichotomy misses the point. I call your attention"
                + " to the fact that the best boxers often use a combination of"
                + " the two. I call your attention to the fact that Mohammed"
                + " Ali,the Greatest of the sport of boxing, used both. He had"
                + " a tremendous jab, yet used his cross effectively, often,"
                + " and well";

        // Each row is {regex to find, replacement text}; rows run top to bottom
        // because later rows operate on the output of earlier ones.
        final String[][] edits = {
            {"The question as to whether", "Whether"},
            {" of the sport of boxing", ""},
            {"amount of success", "success"},
            {"However, it is my opinion that this", "This"},
            {"a combination of the two", "both"},
            {"This is in spite of the fact that" + " the", "The"},
            {"I call your attention to the fact that", ""},
        };

        String newStmt = statement;
        for (String[] edit : edits) {
            newStmt = newStmt.replaceAll(edit[0], edit[1]);
        }

        System.out.println("BEFORE:\n" + statement + "\n");
        System.out.println("AFTER:\n" + newStmt);
    }
}
</source>
String split
<source lang="java">
/**
 * Rearranges two-part sentences by swapping the clauses on either side of a comma.
 */
public class StyleSplitExample {
    /**
     * Runs {@link #strengthenSentence(String)} on three sample phrases.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String phrase1 = "but simple justice, not charity";
        strengthenSentence(phrase1);
        String phrase2 = "but that I love Rome more, not that I love Caesar less";
        strengthenSentence(phrase2);
        String phrase3 = "ask what you can do for your country, ask not what your "
                + "country can do for you";
        strengthenSentence(phrase3);
    }

    /**
     * Splits the sentence on commas and returns the second piece followed by
     * {@code ", "} and the first piece, printing the sentence before and after.
     * Pieces are not trimmed, so whitespace next to the comma is preserved.
     *
     * @param sentence the sentence to rearrange; must not be null
     * @return the rearranged sentence, or {@code null} when the split yields
     *         fewer than two pieces (for example, when there is no comma)
     */
    public static String strengthenSentence(String sentence) {
        String splitPattern = ",";
        String[] tokens = sentence.split(splitPattern);
        // String.split never returns null, so test the piece count instead;
        // with fewer than two pieces tokens[1] would be out of bounds.
        if (tokens.length < 2) {
            System.out.println(" NO MATCH: pattern:" + sentence
                    + "\r\n regex: " + splitPattern);
            return null;
        }
        String retval = tokens[1] + ", " + tokens[0];
        System.out.println("BEFORE: " + sentence);
        System.out.println("AFTER : " + retval + "\n");
        return retval;
    }
}
</source>
Strip extra spaces in an XML string
<source lang="java">
/**
 * Removes the whitespace that sits between a closing '>' and the next '<' in an XML string.
 */
public class Main {
    /**
     * Prints a sample XML string and then the same string with inter-tag
     * whitespace removed.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        final String original = "<a>test 1</a> test 2 ";
        // NOTE(review): this sample has no whitespace directly between '>' and '<',
        // so both printed lines are identical; the sample may have lost markup.
        final String compacted = stripInterTagWhitespace(original);
        System.out.println(original);
        System.out.println(compacted);
    }

    /** Replaces each '>' + whitespace run + '<' sequence with "><". */
    private static String stripInterTagWhitespace(String markup) {
        return markup.replaceAll(">\\s+<", "><");
    }
}
</source>
Use Matcher.appendReplacement() to match [a-zA-Z]+[0-9]+
<source lang="java">
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Uses {@code Matcher.appendReplacement()} to rewrite every token that matches
 * {@code [a-zA-Z]+[0-9]+}.
 */
public class Main {
    /**
     * Wraps each "letters followed by digits" token of a sample string in
     * {@code found<...>} and prints the rewritten string.
     *
     * @param argv command-line arguments (unused)
     * @throws Exception declared for signature compatibility
     */
    public static void main(String[] argv) throws Exception {
        CharSequence inputStr = "ab12 cd efg34 asdf 123";
        String patternStr = "([a-zA-Z]+[0-9]+)";
        Pattern pattern = Pattern.compile(patternStr);
        Matcher matcher = pattern.matcher(inputStr);
        StringBuffer buf = new StringBuffer();
        while (matcher.find()) {
            String replaceStr = "found<" + matcher.group() + ">";
            // Quote the replacement so that '$' or '\' in matched text would be
            // taken literally rather than as group references.
            matcher.appendReplacement(buf, Matcher.quoteReplacement(replaceStr));
        }
        // Copy whatever follows the last match.
        matcher.appendTail(buf);
        String result = buf.toString();
        System.out.println(result);
    }
} //found<ab12> cd found<efg34> asdf 123
</source>
Use replaceAll() to ignore case when replacing one substring with another
<source lang="java">
/**
 * Uses the embedded {@code (?i)} flag so that {@code replaceAll()} ignores case.
 */
public class Main {
    /**
     * Replaces every case-insensitive occurrence of "abc" in a sample string
     * with "DEF" and prints the result.
     *
     * @param argv command-line arguments (unused)
     * @throws Exception declared for signature compatibility
     */
    public static void main(String[] argv) throws Exception {
        String str = "Abc abc";
        // (?i) turns on case-insensitive matching for the rest of the pattern.
        String result = str.replaceAll("(?i)abc", "DEF");
        System.out.println("After replacement:\n" + " " + result);
    }
} /* After replacement:
 DEF DEF
 */
</source>