Java/Regular Expressions/String Operation — различия между версиями
Admin (обсуждение | вклад) м (1 версия) |
|
(нет различий)
|
Текущая версия на 06:00, 1 июня 2010
Содержание
- 1 Apply proper uppercase and lowercase on a String
- 2 Calculating Word Frequencies with Regular Expressions
- 3 Create a string search and replace using regex
- 4 Determining If a String Matches a Pattern Exactly
- 5 Extract a substring by matching a regular expression.
- 6 Get all digits from a string
- 7 Get First Found regex
- 8 Get First Not Empty String in a String list
- 9 Get Found regex
- 10 Ignore case differences when searching for or replacing substrings.
- 11 Java Regular Expression :split 2
- 12 Java Regular Expression : Split text
- 13 Match punct
- 14 Match space
- 15 Match string ends
- 16 Match words
- 17 Parse an Apache log file with StringTokenizer
- 18 Print all the strings that match a given pattern from a file
- 19 Quick demo of Regular Expressions substitution
- 20 Regular Expression Replace
- 21 Regular Expression Search and Replace Program
- 22 Regular expression: Split Demo
- 23 Remove trailing white space from a string
- 24 Removing Duplicate Whitespace in a String
- 25 Replacing String Tokenizer
- 26 Searching and Replacing with Nonconstant Values Using a Regular Expression
- 27 Simple split
- 28 Split a String into a Java Array of Strings divided by a Regular Expression
- 29 Split the supplied content into lines, returning each line as an element in the returned list.
- 30 Split-up string using regular expression
- 31 StringConvenience -- demonstrate java.lang.String convenience routine
- 32 String replace
- 33 String split
- 34 Strip extra spaces in a XML string
- 35 Use Matcher.appendReplacement() to match [a-zA-Z]+[0-9]+
- 36 Use replaceAll() to ignore case when replacing one substring with another
Apply proper uppercase and lowercase on a String
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Capitalizes every word of a sample sentence: the first letter of each word
 * is upper-cased and the remaining letters are lower-cased.
 */
public class Main {
    /**
     * Builds the capitalized sentence with the appendReplacement/appendTail
     * idiom and prints it to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String str = "this is a test";
        StringBuffer sb = new StringBuffer();
        // Group 1 is the first letter of a word, group 2 the rest of the word.
        Matcher m = Pattern.compile("([a-z])([a-z]*)", Pattern.CASE_INSENSITIVE).matcher(str);
        while (m.find()) {
            m.appendReplacement(sb, m.group(1).toUpperCase() + m.group(2).toLowerCase());
        }
        // appendTail copies whatever follows the last match.
        System.out.println(m.appendTail(sb).toString());
    }
}
//This Is A Test
Calculating Word Frequencies with Regular Expressions
import java.io.FileInputStream;
import java.nio.CharBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Counts how often each word occurs in a text file and prints the resulting
 * word-to-frequency map, sorted by word.
 */
public class WordCount {
    /**
     * Memory-maps the input file, decodes it as ISO-8859-1, splits every line
     * into words and prints the frequency of each word.
     *
     * @param args optional; {@code args[0]} names the file to read. When it is
     *             absent the file {@code WordCount.java} in the working
     *             directory is used.
     * @throws Exception if the file cannot be opened, mapped or decoded
     */
    public static void main(String args[]) throws Exception {
        String filename = (args != null && args.length > 0) ? args[0] : "WordCount.java";
        Map<String, Integer> map = new TreeMap<String, Integer>();
        // Map File from filename to byte buffer
        FileInputStream input = new FileInputStream(filename);
        try {
            FileChannel channel = input.getChannel();
            int fileLength = (int) channel.size();
            MappedByteBuffer buffer = channel.map(FileChannel.MapMode.READ_ONLY, 0, fileLength);
            // Convert to character buffer
            Charset charset = Charset.forName("ISO-8859-1");
            CharsetDecoder decoder = charset.newDecoder();
            CharBuffer charBuffer = decoder.decode(buffer);
            // One match per line: MULTILINE lets '$' match before each line terminator.
            Pattern linePattern = Pattern.compile(".*$", Pattern.MULTILINE);
            // Words are separated by any punctuation or whitespace character.
            Pattern wordBreakPattern = Pattern.compile("[\\p{Punct}\\s]");
            Matcher lineMatcher = linePattern.matcher(charBuffer);
            while (lineMatcher.find()) {
                CharSequence line = lineMatcher.group();
                String words[] = wordBreakPattern.split(line);
                for (int i = 0, n = words.length; i < n; i++) {
                    // Adjacent separators produce empty strings; skip them.
                    if (words[i].length() > 0) {
                        Integer frequency = map.get(words[i]);
                        int next = (frequency == null) ? 1 : frequency.intValue() + 1;
                        map.put(words[i], Integer.valueOf(next));
                    }
                }
            }
        } finally {
            // Closing the stream also closes its channel.
            input.close();
        }
        System.out.println(map);
    }
}
Create a string search and replace using regex
import java.util.regex.Pattern;
import java.util.regex.Matcher;
/**
 * Replaces every occurrence of one word in a sentence using a compiled
 * regular expression.
 */
public class Main {
    /**
     * Replaces each "brown" in the sample sentence with "red" and prints both
     * the source and the result.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String source = "The quick brown fox jumps over the brown lazy dog.";
        String find = "brown";
        String replace = "red";
        // The search text contains no regex metacharacters, so it is used as the pattern as-is.
        Pattern pattern = Pattern.compile(find);
        Matcher matcher = pattern.matcher(source);
        String output = matcher.replaceAll(replace);
        System.out.println("Source = " + source);
        System.out.println("Output = " + output);
    }
}
Determining If a String Matches a Pattern Exactly
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Shows the difference between matching a whole input ({@code matches}) and
 * matching only its beginning ({@code lookingAt}).
 */
public class Main {
    /**
     * Runs the pattern "b" against two inputs. The results are only stored in
     * a local variable; nothing is printed.
     *
     * @param argv ignored
     * @throws Exception declared for compatibility; nothing in the body throws a checked exception
     */
    public static void main(String[] argv) throws Exception {
        // Compile regular expression
        String patternStr = "b";
        Pattern pattern = Pattern.compile(patternStr);
        // matches() requires the entire input to match: "a b c" does not.
        CharSequence inputStr = "a b c";
        Matcher matcher = pattern.matcher(inputStr);
        boolean matchFound = matcher.matches();
        // Reuse the matcher on a different input: "b" matches exactly.
        matcher.reset("b");
        matchFound = matcher.matches();
        // lookingAt() only requires the pattern to match at the start of the input.
        matchFound = matcher.lookingAt();
    }
}
Extract a substring by matching a regular expression.
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Extracts every address of the form {@code word@XYZ.ru} from a multi-line
 * string.
 */
public class Main {
    /**
     * Finds each address in the sample text and prints it on its own line,
     * prefixed with "Match: ".
     *
     * @param args ignored
     */
    public static void main(String args[]) {
        // \b anchors keep the match on whole-word boundaries; the dot is escaped to be literal.
        Pattern pat = Pattern.compile("\\b\\w+@XYZ\\.ru\\b");
        Matcher mat = pat.matcher("t@XYZ.ru\n" + "a@XYZ.ru\n"
                + "n@XYZ.ru");
        while (mat.find()) {
            System.out.println("Match: " + mat.group());
        }
    }
}
/*
Match: t@XYZ.ru
Match: a@XYZ.ru
Match: n@XYZ.ru
*/
Get all digits from a string
/**
 * Strips everything except digits from a sample string.
 */
public class Main {
    /**
     * Removes every non-digit character and prints what is left.
     *
     * @param argv ignored
     * @throws Exception declared for compatibility; nothing in the body throws a checked exception
     */
    public static void main(String[] argv) throws Exception {
        String mixed = "abasdfasdf1 2wasdfasdf9_8asdfasdfz asdfasdfyx7";
        // \D matches any character that is not a digit.
        String digitsOnly = mixed.replaceAll("\\D", "");
        System.out.println(digitsOnly);
    }
}
Get First Found regex
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * String helpers for collecting regular-expression matches.
 */
public class Utils {
    /**
     * Returns the first match of {@code regex} in {@code contents}.
     *
     * @param contents text to search
     * @param regex    regular expression to look for
     * @return the first element of {@link #getFound(String, String)}, or
     *         {@code null} when that list is considered empty by
     *         {@link #isEmpty(List)}
     */
    public static String getFirstFound(String contents, String regex) {
        List<String> founds = getFound(contents, regex);
        if (isEmpty(founds)) {
            return null;
        }
        return founds.get(0);
    }

    /**
     * Collects every match of {@code regex} in {@code contents}.
     *
     * @param contents text to search
     * @param regex    regular expression to look for
     * @return for each match, capture group 1 when the pattern declares at
     *         least one group, otherwise the whole match; {@code null} when
     *         either argument is null or blank
     */
    public static List<String> getFound(String contents, String regex) {
        if (isEmpty(regex) || isEmpty(contents)) {
            return null;
        }
        List<String> results = new ArrayList<String>();
        Pattern pattern = Pattern.compile(regex, Pattern.UNICODE_CASE);
        Matcher matcher = pattern.matcher(contents);
        while (matcher.find()) {
            if (matcher.groupCount() > 0) {
                results.add(matcher.group(1));
            } else {
                results.add(matcher.group());
            }
        }
        return results;
    }

    /**
     * Tells whether a list carries no usable value.
     *
     * @param list list to inspect, may be {@code null}
     * @return {@code true} when the list is null, has no elements, or has
     *         exactly one element that is itself null or blank
     */
    public static boolean isEmpty(List<String> list) {
        if (list == null || list.size() == 0) {
            return true;
        }
        if (list.size() == 1 && isEmpty(list.get(0))) {
            return true;
        }
        return false;
    }

    /**
     * Tells whether a string is null or contains only whitespace.
     *
     * @param str string to inspect, may be {@code null}
     * @return {@code true} when {@code str} is null or trims to length zero
     */
    public static boolean isEmpty(String str) {
        if (str != null && str.trim().length() > 0) {
            return false;
        }
        return true;
    }
}
Get First Not Empty String in a String list
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * String helpers for picking usable values out of lists and for collecting
 * regular-expression matches.
 */
public class Utils {
    /**
     * Returns the first element of {@code list} that is neither null nor blank.
     *
     * @param list candidates, may be {@code null}
     * @return the first non-blank element, or {@code null} when there is none
     */
    public static String getFirstNotEmpty(List<String> list) {
        if (isEmpty(list)) {
            return null;
        }
        for (String item : list) {
            if (!isEmpty(item)) {
                return item;
            }
        }
        return null;
    }

    /**
     * Collects every match of {@code regex} in {@code contents}.
     *
     * @param contents text to search
     * @param regex    regular expression to look for
     * @return for each match, capture group 1 when the pattern declares at
     *         least one group, otherwise the whole match; {@code null} when
     *         either argument is null or blank
     */
    public static List<String> getFound(String contents, String regex) {
        if (isEmpty(regex) || isEmpty(contents)) {
            return null;
        }
        List<String> results = new ArrayList<String>();
        Pattern pattern = Pattern.compile(regex, Pattern.UNICODE_CASE);
        Matcher matcher = pattern.matcher(contents);
        while (matcher.find()) {
            if (matcher.groupCount() > 0) {
                results.add(matcher.group(1));
            } else {
                results.add(matcher.group());
            }
        }
        return results;
    }

    /**
     * Tells whether a list carries no usable value.
     *
     * @param list list to inspect, may be {@code null}
     * @return {@code true} when the list is null, has no elements, or has
     *         exactly one element that is itself null or blank
     */
    public static boolean isEmpty(List<String> list) {
        if (list == null || list.size() == 0) {
            return true;
        }
        if (list.size() == 1 && isEmpty(list.get(0))) {
            return true;
        }
        return false;
    }

    /**
     * Tells whether a string is null or contains only whitespace.
     *
     * @param str string to inspect, may be {@code null}
     * @return {@code true} when {@code str} is null or trims to length zero
     */
    public static boolean isEmpty(String str) {
        if (str != null && str.trim().length() > 0) {
            return false;
        }
        return true;
    }
}
Get Found regex
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * String helpers for collecting regular-expression matches.
 */
public class Utils {
    /**
     * Collects every match of {@code regex} in {@code contents}.
     *
     * @param contents text to search
     * @param regex    regular expression to look for
     * @return for each match, capture group 1 when the pattern declares at
     *         least one group, otherwise the whole match; {@code null} when
     *         either argument is null or blank
     */
    public static List<String> getFound(String contents, String regex) {
        if (isEmpty(regex) || isEmpty(contents)) {
            return null;
        }
        List<String> results = new ArrayList<String>();
        Pattern pattern = Pattern.compile(regex, Pattern.UNICODE_CASE);
        Matcher matcher = pattern.matcher(contents);
        while (matcher.find()) {
            if (matcher.groupCount() > 0) {
                results.add(matcher.group(1));
            } else {
                results.add(matcher.group());
            }
        }
        return results;
    }

    /**
     * Tells whether a string is null or contains only whitespace.
     *
     * @param str string to inspect, may be {@code null}
     * @return {@code true} when {@code str} is null or trims to length zero
     */
    public static boolean isEmpty(String str) {
        if (str != null && str.trim().length() > 0) {
            return false;
        }
        return true;
    }
}
Ignore case differences when searching for or replacing substrings.
/**
 * Checks for a word inside a string without regard to letter case.
 */
public class Main {
    /**
     * Prints a confirmation line when the sample string contains "test" in
     * any mix of upper and lower case.
     *
     * @param args ignored
     */
    public static void main(String args[]) {
        String sample = "This is a TEST.";
        // The embedded (?i) flag turns on case-insensitive matching for the whole pattern.
        boolean containsTest = sample.matches("(?i).*test.*");
        if (containsTest) {
            System.out.println("test is in the string.");
        }
    }
}
Java Regular Expression :split 2
/* From http://java.sun.com/docs/books/tutorial/index.html */
/*
* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* -Redistribution of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* -Redistribution in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* Neither the name of Sun Microsystems, Inc. or the names of contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* This software is provided "AS IS," without a warranty of any kind. ALL
* EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES, INCLUDING
* ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE
* OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED. SUN MICROSYSTEMS, INC. ("SUN")
* AND ITS LICENSORS SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY LICENSEE
* AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS
* DERIVATIVES. IN NO EVENT WILL SUN OR ITS LICENSORS BE LIABLE FOR ANY LOST
* REVENUE, PROFIT OR DATA, OR FOR DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL,
* INCIDENTAL OR PUNITIVE DAMAGES, HOWEVER CAUSED AND REGARDLESS OF THE THEORY
* OF LIABILITY, ARISING OUT OF THE USE OF OR INABILITY TO USE THIS SOFTWARE,
* EVEN IF SUN HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
*
* You acknowledge that this software is not designed, licensed or intended
* for use in the design, construction, operation or maintenance of any
* nuclear facility.
*/
import java.util.regex.Pattern;
/**
 * Splits a string wherever a digit occurs and prints the pieces.
 */
public final class SplitTest2 {
    /** Delimiter pattern: any single digit. */
    private static String REGEX = "\\d";
    /** Sample text whose words are separated by single digits. */
    private static String INPUT = "one9two4three7four1five";

    /**
     * Prints each piece of {@code INPUT} on its own line.
     *
     * @param argv ignored
     */
    public static void main(String[] argv) {
        Pattern p = Pattern.compile(REGEX);
        String[] items = p.split(INPUT);
        for (int i = 0; i < items.length; i++) {
            System.out.println(items[i]);
        }
    }
}
Java Regular Expression : Split text
/* From http://java.sun.com/docs/books/tutorial/index.html */
/*
* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* -Redistribution of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* -Redistribution in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* Neither the name of Sun Microsystems, Inc. or the names of contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* This software is provided "AS IS," without a warranty of any kind. ALL
* EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES, INCLUDING
* ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE
* OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED. SUN MICROSYSTEMS, INC. ("SUN")
* AND ITS LICENSORS SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY LICENSEE
* AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS
* DERIVATIVES. IN NO EVENT WILL SUN OR ITS LICENSORS BE LIABLE FOR ANY LOST
* REVENUE, PROFIT OR DATA, OR FOR DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL,
* INCIDENTAL OR PUNITIVE DAMAGES, HOWEVER CAUSED AND REGARDLESS OF THE THEORY
* OF LIABILITY, ARISING OUT OF THE USE OF OR INABILITY TO USE THIS SOFTWARE,
* EVEN IF SUN HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
*
* You acknowledge that this software is not designed, licensed or intended
* for use in the design, construction, operation or maintenance of any
* nuclear facility.
*/
import java.util.regex.Pattern;
/**
 * Splits a colon-separated string and prints the pieces.
 */
public final class SplitTest {
    /** Delimiter pattern: a literal colon. */
    private static String REGEX = ":";
    /** Sample text whose words are separated by colons. */
    private static String INPUT = "one:two:three:four:five";

    /**
     * Prints each piece of {@code INPUT} on its own line.
     *
     * @param argv ignored
     */
    public static void main(String[] argv) {
        Pattern p = Pattern.compile(REGEX);
        String[] items = p.split(INPUT);
        for (int i = 0; i < items.length; i++) {
            System.out.println(items[i]);
        }
    }
}
Match punct
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Tests whether a sample string begins with a punctuation character.
 */
public class Main {
    /**
     * Prints whether a punctuation character is found at the start of the
     * sample text.
     *
     * @param argv ignored
     * @throws Exception declared for compatibility; nothing in the body throws a checked exception
     */
    public static void main(String[] argv) throws Exception {
        // \G anchors the match to the end of the previous match, which is the
        // start of the input on the first find().
        Pattern punct = Pattern.compile("\\G\\p{Punct}");
        Matcher mat = punct.matcher("this is a test 999");
        System.out.println(mat.find());
    }
}
//false
Match space
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Tests whether a sample string begins with a whitespace character.
 */
public class Main {
    /**
     * Prints whether a whitespace character is found at the start of the
     * sample text.
     *
     * @param argv ignored
     * @throws Exception declared for compatibility; nothing in the body throws a checked exception
     */
    public static void main(String[] argv) throws Exception {
        // \G anchors the match to the end of the previous match, which is the
        // start of the input on the first find().
        Pattern space = Pattern.compile("\\G\\s");
        Matcher mat = space.matcher("this is a test 999");
        System.out.println(mat.find());
    }
}
Match string ends
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Tests whether the end of input is found right where matching starts.
 */
public class Main {
    /**
     * Prints the matched text if the end of input coincides with the start
     * position; for the non-empty sample text no match exists and nothing is
     * printed.
     *
     * @param argv ignored
     * @throws Exception declared for compatibility; nothing in the body throws a checked exception
     */
    public static void main(String[] argv) throws Exception {
        // \G is the end of the previous match (the input start on the first
        // find()); \z is the very end of the input.
        Pattern end = Pattern.compile("\\G\\z");
        Matcher mat = end.matcher("this is a test 999");
        if (mat.find()) {
            System.out.println(mat.group());
        }
    }
}
Match words
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Tests whether a sample string begins with a run of word characters.
 */
public class Main {
    /**
     * Prints whether one or more word characters are found at the start of
     * the sample text.
     *
     * @param argv ignored
     * @throws Exception declared for compatibility; nothing in the body throws a checked exception
     */
    public static void main(String[] argv) throws Exception {
        // \G anchors the match to the end of the previous match, which is the
        // start of the input on the first find().
        Pattern word = Pattern.compile("\\G\\w+");
        Matcher mat = word.matcher("this is a test 999");
        System.out.println(mat.find());
    }
}
//true
Parse an Apache log file with StringTokenizer
/*
* Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java
* language and environment is gratefully acknowledged.
*
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/
import java.util.*;
/**
 * Picks apart one Apache access-log line with a StringTokenizer, changing the
 * delimiter set between calls, and prints each field it extracts.
 */
public class LogStrTok implements LogExample {
    /**
     * Tokenizes the sample log line and prints the token count followed by
     * the individual fields.
     *
     * @param argv ignored
     */
    public static void main(String argv[]) {
        StringTokenizer fields = new StringTokenizer(logEntryLine);
        int tokenCount = fields.countTokens();
        System.out.println("tokens = " + tokenCount);
        // The count cannot be checked against NUM_FIELDS: countTokens() is
        // not usable once the delimiter is changed from call to call.
        String hostname = fields.nextToken();
        System.out.println("Hostname: " + hostname);
        // Tokens can only be skipped by asking for them.
        fields.nextToken(); // the first "-"
        fields.nextToken(); // the second "-"
        String dateTime = fields.nextToken("]");
        System.out.println("Date/Time: " + dateTime);
        String request = fields.nextToken("\"");
        System.out.println("Request: " + request);
        fields.nextToken(" "); // skip again, switching back to space as delimiter
        String response = fields.nextToken();
        System.out.println("Response: " + response);
        String byteCount = fields.nextToken();
        System.out.println("ByteCount: " + byteCount);
        String referer = fields.nextToken("\"");
        System.out.println("Referer: " + referer);
        fields.nextToken(" "); // skip again, switching back to space as delimiter
        String userAgent = fields.nextToken("\"");
        System.out.println("User-Agent: " + userAgent);
    }
}
/**
 * Constants shared by the Apache log parsing demos.
 */
interface LogExample {
    // Interface fields are implicitly public, static and final.

    /** How many fields a well-formed log entry contains. */
    int NUM_FIELDS = 9;

    /** A sample log entry for the demos to parse. */
    String logEntryLine = "123.45.67.89 - - [27/Oct/2000:09:27:09 -0400] \"GET /java/javaResources.html HTTP/1.0\" 200 10450 \"-\" \"Mozilla/4.6 [en] (X11; U; OpenBSD 2.8 i386; Nav)\"";
}
Print all the strings that match a given pattern from a file
/*
* Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java
* language and environment is gratefully acknowledged.
*
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/
import java.util.regex.*;
import java.io.*;
/**
 * Print all the strings that match a given pattern from a file.
 */
public class ReaderIter {
    /**
     * Reads a text file line by line and prints every run of letters that
     * consists of one letter followed by one or more lower-case letters.
     *
     * @param args optional; {@code args[0]} names the file to read. When it is
     *             absent the file {@code ReaderIter.java} in the working
     *             directory is used.
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        // The RE pattern
        Pattern patt = Pattern.compile("[A-Za-z][a-z]+");
        String fileName = (args != null && args.length > 0) ? args[0] : "ReaderIter.java";
        BufferedReader r = new BufferedReader(new FileReader(fileName));
        try {
            // For each line of input, try matching in it.
            String line;
            while ((line = r.readLine()) != null) {
                // For each match in the line, extract and print it.
                Matcher m = patt.matcher(line);
                while (m.find()) {
                    // Group 0 is the whole match; its offsets index into the line.
                    int start = m.start(0);
                    int end = m.end(0);
                    // Equivalent to printing m.group(0).
                    System.out.println(line.substring(start, end));
                }
            }
        } finally {
            // Release the file handle even when reading fails.
            r.close();
        }
    }
}
Quick demo of Regular Expressions substitution
/*
* Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java
* language and environment is gratefully acknowledged.
*
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/
import java.util.regex.*;
/**
 * Quick demo of RE substitution: correct "demon" and other
 * spelling variants to the correct, non-satanic "daemon".
 * @author Ian F. Darwin, http://www.darwinsys.com/
 * @version $Id: ReplaceDemo.java,v 1.5 2004/02/09 03:33:42 ian Exp $
 */
public class ReplaceDemo {
    /**
     * Prints the sample input, then the corrected text produced twice: once
     * with {@code replaceAll} and once with the
     * appendReplacement/appendTail loop.
     *
     * @param argv ignored
     */
    public static void main(String[] argv) {
        // Make an RE pattern to match almost any form (deamon, demon, etc.).
        String patt = "d[ae]{1,2}mon"; // i.e., 1 or 2 "a" or "e" any combo
        // A test input.
        String input = "Unix hath demons and deamons in it!";
        System.out.println("Input: " + input);
        // Run it from a RE instance and see that it works
        Pattern r = Pattern.compile(patt);
        Matcher m = r.matcher(input);
        System.out.println("ReplaceAll: " + m.replaceAll("daemon"));
        // Show the appendReplacement method; reset() rewinds the matcher to the start.
        m.reset();
        StringBuffer sb = new StringBuffer();
        System.out.print("Append methods: ");
        while (m.find()) {
            // Copies the text before this match, then the word "daemon".
            m.appendReplacement(sb, "daemon");
        }
        m.appendTail(sb); // copy remainder
        System.out.println(sb.toString());
    }
}
Regular Expression Replace
/* From http://java.sun.com/docs/books/tutorial/index.html */
/*
* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* -Redistribution of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* -Redistribution in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* Neither the name of Sun Microsystems, Inc. or the names of contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* This software is provided "AS IS," without a warranty of any kind. ALL
* EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES, INCLUDING
* ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE
* OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED. SUN MICROSYSTEMS, INC. ("SUN")
* AND ITS LICENSORS SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY LICENSEE
* AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS
* DERIVATIVES. IN NO EVENT WILL SUN OR ITS LICENSORS BE LIABLE FOR ANY LOST
* REVENUE, PROFIT OR DATA, OR FOR DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL,
* INCIDENTAL OR PUNITIVE DAMAGES, HOWEVER CAUSED AND REGARDLESS OF THE THEORY
* OF LIABILITY, ARISING OUT OF THE USE OF OR INABILITY TO USE THIS SOFTWARE,
* EVEN IF SUN HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
*
* You acknowledge that this software is not designed, licensed or intended
* for use in the design, construction, operation or maintenance of any
* nuclear facility.
*/
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Replaces every match of a pattern with a fixed string using the
 * appendReplacement/appendTail idiom.
 */
public final class RegexTest {
    /** Zero or more "a" followed by one "b". */
    private static String REGEX = "a*b";
    /** Sample text to rewrite. */
    private static String INPUT = "aabfooaabfooabfoob";
    /** Text substituted for every match. */
    private static String REPLACE = "-";

    /**
     * Prints {@code INPUT} with every match of {@code REGEX} replaced by
     * {@code REPLACE}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Pattern p = Pattern.compile(REGEX);
        Matcher m = p.matcher(INPUT); // get a matcher object
        StringBuffer sb = new StringBuffer();
        while (m.find()) {
            m.appendReplacement(sb, REPLACE);
        }
        // Copy whatever follows the last match.
        m.appendTail(sb);
        System.out.println(sb.toString());
    }
}
Regular Expression Search and Replace Program
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Replaces every occurrence of a pattern in a character sequence.
 */
public class Main {
    /**
     * Replaces each "a" in the sample input with "x" and prints the result.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        CharSequence inputStr = "a b c a b c";
        String patternStr = "a";
        String replacementStr = "x";
        Pattern pattern = Pattern.compile(patternStr);
        // Replace all occurrences of pattern in input
        Matcher matcher = pattern.matcher(inputStr);
        String output = matcher.replaceAll(replacementStr);
        System.out.println(output);
    }
}
Regular expression: Split Demo
// : c12:SplitDemo.java
// From "Thinking in Java, 3rd ed." (c) Bruce Eckel 2002
// www.BruceEckel.com. See copyright notice in CopyRight.txt.
import java.util.Arrays;
import java.util.regex.Pattern;
/**
 * Demonstrates {@code Pattern.split} with and without a limit, and the
 * equivalent convenience method on {@code String}.
 */
public class SplitDemo {
    /**
     * Prints three lists: the sample split on "!!", the same split limited to
     * three pieces, and a second sentence split on spaces.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String input = "This!!unusual use!!of exclamation!!points";
        System.out.println(Arrays.asList(Pattern.compile("!!").split(input)));
        // Only do the first three: the last piece keeps the unsplit remainder.
        System.out
                .println(Arrays.asList(Pattern.compile("!!").split(input, 3)));
        System.out.println(Arrays.asList("Aha! String has a split() built in!"
                .split(" ")));
    }
} ///:~
Remove trailing white space from a string
/**
 * Strips whitespace from the end of a string while leaving the start alone.
 */
public class Main {
    /**
     * Removes the trailing whitespace of a sample string and prints the
     * result after the label "Text: ".
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String original = " a ";
        // \s+$ matches a run of whitespace that reaches the end of the input.
        String rightTrimmed = original.replaceAll("\\s+$", "");
        System.out.println("Text: " + rightTrimmed);
    }
}
Removing Duplicate Whitespace in a String
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Collapses runs of whitespace inside a character sequence.
 */
public class Main {
    /**
     * Entry point; intentionally does nothing.
     *
     * @param argv ignored
     * @throws Exception declared for compatibility; nothing in the body throws a checked exception
     */
    public static void main(String[] argv) throws Exception {
    }

    /**
     * Replaces every run of one or more whitespace characters with a single
     * space.
     *
     * @param inputStr text to normalize; must not be {@code null}
     * @return the normalized text
     */
    public static CharSequence removeDuplicateWhitespace(CharSequence inputStr) {
        String patternStr = "\\s+";
        String replaceStr = " ";
        Pattern pattern = Pattern.compile(patternStr);
        Matcher matcher = pattern.matcher(inputStr);
        return matcher.replaceAll(replaceStr);
    }
}
Replacing String Tokenizer
// : c12:ReplacingStringTokenizer.java
// From "Thinking in Java, 3rd ed." (c) Bruce Eckel 2002
// www.BruceEckel.com. See copyright notice in CopyRight.txt.
import java.util.Arrays;
import java.util.StringTokenizer;
/**
 * Shows that {@code String.split} can stand in for {@code StringTokenizer}
 * when breaking a sentence into words.
 */
public class ReplacingStringTokenizer {
    /**
     * Prints the words of a sample sentence one per line using a
     * StringTokenizer, then prints the same words as a list produced by
     * {@code split}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String input = "But I'm not dead yet! I feel happy!";
        StringTokenizer stoke = new StringTokenizer(input);
        while (stoke.hasMoreElements()) {
            System.out.println(stoke.nextToken());
        }
        System.out.println(Arrays.asList(input.split(" ")));
    }
} ///:~
Searching and Replacing with Nonconstant Values Using a Regular Expression
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Rewrites each match of a pattern with replacement text that is computed
 * from the match itself.
 */
public class Main {
    /**
     * Wraps every "letters followed by digits" token of the sample input in
     * {@code found<...>} and prints the rewritten text.
     *
     * @param argv ignored
     * @throws Exception declared for compatibility; nothing in the body throws a checked exception
     */
    public static void main(String[] argv) throws Exception {
        CharSequence inputStr = "ab12 cd efg34 123";
        String patternStr = "([a-zA-Z]+[0-9]+)";
        Pattern pattern = Pattern.compile(patternStr);
        Matcher matcher = pattern.matcher(inputStr);
        StringBuffer buf = new StringBuffer();
        while (matcher.find()) {
            String replaceStr = matcher.group();
            // Quote the matched text so that '$' or '\' in it could never be
            // read as a group reference by appendReplacement.
            matcher.appendReplacement(buf, "found<" + Matcher.quoteReplacement(replaceStr) + ">");
        }
        // Copy whatever follows the last match.
        matcher.appendTail(buf);
        String result = buf.toString();
        System.out.println(result);
    }
}
Simple split
/**
 * Splits a sentence on a set of alternative words and phrases.
 */
public class Split {
    /**
     * Prints the split pattern, the sample statement, and then every piece
     * of the statement that lies between the delimiter phrases.
     *
     * @param args ignored
     */
    public static void main(String args[]) {
        String sentence = "I will not compromise. I will not "
                + "cooperate. There will be no concession, no conciliation, no "
                + "finding the middle group, and no give and take.";
        // Each alternative is one delimiter; the parentheses group the multi-word phrases.
        String delimiters = "compromise|cooperate|concession|"
                + "conciliation|(finding the middle group)|(give and take)";
        String[] pieces = sentence.split(delimiters);
        System.out.println("REGEX PATTERN:\n" + delimiters + "\n");
        System.out.println("STATEMENT:\n" + sentence + "\n");
        System.out.println("\nTOKENS");
        for (String piece : pieces) {
            System.out.println(piece);
        }
    }
}
Split a String into a Java Array of Strings divided by a Regular Expression
/*
* Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java
* language and environment is gratefully acknowledged.
*
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/
import java.util.regex.*;
/**
 * Splits a String into a Java array of Strings, using a regular expression
 * as the delimiter.
 */
public class Split {
    /**
     * Splits a sample sentence on the literal pattern {@code ian} and prints
     * each piece, quoted and prefixed with its index.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String[] x =
            Pattern.compile("ian").split(
                "the darwinian devonian explodian chicken");
        for (int i = 0; i < x.length; i++) {
            // Quotes make leading whitespace in each piece visible.
            System.out.println(i + " \"" + x[i] + "\"");
        }
    }
}
Split the supplied content into lines, returning each line as an element in the returned list.
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
/*
* JBoss DNA (http://www.jboss.org/dna)
* See the COPYRIGHT.txt file distributed with this work for information
* regarding copyright ownership. Some portions may be licensed
* to Red Hat, Inc. under one or more contributor license agreements.
* See the AUTHORS.txt file in the distribution for a full listing of
* individual contributors.
*
* JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA
* is licensed to you under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* JBoss DNA is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
/**
 * Helper routines for working with strings.
 */
public class StringUtil {
    /**
     * Breaks the supplied text into its individual lines. A line ends at a line feed,
     * optionally preceded by a carriage return; the terminators themselves are not
     * part of the returned elements.
     *
     * @param content the text to break into lines; may be null
     * @return the lines in order of appearance; never null, and an empty (unmodifiable) list
     *         when the supplied content is null or empty
     */
    public static List<String> splitLines( final String content ) {
        final boolean nothingToSplit = content == null || content.length() == 0;
        return nothingToSplit
                ? Collections.<String>emptyList()
                : Arrays.asList(content.split("[\\r]?\\n"));
    }
}
Split-up string using regular expression
import java.util.regex.Pattern;
/**
 * Demonstrates splitting a string with a precompiled regular expression.
 */
public class Main {
    /**
     * Splits a list of colour names separated by commas and/or whitespace and
     * prints each name in quotes.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // One or more commas or whitespace characters form a single separator.
        String pattern = "[,\\s]+";
        String colours = "Red,White, Blue Green Yellow, Orange";
        Pattern splitter = Pattern.compile(pattern);
        String[] result = splitter.split(colours);
        for (String colour : result) {
            System.out.println("Colour = \"" + colour + "\"");
        }
    }
}
StringConvenience -- demonstrate java.lang.String convenience routine
/*
* Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java
* language and environment is gratefully acknowledged.
*
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/
/**
 * StringConvenience -- shows the {@code String.matches} shortcut for testing
 * a whole string against a regular expression.
 * @author Ian F. Darwin
 * @version $Id: StringConvenience.java,v 1.2 2004/02/23 02:37:34 ian Exp $
 */
public class StringConvenience {
    /**
     * Tests a sample line against a pattern and reports whether it matched.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String[] argv) {
        final String line = "Order QT300. Now!";
        // A 'Q', one character other than 'u', some digits and a literal dot, anywhere in the line.
        final String pattern = ".*Q[^u]\\d+\\..*";
        final String report = line.matches(pattern)
                ? line + " matches \"" + pattern + "\""
                : "NO MATCH";
        System.out.println(report);
    }
}
String replace
/**
 * Tightens a wordy paragraph by applying a fixed series of phrase replacements.
 */
public class StyleSearchAndReplace {
    /**
     * Applies each replacement in turn to a sample paragraph and prints the
     * paragraph before and after.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String args[]) {
        final String statement = "The question as to whether the jab is"
                + " superior to the cross has been debated for some time in"
                + " boxing circles. However, it is my opinion that this"
                + " false dichotomy misses the point. I call your attention"
                + " to the fact that the best boxers often use a combination of"
                + " the two. I call your attention to the fact that Mohammed"
                + " Ali,the Greatest of the sport of boxing, used both. He had"
                + " a tremendous jab, yet used his cross effectively, often,"
                + " and well";

        // Each row is {regex, replacement}; rows are applied top to bottom.
        final String[][] rewrites = {
            {"The question as to whether", "Whether"},
            {" of the sport of boxing", ""},
            {"amount of success", "success"},
            {"However, it is my opinion that this", "This"},
            {"a combination of the two", "both"},
            {"This is in spite of the fact that" + " the", "The"},
            {"I call your attention to the fact that", ""}
        };

        String newStmt = statement;
        for (String[] rewrite : rewrites) {
            newStmt = newStmt.replaceAll(rewrite[0], rewrite[1]);
        }

        System.out.println("BEFORE:\n" + statement + "\n");
        System.out.println("AFTER:\n" + newStmt);
    }
}
String split
/**
 * Rewrites two-clause sentences by swapping the clauses around their comma.
 */
public class StyleSplitExample {
    /**
     * Runs {@link #strengthenSentence(String)} on three sample phrases.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String args[]) {
        String phrase1 = "but simple justice, not charity";
        strengthenSentence(phrase1);
        String phrase2 = "but that I love Rome more, not that I love Caesar less";
        strengthenSentence(phrase2);
        String phrase3 = "ask what you can do for your country, ask not what your "
                + "country can do for you";
        strengthenSentence(phrase3);
    }

    /**
     * Splits the sentence on commas and returns the second clause followed by the
     * first, joined by {@code ", "}. The before and after forms are printed.
     * Clauses are not trimmed, and any clause after the second is ignored.
     *
     * @param sentence the sentence to rearrange; must not be null
     * @return the rearranged sentence, or {@code null} when the sentence does not
     *         contain two comma-separated clauses
     */
    public static String strengthenSentence(String sentence) {
        final String splitPattern = ",";
        final String[] tokens = sentence.split(splitPattern);
        // split() never returns null; a sentence without a usable comma yields fewer
        // than two tokens, which would otherwise fail on tokens[1].
        if (tokens.length < 2) {
            System.out.println(" NO MATCH: pattern:" + sentence
                    + "\r\n regex: " + splitPattern);
            return null;
        }
        final String retval = tokens[1] + ", " + tokens[0];
        System.out.println("BEFORE: " + sentence);
        System.out.println("AFTER : " + retval + "\n");
        return retval;
    }
}
Strip extra spaces in a XML string
/**
 * Shows how to remove the whitespace that sits between adjacent XML tags.
 */
public class Main {
    /**
     * Prints a sample XML fragment, then the same fragment with inter-tag
     * whitespace removed.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        final String xml = "<a>test 1</a> <b>test 2</b> ";
        System.out.println(xml);
        // Only whitespace directly between '>' and '<' is dropped; text content is untouched.
        System.out.println(xml.replaceAll(">\\s+<", "><"));
    }
}
Use Matcher.appendReplacement() to match [a-zA-Z]+[0-9]+
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates {@code Matcher.appendReplacement()} and {@code Matcher.appendTail()}
 * for building a result string one match at a time.
 */
public class Main {
    /**
     * Wraps every "letters followed by digits" token of a sample string in
     * {@code found<...>} and prints the result.
     *
     * @param argv command-line arguments (unused)
     * @throws Exception never thrown by this implementation
     */
    public static void main(String[] argv) throws Exception {
        CharSequence inputStr = "ab12 cd efg34 asdf 123";
        String patternStr = "([a-zA-Z]+[0-9]+)";
        Pattern pattern = Pattern.compile(patternStr);
        Matcher matcher = pattern.matcher(inputStr);
        StringBuffer buf = new StringBuffer();
        while (matcher.find()) {
            String replaceStr = matcher.group();
            // Copies the text between the previous match and this one, then the replacement.
            // The matched text is only letters and digits, so it contains no '$' or '\'.
            matcher.appendReplacement(buf, "found<" + replaceStr + ">");
        }
        // Copies whatever follows the last match.
        matcher.appendTail(buf);
        String result = buf.toString();
        System.out.println(result);
    }
}
//found<ab12> cd found<efg34> asdf 123
Use replaceAll() to ignore case when replacing one substring with another
/**
 * Shows a case-insensitive substring replacement using the embedded
 * {@code (?i)} flag with {@code String.replaceAll}.
 */
public class Main {
    /**
     * Replaces every case variant of "abc" in a sample string and prints the result.
     *
     * @param argv command-line arguments (unused)
     * @throws Exception never thrown by this implementation
     */
    public static void main(String[] argv) throws Exception {
        // "(?i)" turns on case-insensitive matching for the rest of the pattern.
        final String replaced = "Abc abc".replaceAll("(?i)abc", "DEF");
        System.out.println("After replacement:\n" + " " + replaced);
    }
}
/*
After replacement:
DEF DEF
*/