Java/Regular Expressions/String Operation

Материал из Java эксперт
Перейти к: навигация, поиск

Содержание

Apply proper uppercase and lowercase on a String

   <source lang="java">
 

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demonstrates capitalizing every word of a string with a regular expression:
 * the first letter of each word is upper-cased and the rest is lower-cased.
 */
public class Main {

  /**
   * Title-cases the sample sentence and prints the result to standard output.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    String str = "this is a test";
    StringBuffer sb = new StringBuffer();
    // Group 1 captures the first letter of a word, group 2 the remaining letters.
    Matcher m = Pattern.compile("([a-z])([a-z]*)", Pattern.CASE_INSENSITIVE).matcher(str);
    while (m.find()) {
      // Copies the text before the match, then the re-cased word.
      m.appendReplacement(sb, m.group(1).toUpperCase() + m.group(2).toLowerCase());
    }
    // appendTail copies whatever follows the last match.
    System.out.println(m.appendTail(sb).toString());
  }

} // Output: This Is A Test


 </source>
   
  
 
  



Calculating Word Frequencies with Regular Expressions

   <source lang="java">
  

import java.io.FileInputStream;
import java.io.IOException;
import java.nio.CharBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Counts how often each word occurs in a text file and prints the counts
 * as a map sorted by word.
 */
public class WordCount {

  /** Matches one line at a time; MULTILINE lets {@code $} match before each line terminator. */
  private static final Pattern LINE_PATTERN = Pattern.compile(".*$", Pattern.MULTILINE);

  /** A single punctuation or whitespace character separates two words. */
  private static final Pattern WORD_BREAK_PATTERN = Pattern.compile("[\\p{Punct}\\s]");

  /**
   * Reads the input file, tallies its words and prints the resulting map.
   *
   * @param args optional; {@code args[0]} names the file to read, and
   *     {@code "WordCount.java"} is used when no argument is given
   * @throws Exception if the file cannot be opened, mapped or decoded
   */
  public static void main(String args[]) throws Exception {
    String filename = (args != null && args.length > 0) ? args[0] : "WordCount.java";
    CharBuffer charBuffer = readFile(filename);

    // TreeMap keeps the words sorted, so the printed output is ordered.
    Map<String, Integer> frequencies = new TreeMap<String, Integer>();
    Matcher lineMatcher = LINE_PATTERN.matcher(charBuffer);
    while (lineMatcher.find()) {
      String[] words = WORD_BREAK_PATTERN.split(lineMatcher.group());
      for (int i = 0; i < words.length; i++) {
        String word = words[i];
        // Adjacent separators produce empty strings; they are not words.
        if (word.length() == 0) {
          continue;
        }
        Integer seen = frequencies.get(word);
        int next = (seen == null) ? 1 : seen.intValue() + 1;
        frequencies.put(word, Integer.valueOf(next));
      }
    }
    System.out.println(frequencies);
  }

  /** Maps the file into memory, decodes it as ISO-8859-1 and closes the stream. */
  private static CharBuffer readFile(String filename) throws IOException {
    FileInputStream input = new FileInputStream(filename);
    try {
      FileChannel channel = input.getChannel();
      int fileLength = (int) channel.size();
      MappedByteBuffer buffer = channel.map(FileChannel.MapMode.READ_ONLY, 0, fileLength);
      Charset charset = Charset.forName("ISO-8859-1");
      CharsetDecoder decoder = charset.newDecoder();
      return decoder.decode(buffer);
    } finally {
      // Closing the stream also closes its channel.
      input.close();
    }
  }

}



 </source>
   
  
 
  



Create a string search and replace using regex

   <source lang="java">
 

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demonstrates a simple search-and-replace built on {@link Pattern} and {@link Matcher}.
 */
public class Main {

  /**
   * Replaces every occurrence of "brown" with "red" in a sample sentence
   * and prints both the source and the result.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    String source = "The quick brown fox jumps over the brown lazy dog.";
    String find = "brown";
    String replace = "red";
    // The search text is compiled as a regular expression, not as a literal.
    Pattern pattern = Pattern.compile(find);
    Matcher matcher = pattern.matcher(source);
    String output = matcher.replaceAll(replace);
    System.out.println("Source = " + source);
    System.out.println("Output = " + output);
  }

}


 </source>
   
  
 
  



Determining If a String Matches a Pattern Exactly

   <source lang="java">
 

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demonstrates the difference between {@link Matcher#matches()} (whole input must match)
 * and {@link Matcher#lookingAt()} (only the beginning of the input must match).
 */
public class Main {

  /**
   * Runs the three checks; the results are only stored, nothing is printed.
   *
   * @param argv ignored
   * @throws Exception declared by the original example; nothing here throws a checked exception
   */
  public static void main(String[] argv) throws Exception {
    // Compile regular expression
    String patternStr = "b";
    Pattern pattern = Pattern.compile(patternStr);
    // Determine if there is an exact match
    CharSequence inputStr = "a b c";
    Matcher matcher = pattern.matcher(inputStr);
    boolean matchFound = matcher.matches(); // false: "a b c" as a whole is not "b"
    // Try a different input
    matcher.reset("b");
    matchFound = matcher.matches(); // true
    // Determine if pattern matches beginning of input
    matchFound = matcher.lookingAt(); // true
  }

}


 </source>
   
  
 
  



Extract a substring by matching a regular expression.

   <source lang="java">
 

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demonstrates extracting every substring that matches a regular expression.
 */
public class Main {

  /**
   * Finds each address of the form {@code word@XYZ.ru} in a multi-line string
   * and prints it on its own line.
   *
   * @param args ignored
   */
  public static void main(String args[]) {
    // \b anchors the match to word boundaries; the dot is escaped to match literally.
    Pattern pat = Pattern.compile("\\b\\w+@XYZ\\.ru\\b");
    Matcher mat = pat.matcher("t@XYZ.ru\n" + "a@XYZ.ru\n"
        + "n@XYZ.ru");
    while (mat.find()) {
      System.out.println("Match: " + mat.group());
    }
  }

}
/*
 * Output:
 * Match: t@XYZ.ru
 * Match: a@XYZ.ru
 * Match: n@XYZ.ru
 */


 </source>
   
  
 
  



Get all digits from a string

   <source lang="java">
  

public class Main {

  /**
   * Strips every non-digit character from a sample string and prints what is left.
   *
   * @param argv ignored
   */
  public static void main(String[] argv) throws Exception {
    String mixed = "abasdfasdf1 2wasdfasdf9_8asdfasdfz asdfasdfyx7";
    // \D matches any character that is not a digit.
    String digitsOnly = mixed.replaceAll("\\D", "");
    System.out.println(digitsOnly);
  }

}


 </source>
   
  
 
  



Get First Found regex

   <source lang="java">
 

/**

* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Small regex helpers for pulling matches out of a string.
 */
public class Utils {

  /**
   * Returns the first match of {@code regex} in {@code contents}.
   *
   * @param contents text to search
   * @param regex regular expression to look for
   * @return the first entry of {@link #getFound(String, String)}, or {@code null} when
   *     that list is considered empty by {@link #isEmpty(List)}
   */
  public static String getFirstFound(String contents, String regex) {
    List<String> founds = getFound(contents, regex);
    if (isEmpty(founds)) {
      return null;
    }
    return founds.get(0);
  }

  /**
   * Collects every match of {@code regex} in {@code contents}.
   *
   * <p>When the pattern has at least one capturing group, the text of group 1 is
   * collected for each match (which may be {@code null} if that group did not
   * participate); otherwise the whole match is collected.
   *
   * @param contents text to search
   * @param regex regular expression to look for
   * @return the collected matches in order of appearance, or {@code null} when either
   *     argument is {@code null} or blank
   */
  public static List<String> getFound(String contents, String regex) {
    if (isEmpty(regex) || isEmpty(contents)) {
      return null;
    }
    List<String> results = new ArrayList<String>();
    // NOTE(review): per the Pattern docs UNICODE_CASE only takes effect together with
    // CASE_INSENSITIVE, so matching here stays case-sensitive.
    Pattern pattern = Pattern.compile(regex, Pattern.UNICODE_CASE);
    Matcher matcher = pattern.matcher(contents);

    while (matcher.find()) {
      if (matcher.groupCount() > 0) {
        results.add(matcher.group(1));
      } else {
        results.add(matcher.group());
      }
    }
    return results;
  }

  /**
   * Tells whether a list carries no usable content.
   *
   * @param list list to inspect, may be {@code null}
   * @return {@code true} for {@code null}, for an empty list, and for a
   *     single-element list whose only element is {@code null} or blank
   */
  public static boolean isEmpty(List<String> list) {
    if (list == null || list.size() == 0) {
      return true;
    }
    if (list.size() == 1 && isEmpty(list.get(0))) {
      return true;
    }
    return false;
  }

  /**
   * Tells whether a string is {@code null} or contains only whitespace.
   *
   * @param str string to inspect, may be {@code null}
   * @return {@code true} when {@code str} is {@code null} or trims to the empty string
   */
  public static boolean isEmpty(String str) {
    if (str != null && str.trim().length() > 0) {
      return false;
    }
    return true;
  }

}


 </source>
   
  
 
  



Get First Not Empty String in a String list

   <source lang="java">
 

/**

* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Small helpers for regex matching and for picking usable strings out of a list.
 */
public class Utils {

  /**
   * Returns the first element of {@code list} that is neither {@code null} nor blank.
   *
   * @param list candidates, may be {@code null}
   * @return the first non-blank element, or {@code null} when there is none
   */
  public static String getFirstNotEmpty(List<String> list) {
    if (isEmpty(list)) {
      return null;
    }
    for (String item : list) {
      if (!isEmpty(item)) {
        return item;
      }
    }
    return null;
  }

  /**
   * Collects every match of {@code regex} in {@code contents}.
   *
   * <p>When the pattern has at least one capturing group, the text of group 1 is
   * collected for each match; otherwise the whole match is collected.
   *
   * @param contents text to search
   * @param regex regular expression to look for
   * @return the collected matches in order of appearance, or {@code null} when either
   *     argument is {@code null} or blank
   */
  public static List<String> getFound(String contents, String regex) {
    if (isEmpty(regex) || isEmpty(contents)) {
      return null;
    }
    List<String> results = new ArrayList<String>();
    // NOTE(review): per the Pattern docs UNICODE_CASE only takes effect together with
    // CASE_INSENSITIVE, so matching here stays case-sensitive.
    Pattern pattern = Pattern.compile(regex, Pattern.UNICODE_CASE);
    Matcher matcher = pattern.matcher(contents);
    while (matcher.find()) {
      if (matcher.groupCount() > 0) {
        results.add(matcher.group(1));
      } else {
        results.add(matcher.group());
      }
    }
    return results;
  }

  /**
   * Tells whether a list carries no usable content.
   *
   * @param list list to inspect, may be {@code null}
   * @return {@code true} for {@code null}, for an empty list, and for a
   *     single-element list whose only element is {@code null} or blank
   */
  public static boolean isEmpty(List<String> list) {
    if (list == null || list.size() == 0) {
      return true;
    }
    if (list.size() == 1 && isEmpty(list.get(0))) {
      return true;
    }
    return false;
  }

  /**
   * Tells whether a string is {@code null} or contains only whitespace.
   *
   * @param str string to inspect, may be {@code null}
   * @return {@code true} when {@code str} is {@code null} or trims to the empty string
   */
  public static boolean isEmpty(String str) {
    if (str != null && str.trim().length() > 0) {
      return false;
    }
    return true;
  }

}


 </source>
   
  
 
  



Get Found regex

   <source lang="java">
 

/**

* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Regex helper that lists every match found in a string.
 */
public class Utils {

  /**
   * Collects every match of {@code regex} in {@code contents}.
   *
   * <p>When the pattern has at least one capturing group, the text of group 1 is
   * collected for each match (which may be {@code null} if that group did not
   * participate); otherwise the whole match is collected.
   *
   * @param contents text to search
   * @param regex regular expression to look for
   * @return the collected matches in order of appearance, or {@code null} when either
   *     argument is {@code null} or blank
   */
  public static List<String> getFound(String contents, String regex) {
    if (isEmpty(regex) || isEmpty(contents)) {
      return null;
    }
    List<String> results = new ArrayList<String>();
    // NOTE(review): per the Pattern docs UNICODE_CASE only takes effect together with
    // CASE_INSENSITIVE, so matching here stays case-sensitive.
    Pattern pattern = Pattern.compile(regex, Pattern.UNICODE_CASE);
    Matcher matcher = pattern.matcher(contents);

    while (matcher.find()) {
      if (matcher.groupCount() > 0) {
        results.add(matcher.group(1));
      } else {
        results.add(matcher.group());
      }
    }
    return results;
  }

  /**
   * Tells whether a string is {@code null} or contains only whitespace.
   *
   * @param str string to inspect, may be {@code null}
   * @return {@code true} when {@code str} is {@code null} or trims to the empty string
   */
  public static boolean isEmpty(String str) {
    if (str != null && str.trim().length() > 0) {
      return false;
    }
    return true;
  }

}


 </source>
   
  
 
  



Ignore case differences when searching for or replacing substrings.

   <source lang="java">
 

public class Main {

  /**
   * Checks, ignoring case, whether a sample sentence contains the word "test"
   * and reports it on standard output when it does.
   *
   * @param args ignored
   */
  public static void main(String args[]) {
    String sentence = "This is a TEST.";
    // The embedded (?i) flag switches the whole expression to case-insensitive matching.
    boolean mentionsTest = sentence.matches("(?i).*test.*");
    if (mentionsTest) {
      System.out.println("test is in the string.");
    }
  }

}


 </source>
   
  
 
  



Java Regular Expression :split 2

   <source lang="java">
  

/* From http://java.sun.com/docs/books/tutorial/index.html */ /*

* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* -Redistribution of source code must retain the above copyright notice, this
*  list of conditions and the following disclaimer.
*
* -Redistribution in binary form must reproduce the above copyright notice,
*  this list of conditions and the following disclaimer in the documentation
*  and/or other materials provided with the distribution.
*
* Neither the name of Sun Microsystems, Inc. or the names of contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* This software is provided "AS IS," without a warranty of any kind. ALL
* EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES, INCLUDING
* ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE
* OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED. SUN MICROSYSTEMS, INC. ("SUN")
* AND ITS LICENSORS SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY LICENSEE
* AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS
* DERIVATIVES. IN NO EVENT WILL SUN OR ITS LICENSORS BE LIABLE FOR ANY LOST
* REVENUE, PROFIT OR DATA, OR FOR DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL,
* INCIDENTAL OR PUNITIVE DAMAGES, HOWEVER CAUSED AND REGARDLESS OF THE THEORY
* OF LIABILITY, ARISING OUT OF THE USE OF OR INABILITY TO USE THIS SOFTWARE,
* EVEN IF SUN HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
*
* You acknowledge that this software is not designed, licensed or intended
* for use in the design, construction, operation or maintenance of any
* nuclear facility.
*/

import java.util.regex.Pattern;

/**
 * Demonstrates {@link Pattern#split(CharSequence)} with a digit as the separator.
 */
public final class SplitTest2 {

  /** Any single digit acts as a separator. */
  private static final String REGEX = "\\d";
  private static final String INPUT = "one9two4three7four1five";

  /**
   * Splits the sample input on digits and prints each piece on its own line.
   *
   * @param argv ignored
   */
  public static void main(String[] argv) {
    Pattern p = Pattern.compile(REGEX);
    String[] items = p.split(INPUT);
    for (int i = 0; i < items.length; i++) {
      System.out.println(items[i]);
    }
  }

}



 </source>
   
  
 
  



Java Regular Expression : Split text

   <source lang="java">
  

/* From http://java.sun.com/docs/books/tutorial/index.html */ /*

* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* -Redistribution of source code must retain the above copyright notice, this
*  list of conditions and the following disclaimer.
*
* -Redistribution in binary form must reproduce the above copyright notice,
*  this list of conditions and the following disclaimer in the documentation
*  and/or other materials provided with the distribution.
*
* Neither the name of Sun Microsystems, Inc. or the names of contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* This software is provided "AS IS," without a warranty of any kind. ALL
* EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES, INCLUDING
* ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE
* OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED. SUN MICROSYSTEMS, INC. ("SUN")
* AND ITS LICENSORS SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY LICENSEE
* AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS
* DERIVATIVES. IN NO EVENT WILL SUN OR ITS LICENSORS BE LIABLE FOR ANY LOST
* REVENUE, PROFIT OR DATA, OR FOR DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL,
* INCIDENTAL OR PUNITIVE DAMAGES, HOWEVER CAUSED AND REGARDLESS OF THE THEORY
* OF LIABILITY, ARISING OUT OF THE USE OF OR INABILITY TO USE THIS SOFTWARE,
* EVEN IF SUN HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
*
* You acknowledge that this software is not designed, licensed or intended
* for use in the design, construction, operation or maintenance of any
* nuclear facility.
*/

import java.util.regex.Pattern;

/**
 * Demonstrates {@link Pattern#split(CharSequence)} with a colon as the separator.
 */
public final class SplitTest {

  private static final String REGEX = ":";
  private static final String INPUT = "one:two:three:four:five";

  /**
   * Splits the sample input on colons and prints each piece on its own line.
   *
   * @param argv ignored
   */
  public static void main(String[] argv) {
    Pattern p = Pattern.compile(REGEX);
    String[] items = p.split(INPUT);
    for (int i = 0; i < items.length; i++) {
      System.out.println(items[i]);
    }
  }

}



 </source>
   
  
 
  



Match punct

   <source lang="java">
 

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demonstrates the {@code \G} anchor combined with the POSIX punctuation class.
 */
public class Main {

  /**
   * Tests whether the sample text starts with a punctuation character and prints the result.
   *
   * @param argv ignored
   * @throws Exception declared by the original example; nothing here throws a checked exception
   */
  public static void main(String[] argv) throws Exception {
    // \G pins the match to the end of the previous match, which is the start
    // of the input on the first find().
    Pattern punct = Pattern.compile("\\G\\p{Punct}");
    Matcher mat = punct.matcher("this is a test 999");
    System.out.println(mat.find());
  }

} //false


 </source>
   
  
 
  



Match space

   <source lang="java">
 

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demonstrates the {@code \G} anchor combined with the whitespace class.
 */
public class Main {

  /**
   * Tests whether the sample text starts with a whitespace character and prints the result.
   *
   * @param argv ignored
   * @throws Exception declared by the original example; nothing here throws a checked exception
   */
  public static void main(String[] argv) throws Exception {
    // \G pins the match to the end of the previous match, which is the start
    // of the input on the first find().
    Pattern space = Pattern.compile("\\G\\s");
    Matcher mat = space.matcher("this is a test 999");
    System.out.println(mat.find());
  }

} //false


 </source>
   
  
 
  



Match string ends

   <source lang="java">
 

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demonstrates the {@code \G} anchor combined with the end-of-input anchor {@code \z}.
 */
public class Main {

  /**
   * Looks for the end of input right at the current match position and prints the
   * (empty) match when found. For the non-empty sample text nothing is printed.
   *
   * @param argv ignored
   * @throws Exception declared by the original example; nothing here throws a checked exception
   */
  public static void main(String[] argv) throws Exception {
    // \G pins the match to the start of the input on the first find(); \z only
    // matches at the very end, so both hold together only for empty input.
    Pattern end = Pattern.compile("\\G\\z");
    Matcher mat = end.matcher("this is a test 999");
    if (mat.find()) {
      System.out.println(mat.group());
    }
  }

}


 </source>
   
  
 
  



Match words

   <source lang="java">
 

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demonstrates the {@code \G} anchor combined with the word-character class.
 */
public class Main {

  /**
   * Tests whether the sample text starts with a run of word characters and prints the result.
   *
   * @param argv ignored
   * @throws Exception declared by the original example; nothing here throws a checked exception
   */
  public static void main(String[] argv) throws Exception {
    // \G pins the match to the end of the previous match, which is the start
    // of the input on the first find().
    Pattern word = Pattern.compile("\\G\\w+");
    Matcher mat = word.matcher("this is a test 999");
    System.out.println(mat.find());
  }

} //true


 </source>
   
  
 
  



Parse an Apache log file with StringTokenizer

   <source lang="java">
  

/*

* Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* 
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java 
* language and environment is gratefully acknowledged.
* 
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/

import java.util.*; /**

* Parse an Apache log file with StringTokenizer
*/

public class LogStrTok implements LogExample {

 /**
  * Walks through the sample log entry with a single StringTokenizer, switching
  * the delimiter set between calls, and prints each extracted piece with a label.
  * The statement order matters: every nextToken call continues from where the
  * previous one stopped.
  */
 public static void main(String argv[]) {
   // No delimiters given, so the tokenizer starts with its default delimiter set.
   StringTokenizer matcher = new StringTokenizer(logEntryLine);
   System.out.println("tokens = " + matcher.countTokens());
   // StringTokenizer CAN NOT count if you are changing the delimiter!
   // if (matcher.countTokens() != NUM_FIELDS) {
   //   System.err.println("Bad log entry (or bug in StringTokenizer?):");
   //   System.err.println(logEntryLine);
   // }
   System.out.println("Hostname: " + matcher.nextToken());
   // StringTokenizer makes you ask for tokens in order to skip them:
   matcher.nextToken(); // eat the "-"
   matcher.nextToken(); // again
   // nextToken(String) replaces the delimiter set; per the StringTokenizer docs
   // the new set stays in effect for the calls that follow.
   System.out.println("Date/Time: " + matcher.nextToken("]"));
   //matcher.nextToken(" "); // again
   System.out.println("Request: " + matcher.nextToken("\""));
   // Switch back to a space delimiter; the token returned here is discarded.
   matcher.nextToken(" "); // again
   System.out.println("Response: " + matcher.nextToken());
   System.out.println("ByteCount: " + matcher.nextToken());
   System.out.println("Referer: " + matcher.nextToken("\""));
   matcher.nextToken(" "); // again
   System.out.println("User-Agent: " + matcher.nextToken("\""));
 }

} /**

* Common fields for Apache Log demo.
*/

interface LogExample {

 /** The number of fields that must be found. */
 // Only referenced from the commented-out check in LogStrTok.main within this snippet.
 public static final int NUM_FIELDS = 9;
 /** The sample log entry to be parsed. */
 public static final String logEntryLine = "123.45.67.89 - - [27/Oct/2000:09:27:09 -0400] \"GET /java/javaResources.html HTTP/1.0\" 200 10450 \"-\" \"Mozilla/4.6 [en] (X11; U; OpenBSD 2.8 i386; Nav)\"";

}



 </source>
   
  
 
  



Print all the strings that match a given pattern from a file

   <source lang="java">
  

/*

* Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* 
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java 
* language and environment is gratefully acknowledged.
* 
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/

import java.util.regex.*; import java.io.*; /**

* Print all the strings that match a given pattern from a file.
*/

public class ReaderIter {

  /**
   * Reads a text file line by line and prints every substring that matches the
   * pattern {@code [A-Za-z][a-z]+}, one match per output line.
   *
   * @param args optional; {@code args[0]} names the file to scan, and
   *     {@code "ReaderIter.java"} is used when no argument is given
   * @throws IOException if the file cannot be opened or read
   */
  public static void main(String[] args) throws IOException {
    // The RE pattern: a letter followed by one or more lower-case letters.
    Pattern patt = Pattern.compile("[A-Za-z][a-z]+");
    String fileName = (args != null && args.length > 0) ? args[0] : "ReaderIter.java";
    // A FileReader (see the I/O chapter)
    BufferedReader r = new BufferedReader(new FileReader(fileName));
    try {
      // For each line of input, try matching in it.
      String line;
      while ((line = r.readLine()) != null) {
        // For each match in the line, extract and print it.
        Matcher m = patt.matcher(line);
        while (m.find()) {
          // Simplest method would be m.group(0); the offsets are used here
          // to show how start() and end() delimit the match.
          int start = m.start(0);
          int end = m.end(0);
          System.out.println(line.substring(start, end));
        }
      }
    } finally {
      // Release the file handle even when reading fails part-way.
      r.close();
    }
  }

}



 </source>
   
  
 
  



Quick demo of Regular Expressions substitution

   <source lang="java">
  

/*

* Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* 
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java 
* language and environment is gratefully acknowledged.
* 
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/

import java.util.regex.*; /**

* Quick demo of RE substitution: correct "demon" and other
* spelling variants to the correct, non-satanic "daemon".
* @author Ian F. Darwin, http://www.darwinsys.com/
* @version $Id: ReplaceDemo.java,v 1.5 2004/02/09 03:33:42 ian Exp $
*/

public class ReplaceDemo {

  /**
   * Replaces spelling variants of "daemon" in a sample sentence twice: once with
   * {@code Matcher.replaceAll} and once with the appendReplacement/appendTail pair,
   * printing the input and both results.
   *
   * @param argv ignored
   */
  public static void main(String[] argv) {
    // Make an RE pattern to match almost any form (deamon, demon, etc.).
    String patt = "d[ae]{1,2}mon";  // i.e., 1 or 2 "a" or "e" any combo
    // A test input.
    String input = "Unix hath demons and deamons in it!";
    System.out.println("Input: " + input);
    // Run it from a RE instance and see that it works
    Pattern r = Pattern.compile(patt);
    Matcher m = r.matcher(input);
    System.out.println("ReplaceAll: " + m.replaceAll("daemon"));
    // Show the appendReplacement method; reset() rewinds the matcher first.
    m.reset();
    StringBuffer sb = new StringBuffer();
    System.out.print("Append methods: ");
    while (m.find()) {
      // Copy the text up to this match, plus the word "daemon".
      m.appendReplacement(sb, "daemon");
    }
    m.appendTail(sb);            // copy remainder
    System.out.println(sb.toString());
  }

}



 </source>
   
  
 
  



Regular Expression Replace

   <source lang="java">
  

/* From http://java.sun.com/docs/books/tutorial/index.html */ /*

* Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* -Redistribution of source code must retain the above copyright notice, this
*  list of conditions and the following disclaimer.
*
* -Redistribution in binary form must reproduce the above copyright notice,
*  this list of conditions and the following disclaimer in the documentation
*  and/or other materials provided with the distribution.
*
* Neither the name of Sun Microsystems, Inc. or the names of contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* This software is provided "AS IS," without a warranty of any kind. ALL
* EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES, INCLUDING
* ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE
* OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED. SUN MICROSYSTEMS, INC. ("SUN")
* AND ITS LICENSORS SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY LICENSEE
* AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS
* DERIVATIVES. IN NO EVENT WILL SUN OR ITS LICENSORS BE LIABLE FOR ANY LOST
* REVENUE, PROFIT OR DATA, OR FOR DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL,
* INCIDENTAL OR PUNITIVE DAMAGES, HOWEVER CAUSED AND REGARDLESS OF THE THEORY
* OF LIABILITY, ARISING OUT OF THE USE OF OR INABILITY TO USE THIS SOFTWARE,
* EVEN IF SUN HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
*
* You acknowledge that this software is not designed, licensed or intended
* for use in the design, construction, operation or maintenance of any
* nuclear facility.
*/

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demonstrates replacing every regex match with the appendReplacement/appendTail pair.
 */
public final class RegexTest {

  /** Zero or more "a" followed by one "b". */
  private static final String REGEX = "a*b";
  private static final String INPUT = "aabfooaabfooabfoob";
  private static final String REPLACE = "-";

  /**
   * Replaces each match of {@code REGEX} in {@code INPUT} with {@code REPLACE}
   * and prints the result.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    Pattern p = Pattern.compile(REGEX);
    Matcher m = p.matcher(INPUT); // get a matcher object
    StringBuffer sb = new StringBuffer();
    while (m.find()) {
      m.appendReplacement(sb, REPLACE);
    }
    // Copy whatever follows the last match.
    m.appendTail(sb);
    System.out.println(sb.toString());
  }

}



 </source>
   
  
 
  



Regular Expression Search and Replace Program

   <source lang="java">
 

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demonstrates replacing all occurrences of a pattern with {@link Matcher#replaceAll(String)}.
 */
public class Main {

  /**
   * Replaces every "a" in the sample input with "x" and prints the result.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    CharSequence inputStr = "a b c a b c";
    String patternStr = "a";
    String replacementStr = "x";
    Pattern pattern = Pattern.compile(patternStr);
    // Replace all occurrences of pattern in input
    Matcher matcher = pattern.matcher(inputStr);
    String output = matcher.replaceAll(replacementStr);
    System.out.println(output);
  }

}


 </source>
   
  
 
  



Regular expression: Split Demo

   <source lang="java">
  

// : c12:SplitDemo.java
// From "Thinking in Java, 3rd ed." (c) Bruce Eckel 2002
// www.BruceEckel.com. See copyright notice in CopyRight.txt.
import java.util.Arrays;
import java.util.regex.Pattern;

/**
 * Demonstrates splitting text with {@link Pattern#split(CharSequence)},
 * its limited variant, and {@link String#split(String)}.
 */
public class SplitDemo {

  /**
   * Splits sample sentences three different ways and prints each result as a list.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    String input = "This!!unusual use!!of exclamation!!points";
    System.out.println(Arrays.asList(Pattern.compile("!!").split(input)));
    // Only do the first three:
    System.out
        .println(Arrays.asList(Pattern.compile("!!").split(input, 3)));
    System.out.println(Arrays.asList("Aha! String has a split() built in!"
        .split(" ")));
  }

} ///:~



 </source>
   
  
 
  



Remove trailing white space from a string

   <source lang="java">
 

public class Main {

  /**
   * Removes the whitespace at the end of a sample string and prints what remains.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    String padded = "     a     ";
    // \s+$ matches the run of whitespace that ends the string; leading blanks stay.
    String rightTrimmed = padded.replaceAll("\\s+$", "");
    System.out.println("Text: " + rightTrimmed);
  }

}


 </source>
   
  
 
  



Removing Duplicate Whitespace in a String

   <source lang="java">
 

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Helper for collapsing runs of whitespace into single spaces.
 */
public class Main {

  /** One or more consecutive whitespace characters; compiled once and reused. */
  private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

  /**
   * Does nothing; the example only provides {@link #removeDuplicateWhitespace(CharSequence)}.
   *
   * @param argv ignored
   */
  public static void main(String[] argv) throws Exception {
  }

  /**
   * Replaces every run of whitespace (spaces, tabs, line breaks) with one space.
   * Leading and trailing runs are collapsed as well, not removed.
   *
   * @param inputStr text to normalize; must not be {@code null}
   * @return the normalized text
   */
  public static CharSequence removeDuplicateWhitespace(CharSequence inputStr) {
    Matcher matcher = WHITESPACE_RUN.matcher(inputStr);
    return matcher.replaceAll(" ");
  }

}


 </source>
   
  
 
  



Replacing String Tokenizer

   <source lang="java">
  

// : c12:ReplacingStringTokenizer.java
// From "Thinking in Java, 3rd ed." (c) Bruce Eckel 2002
// www.BruceEckel.com. See copyright notice in CopyRight.txt.
import java.util.Arrays;
import java.util.StringTokenizer;

/** Compares tokenising with {@link StringTokenizer} against the simpler {@link String#split}. */
public class ReplacingStringTokenizer {

  /**
   * Prints each whitespace-separated token of a sample sentence on its own line using
   * {@code StringTokenizer}, then prints the same tokens as a list produced by {@code split(" ")}.
   *
   * @param args command-line arguments (unused)
   */
  public static void main(String[] args) {
    // The apostrophe must be a single quote; a double quote would end the string literal early.
    String input = "But I'm not dead yet! I feel happy!";
    StringTokenizer stoke = new StringTokenizer(input);
    while (stoke.hasMoreTokens()) {
      System.out.println(stoke.nextToken());
    }
    System.out.println(Arrays.asList(input.split(" ")));
  }

} ///:~



 </source>
   
  
 
  



Searching and Replacing with Nonconstant Values Using a Regular Expression

   <source lang="java">
 

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/** Demonstrates search-and-replace where the replacement text depends on each match. */
public class Main {

  /**
   * Wraps every "letters followed by digits" token of a sample string in {@code found<...>}
   * and prints the rewritten string.
   *
   * @param argv command-line arguments (unused)
   * @throws Exception declared for signature compatibility
   */
  public static void main(String[] argv) throws Exception {
    CharSequence inputStr = "ab12 cd efg34 123";
    String patternStr = "([a-zA-Z]+[0-9]+)";
    Pattern pattern = Pattern.compile(patternStr);
    Matcher matcher = pattern.matcher(inputStr);
    StringBuffer buf = new StringBuffer();
    while (matcher.find()) {
      String replaceStr = matcher.group();
      // The match holds only letters and digits, so it is safe to embed in the
      // replacement string without escaping '$' or '\\'.
      matcher.appendReplacement(buf, "found<" + replaceStr + ">");
    }
    // Copy whatever follows the last match unchanged.
    matcher.appendTail(buf);
    String result = buf.toString();
    System.out.println(result);
  }

}


 </source>
   
  
 
  



Simple split

   <source lang="java">
  

/** Demonstrates {@link String#split} with a regex made of several alternative phrases. */
public class Split {

  /**
   * Splits a sample statement wherever one of the listed phrases occurs, then prints the
   * pattern, the original statement, and each resulting fragment on its own line.
   *
   * @param args command-line arguments (unused)
   */
  public static void main(String args[]) {
    final String splitPattern = "compromise|cooperate|concession|"
        + "conciliation|(finding the middle group)|(give and take)";
    final String statement = "I will not compromise. I will not "
        + "cooperate. There will be no concession, no conciliation, no "
        + "finding the middle group, and no give and take.";

    final String[] fragments = statement.split(splitPattern);

    System.out.println("REGEX PATTERN:\n" + splitPattern + "\n");
    System.out.println("STATEMENT:\n" + statement + "\n");
    System.out.println("\nTOKENS");
    for (String fragment : fragments) {
      System.out.println(fragment);
    }
  }

}



 </source>
   
  
 
  



Split a String into a Java Array of Strings divided by a Regular Expression

   <source lang="java">
  

/*

* Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* 
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java 
* language and environment is gratefully acknowledged.
* 
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/

import java.util.regex.*;

/**
 * Split a String into a Java Array of Strings divided by an RE
 */
public class Split {

  /**
   * Splits a sample sentence on the literal text {@code ian} and prints each piece, quoted
   * and prefixed with its array index, so leading and trailing spaces are visible.
   *
   * @param args command-line arguments (unused)
   */
  public static void main(String[] args) {
    // Pattern.compile is the factory for Pattern; split() then cuts the input at each match.
    String[] x =
      Pattern.compile("ian").split(
        "the darwinian devonian explodian chicken");
    for (int i = 0; i < x.length; i++) {
      System.out.println(i + " \"" + x[i] + "\"");
    }
  }

}



 </source>
   
  
 
  



Split the supplied content into lines, returning each line as an element in the returned list.

   <source lang="java">
   

import java.util.Arrays; import java.util.Collections; import java.util.List; /*

* JBoss DNA (http://www.jboss.org/dna)
* See the COPYRIGHT.txt file distributed with this work for information
* regarding copyright ownership.  Some portions may be licensed
* to Red Hat, Inc. under one or more contributor license agreements.
* See the AUTHORS.txt file in the distribution for a full listing of
* individual contributors.
*
* JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA
* is licensed to you under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* JBoss DNA is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/

/**
 * Helper routines for processing and manipulating strings.
 */
public class StringUtil {

  /**
   * Breaks the supplied text into lines, one list element per line. A line ends at a
   * line feed that may optionally be preceded by a carriage return.
   *
   * @param content the text to break up; may be null
   * @return the lines in order; an empty list when {@code content} is null or has zero length
   */
  public static List<String> splitLines( final String content ) {
    final boolean nothingToSplit = (content == null) || (content.length() == 0);
    if (nothingToSplit) {
      return Collections.emptyList();
    }
    return Arrays.asList(content.split("[\\r]?\\n"));
  }

}



 </source>
   
  
 
  



Split-up string using regular expression

   <source lang="java">
 

import java.util.regex.Pattern;

/** Demonstrates splitting a string on a regex that accepts commas and/or whitespace. */
public class Main {

  /**
   * Splits a list of colour names separated by any mix of commas and whitespace and prints
   * each name in quotes.
   *
   * @param args command-line arguments (unused)
   */
  public static void main(String[] args) {
    // One or more commas/whitespace characters form a single delimiter.
    String pattern = "[,\\s]+";
    String colours = "Red,White, Blue   Green        Yellow, Orange";
    Pattern splitter = Pattern.compile(pattern);
    String[] result = splitter.split(colours);
    for (String colour : result) {
      System.out.println("Colour = \"" + colour + "\"");
    }
  }

}


 </source>
   
  
 
  



StringConvenience -- demonstrate java.lang.String convenience routine

   <source lang="java">
  

/*

* Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* 
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java 
* language and environment is gratefully acknowledged.
* 
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/

/**
 * StringConvenience -- shows the {@code String.matches} shortcut for a one-off regex test.
 * @author Ian F. Darwin
 * @version $Id: StringConvenience.java,v 1.2 2004/02/23 02:37:34 ian Exp $
 */
public class StringConvenience {

  /**
   * Tests a sample line against a pattern and reports the outcome on standard output:
   * either the line together with the quoted pattern, or {@code NO MATCH}.
   *
   * @param argv command-line arguments (unused)
   */
  public static void main(String[] argv) {
    final String line = "Order QT300. Now!";
    final String pattern = ".*Q[^u]\\d+\\..*";
    // String.matches succeeds only if the whole line matches, hence the .* on both ends.
    final String report = line.matches(pattern)
        ? line + " matches \"" + pattern + "\""
        : "NO MATCH";
    System.out.println(report);
  }

}



 </source>
   
  
 
  



String replace

   <source lang="java">
  

/** Demonstrates tightening wordy prose with a sequence of {@code String.replaceAll} calls. */
public class StyleSearchAndReplace {

  /**
   * Applies a fixed list of phrase substitutions, in order, to a sample paragraph and prints
   * the paragraph before and after. Phrases that do not occur leave the text unchanged.
   *
   * @param args command-line arguments (unused)
   */
  public static void main(String args[]) {
    final String statement = "The question as to whether the jab is"
        + " superior to the cross has been debated for some time in"
        + " boxing circles. However, it is my opinion that this"
        + " false dichotomy misses the point. I call your attention"
        + " to the fact that the best boxers often use a combination of"
        + " the two. I call your attention to the fact that Mohammed"
        + " Ali,the Greatest of the sport of boxing, used both. He had"
        + " a tremendous jab, yet used his cross effectively, often,"
        + " and well";

    // Each row is {regex, replacement}; rows are applied from top to bottom.
    final String[][] substitutions = {
        {"The question as to whether", "Whether"},
        {" of the sport of boxing", ""},
        {"amount of success", "success"},
        {"However, it is my opinion that this", "This"},
        {"a combination of the two", "both"},
        {"This is in spite of the fact that" + " the", "The"},
        {"I call your attention to the fact that", ""}
    };

    String newStmt = statement;
    for (String[] substitution : substitutions) {
      newStmt = newStmt.replaceAll(substitution[0], substitution[1]);
    }

    System.out.println("BEFORE:\n" + statement + "\n");
    System.out.println("AFTER:\n" + newStmt);
  }

}



 </source>
   
  
 
  



String split

   <source lang="java">
  

/** Demonstrates using {@code String.split} to swap the two halves of a sentence. */
public class StyleSplitExample {

  /**
   * Runs {@link #strengthenSentence(String)} on three sample phrases.
   *
   * @param args command-line arguments (unused)
   */
  public static void main(String args[]) {
    String phrase1 = "but simple justice, not charity";
    strengthenSentence(phrase1);
    String phrase2 = "but that I love Rome more, not that I love Caesar less";
    strengthenSentence(phrase2);
    String phrase3 = "ask what you can do for your country, ask not what your "
        + "country can do for you";
    strengthenSentence(phrase3);
  }

  /**
   * Swaps the text before the first comma with the text between the first and second
   * commas (or the end of the sentence), joins them with {@code ", "}, and prints the
   * sentence before and after.
   *
   * @param sentence the sentence to rearrange; must not be null
   * @return the rearranged sentence, or {@code null} when the sentence does not split into
   *         at least two parts (for example, when it contains no comma)
   */
  public static String strengthenSentence(String sentence) {
    String splitPattern = ",";
    String[] tokens = sentence.split(splitPattern);
    // split() never returns null; a sentence without a usable comma yields fewer than
    // two tokens, which is the real "no match" condition.
    if (tokens.length < 2) {
      System.out.println("   NO MATCH: pattern:" + sentence
          + "\r\n             regex: " + splitPattern);
      return null;
    }
    String retval = tokens[1] + ", " + tokens[0];
    System.out.println("BEFORE: " + sentence);
    System.out.println("AFTER : " + retval + "\n");
    return retval;
  }

}



 </source>
   
  
 
  



Strip extra spaces in a XML string

   <source lang="java">
   

/** Demonstrates removing whitespace that sits between adjacent XML tags. */
public class Main {

  /**
   * Deletes every whitespace run located between a {@code >} and the next {@code <} in a
   * sample string, then prints the original followed by the result.
   *
   * @param args command-line arguments (unused)
   */
  public static void main(String[] args) {
    final String xml = "<a>test 1</a>    test 2 ";
    // In this sample no '>' is followed by whitespace and then '<', so both lines print alike.
    final String compacted = xml.replaceAll(">\\s+<", "><");
    System.out.println(xml);
    System.out.println(compacted);
  }

}



 </source>
   
  
 
  



Use Matcher.appendReplacement() to match [a-zA-Z]+[0-9]+

   <source lang="java">
 

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/** Demonstrates {@code Matcher.appendReplacement} / {@code appendTail} on letter-digit tokens. */
public class Main {

  /**
   * Finds every run of letters immediately followed by digits in a sample string, wraps each
   * one in {@code found<...>}, and prints the rewritten string.
   *
   * @param argv command-line arguments (unused)
   * @throws Exception declared for signature compatibility
   */
  public static void main(String[] argv) throws Exception {
    CharSequence inputStr = "ab12 cd efg34 asdf 123";
    String patternStr = "([a-zA-Z]+[0-9]+)";
    Pattern pattern = Pattern.compile(patternStr);
    Matcher matcher = pattern.matcher(inputStr);
    StringBuffer buf = new StringBuffer();
    while (matcher.find()) {
      String replaceStr = matcher.group();
      // appendReplacement copies the text skipped since the previous match, then the replacement.
      matcher.appendReplacement(buf, "found<" + replaceStr + ">");
    }
    // Copy whatever follows the last match unchanged.
    matcher.appendTail(buf);
    String result = buf.toString();
    System.out.println(result);
  }

} //found<ab12> cd found<efg34> asdf 123


 </source>
   
  
 
  



Use replaceAll() to ignore case when replacing one substring with another

   <source lang="java">
 

/** Demonstrates case-insensitive replacement via the embedded {@code (?i)} regex flag. */
public class Main {

  /**
   * Replaces every occurrence of {@code abc}, in any letter case, with {@code DEF} in a sample
   * string and prints the result.
   *
   * @param argv command-line arguments (unused)
   * @throws Exception declared for signature compatibility
   */
  public static void main(String[] argv) throws Exception {
    String str = "Abc abc";
    // (?i) switches on case-insensitive matching for the rest of the pattern.
    String result = str.replaceAll("(?i)abc", "DEF");
    System.out.println("After replacement:\n" + "   " + result);
  }

} /* After replacement:

  DEF DEF
*/


 </source>