Java/Regular Expressions/String Operation

Материал из Java эксперт
Перейти к: навигация, поиск

Содержание

Apply proper uppercase and lowercase on a String

  
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Capitalizes the first letter of every word in a sample sentence and
 * lower-cases the remaining letters of each word, then prints the result.
 */
public class Main {
  public static void main(String[] args) {
    String str = "this is a test";
    StringBuffer sb = new StringBuffer();
    // Group 1 captures the first letter of a word, group 2 the rest of it.
    Matcher m = Pattern.compile("([a-z])([a-z]*)", Pattern.CASE_INSENSITIVE).matcher(str);
    while (m.find()) {
      m.appendReplacement(sb, m.group(1).toUpperCase() + m.group(2).toLowerCase());
    }
    // appendTail copies whatever follows the last match (here: nothing).
    System.out.println(m.appendTail(sb).toString());
  }
}
//This Is A Test





Calculating Word Frequencies with Regular Expressions

   
import java.io.FileInputStream;
import java.nio.CharBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Counts how often each word occurs in the file {@code WordCount.java}
 * (looked up in the current working directory) and prints the resulting
 * word-to-frequency map, sorted by word.
 */
public class WordCount {
  /**
   * Reads the file, splits every line into words and prints the counts.
   *
   * @param args ignored
   * @throws Exception if the file cannot be opened, mapped or decoded
   */
  public static void main(String args[]) throws Exception {
    String filename = "WordCount.java";
    Map<String, Integer> map = new TreeMap<String, Integer>();
    // Map file from filename to byte buffer
    FileInputStream input = new FileInputStream(filename);
    try {
      FileChannel channel = input.getChannel();
      int fileLength = (int) channel.size();
      MappedByteBuffer buffer = channel.map(FileChannel.MapMode.READ_ONLY, 0,
          fileLength);
      // Convert to character buffer
      Charset charset = Charset.forName("ISO-8859-1");
      CharsetDecoder decoder = charset.newDecoder();
      CharBuffer charBuffer = decoder.decode(buffer);
      // With MULTILINE, '$' also matches before each line terminator,
      // so every find() yields one line of the file.
      Pattern linePattern = Pattern.compile(".*$", Pattern.MULTILINE);
      // Words are separated by punctuation or whitespace.
      Pattern wordBreakPattern = Pattern.compile("[\\p{Punct}\\s]");
      Matcher lineMatcher = linePattern.matcher(charBuffer);
      while (lineMatcher.find()) {
        CharSequence line = lineMatcher.group();
        String[] words = wordBreakPattern.split(line);
        for (String word : words) {
          // Adjacent separators produce empty strings; skip them.
          if (word.length() > 0) {
            Integer frequency = map.get(word);
            map.put(word, frequency == null ? 1 : frequency + 1);
          }
        }
      }
    } finally {
      // Closing the stream also closes its channel.
      input.close();
    }
    System.out.println(map);
  }
}





Create a string search and replace using regex

  
import java.util.regex.Pattern;
import java.util.regex.Matcher;
/**
 * Replaces every occurrence of one word in a sentence through a compiled
 * regular expression and prints the text before and after.
 */
public class Main {
  public static void main(String[] args) {
    String source = "The quick brown fox jumps over the brown lazy dog.";
    String find = "brown";
    String replace = "red";
    // "find" is interpreted as a regular expression, not as literal text.
    Pattern pattern = Pattern.compile(find);
    Matcher matcher = pattern.matcher(source);
    String output = matcher.replaceAll(replace);
    System.out.println("Source = " + source);
    System.out.println("Output = " + output);
  }
}





Determining If a String Matches a Pattern Exactly

  
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Shows the difference between {@code Matcher.matches()}, which requires the
 * whole input to match, and {@code Matcher.lookingAt()}, which only requires
 * a match at the beginning of the input. Nothing is printed.
 */
public class Main {
  public static void main(String[] argv) throws Exception {
    // Compile regular expression
    String patternStr = "b";
    Pattern pattern = Pattern.compile(patternStr);
    // Determine if there is an exact match
    CharSequence inputStr = "a b c";
    Matcher matcher = pattern.matcher(inputStr);
    boolean matchFound = matcher.matches(); // false: input is longer than "b"
    // Try a different input
    matcher.reset("b");
    matchFound = matcher.matches(); // true: the whole input is "b"
    // Determine if pattern matches beginning of input
    matchFound = matcher.lookingAt(); // true: input starts with "b"
  }
}





Extract a substring by matching a regular expression.

  
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Finds every address of the form {@code word@XYZ.ru} in a multi-line
 * string and prints each one on its own line.
 */
public class Main {
  public static void main(String args[]) {
    // \b anchors keep the match on word boundaries; the dot is escaped so it is literal.
    Pattern pat = Pattern.compile("\\b\\w+@XYZ\\.ru\\b");
    Matcher mat = pat.matcher("t@XYZ.ru\n" + "a@XYZ.ru\n"
        + "n@XYZ.ru");
    while (mat.find()) {
      System.out.println("Match: " + mat.group());
    }
  }
}
/*
Match: t@XYZ.ru
Match: a@XYZ.ru
Match: n@XYZ.ru
*/





Get all digits from a string

   
/**
 * Prints only the digits contained in a sample string.
 */
public class Main {
  public static void main(String[] argv) throws Exception {
    String mixed = "abasdfasdf1 2wasdfasdf9_8asdfasdfz asdfasdfyx7";
    // \D matches any non-digit character; deleting those leaves the digits.
    String digitsOnly = mixed.replaceAll("\\D", "");
    System.out.println(digitsOnly);
  }
}





Get First Found regex

  
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small regex helpers for pulling matched text out of a string.
 */
public class Utils {

  /**
   * Returns the first result of {@link #getFound(String, String)}.
   *
   * @param contents text to search
   * @param regex regular expression to look for
   * @return the first match, or {@code null} when {@code getFound} yields
   *         nothing usable (see {@link #isEmpty(List)})
   */
  public static String getFirstFound(String contents, String regex) {
    List<String> founds = getFound(contents, regex);
    if (isEmpty(founds)) {
      return null;
    }
    return founds.get(0);
  }

  /**
   * Collects every match of {@code regex} in {@code contents}.
   * If the expression has at least one capturing group, the text of group 1
   * is collected (which may be {@code null} if that group did not take part
   * in a match); otherwise the whole match is collected.
   *
   * @param contents text to search
   * @param regex regular expression to look for
   * @return the matches in order of appearance (possibly empty), or
   *         {@code null} when either argument is {@code null} or blank
   */
  public static List<String> getFound(String contents, String regex) {
    if (isEmpty(regex) || isEmpty(contents)) {
      return null;
    }
    List<String> results = new ArrayList<String>();
    // NOTE: UNICODE_CASE alone does not make matching case-insensitive.
    Pattern pattern = Pattern.compile(regex, Pattern.UNICODE_CASE);
    Matcher matcher = pattern.matcher(contents);

    while (matcher.find()) {
      if (matcher.groupCount() > 0) {
        results.add(matcher.group(1));
      } else {
        results.add(matcher.group());
      }
    }
    return results;
  }

  /**
   * Tells whether a list holds nothing useful.
   *
   * @param list list to inspect, may be {@code null}
   * @return {@code true} for {@code null}, an empty list, or a list whose
   *         only element is {@code null} or blank
   */
  public static boolean isEmpty(List<String> list) {
    if (list == null || list.size() == 0) {
      return true;
    }
    if (list.size() == 1 && isEmpty(list.get(0))) {
      return true;
    }
    return false;
  }

  /**
   * Tells whether a string is {@code null} or contains only whitespace.
   *
   * @param str string to inspect, may be {@code null}
   * @return {@code true} when nothing remains after trimming
   */
  public static boolean isEmpty(String str) {
    if (str != null && str.trim().length() > 0) {
      return false;
    }
    return true;
  }
}





Get First Not Empty String in a String list

  
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Helpers for regex extraction and for picking non-blank strings.
 */
public class Utils {
  /**
   * Returns the first element of {@code list} that is neither {@code null}
   * nor blank.
   *
   * @param list candidates, may be {@code null}
   * @return the first non-blank element, or {@code null} if there is none
   */
  public static String getFirstNotEmpty(List<String> list) {
    if (isEmpty(list)) {
      return null;
    }
    for (String item : list) {
      if (!isEmpty(item)) {
        return item;
      }
    }
    return null;
  }

  /**
   * Collects every match of {@code regex} in {@code contents}.
   * If the expression has at least one capturing group, the text of group 1
   * is collected (which may be {@code null} if that group did not take part
   * in a match); otherwise the whole match is collected.
   *
   * @param contents text to search
   * @param regex regular expression to look for
   * @return the matches in order of appearance (possibly empty), or
   *         {@code null} when either argument is {@code null} or blank
   */
  public static List<String> getFound(String contents, String regex) {
    if (isEmpty(regex) || isEmpty(contents)) {
      return null;
    }
    List<String> results = new ArrayList<String>();
    // NOTE: UNICODE_CASE alone does not make matching case-insensitive.
    Pattern pattern = Pattern.compile(regex, Pattern.UNICODE_CASE);
    Matcher matcher = pattern.matcher(contents);
    while (matcher.find()) {
      if (matcher.groupCount() > 0) {
        results.add(matcher.group(1));
      } else {
        results.add(matcher.group());
      }
    }
    return results;
  }

  /**
   * Tells whether a list holds nothing useful.
   *
   * @param list list to inspect, may be {@code null}
   * @return {@code true} for {@code null}, an empty list, or a list whose
   *         only element is {@code null} or blank
   */
  public static boolean isEmpty(List<String> list) {
    if (list == null || list.size() == 0) {
      return true;
    }
    if (list.size() == 1 && isEmpty(list.get(0))) {
      return true;
    }
    return false;
  }

  /**
   * Tells whether a string is {@code null} or contains only whitespace.
   *
   * @param str string to inspect, may be {@code null}
   * @return {@code true} when nothing remains after trimming
   */
  public static boolean isEmpty(String str) {
    if (str != null && str.trim().length() > 0) {
      return false;
    }
    return true;
  }
}





Get Found regex

  
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Regex helper that lists all matches of an expression in a string.
 */
public class Utils {

  /**
   * Collects every match of {@code regex} in {@code contents}.
   * If the expression has at least one capturing group, the text of group 1
   * is collected (which may be {@code null} if that group did not take part
   * in a match); otherwise the whole match is collected.
   *
   * @param contents text to search
   * @param regex regular expression to look for
   * @return the matches in order of appearance (possibly empty), or
   *         {@code null} when either argument is {@code null} or blank
   */
  public static List<String> getFound(String contents, String regex) {
    if (isEmpty(regex) || isEmpty(contents)) {
      return null;
    }
    List<String> results = new ArrayList<String>();
    // NOTE: UNICODE_CASE alone does not make matching case-insensitive.
    Pattern pattern = Pattern.compile(regex, Pattern.UNICODE_CASE);
    Matcher matcher = pattern.matcher(contents);

    while (matcher.find()) {
      if (matcher.groupCount() > 0) {
        results.add(matcher.group(1));
      } else {
        results.add(matcher.group());
      }
    }
    return results;
  }

  /**
   * Tells whether a string is {@code null} or contains only whitespace.
   *
   * @param str string to inspect, may be {@code null}
   * @return {@code true} when nothing remains after trimming
   */
  public static boolean isEmpty(String str) {
    if (str != null && str.trim().length() > 0) {
      return false;
    }
    return true;
  }
}





Ignore case differences when searching for or replacing substrings.

  
/**
 * Checks case-insensitively whether a sample sentence contains "test".
 */
public class Main {
  public static void main(String args[]) {
    String sentence = "This is a TEST.";
    // The embedded (?i) flag makes the whole expression ignore case.
    boolean containsTest = sentence.matches("(?i).*test.*");
    if (containsTest) {
      System.out.println("test is in the string.");
    }
  }
}





Java Regular Expression :split 2

   
/* From http://java.sun.com/docs/books/tutorial/index.html */
/*
 * Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * -Redistribution of source code must retain the above copyright notice, this
 *  list of conditions and the following disclaimer.
 *
 * -Redistribution in binary form must reproduce the above copyright notice,
 *  this list of conditions and the following disclaimer in the documentation
 *  and/or other materials provided with the distribution.
 *
 * Neither the name of Sun Microsystems, Inc. or the names of contributors may
 * be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * This software is provided "AS IS," without a warranty of any kind. ALL
 * EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES, INCLUDING
 * ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE
 * OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED. SUN MICROSYSTEMS, INC. ("SUN")
 * AND ITS LICENSORS SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY LICENSEE
 * AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS
 * DERIVATIVES. IN NO EVENT WILL SUN OR ITS LICENSORS BE LIABLE FOR ANY LOST
 * REVENUE, PROFIT OR DATA, OR FOR DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL,
 * INCIDENTAL OR PUNITIVE DAMAGES, HOWEVER CAUSED AND REGARDLESS OF THE THEORY
 * OF LIABILITY, ARISING OUT OF THE USE OF OR INABILITY TO USE THIS SOFTWARE,
 * EVEN IF SUN HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
 *
 * You acknowledge that this software is not designed, licensed or intended
 * for use in the design, construction, operation or maintenance of any
 * nuclear facility.
 */
import java.util.regex.Pattern;
/**
 * Splits a sample string at every digit and prints the pieces one per line.
 */
public final class SplitTest2 {
  /** Delimiter expression: any single digit. */
  private static final String REGEX = "\\d";
  private static final String INPUT = "one9two4three7four1five";

  public static void main(String[] argv) {
    Pattern p = Pattern.compile(REGEX);
    String[] items = p.split(INPUT);
    for (String item : items) {
      System.out.println(item);
    }
  }
}





Java Regular Expression : Split text

   
/* From http://java.sun.com/docs/books/tutorial/index.html */
/*
 * Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * -Redistribution of source code must retain the above copyright notice, this
 *  list of conditions and the following disclaimer.
 *
 * -Redistribution in binary form must reproduce the above copyright notice,
 *  this list of conditions and the following disclaimer in the documentation
 *  and/or other materials provided with the distribution.
 *
 * Neither the name of Sun Microsystems, Inc. or the names of contributors may
 * be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * This software is provided "AS IS," without a warranty of any kind. ALL
 * EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES, INCLUDING
 * ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE
 * OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED. SUN MICROSYSTEMS, INC. ("SUN")
 * AND ITS LICENSORS SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY LICENSEE
 * AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS
 * DERIVATIVES. IN NO EVENT WILL SUN OR ITS LICENSORS BE LIABLE FOR ANY LOST
 * REVENUE, PROFIT OR DATA, OR FOR DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL,
 * INCIDENTAL OR PUNITIVE DAMAGES, HOWEVER CAUSED AND REGARDLESS OF THE THEORY
 * OF LIABILITY, ARISING OUT OF THE USE OF OR INABILITY TO USE THIS SOFTWARE,
 * EVEN IF SUN HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
 *
 * You acknowledge that this software is not designed, licensed or intended
 * for use in the design, construction, operation or maintenance of any
 * nuclear facility.
 */
import java.util.regex.Pattern;
/**
 * Splits a colon-separated sample string and prints the pieces one per line.
 */
public final class SplitTest {
  /** Delimiter expression: a single colon. */
  private static final String REGEX = ":";
  private static final String INPUT = "one:two:three:four:five";

  public static void main(String[] argv) {
    Pattern p = Pattern.compile(REGEX);
    String[] items = p.split(INPUT);
    for (String item : items) {
      System.out.println(item);
    }
  }
}





Match punct

  
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Tests whether the sample input starts with a punctuation character and
 * prints the result.
 */
public class Main {
  public static void main(String[] argv) throws Exception {
    // \G anchors the match at the end of the previous match; for the first
    // find() that is the start of the input.
    Pattern punct = Pattern.compile("\\G\\p{Punct}");
    Matcher mat = punct.matcher("this is a test 999");
    System.out.println(mat.find());
  }
}
//false





Match space

  
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Tests whether the sample input starts with a whitespace character and
 * prints the result.
 */
public class Main {
  public static void main(String[] argv) throws Exception {
    // \G anchors the match at the end of the previous match; for the first
    // find() that is the start of the input.
    Pattern space = Pattern.compile("\\G\\s");
    Matcher mat = space.matcher("this is a test 999");
    System.out.println(mat.find());
  }
}





Match string ends

  
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Tests whether the end of input is reached right at the start of the
 * sample string; prints the (empty) match only if that is the case.
 */
public class Main {
  public static void main(String[] argv) throws Exception {
    // \G is the start position for the first find(); \z is the very end of input.
    Pattern end = Pattern.compile("\\G\\z");
    Matcher mat = end.matcher("this is a test 999");
    if (mat.find()) {
      System.out.println(mat.group());
    }
  }
}





Match words

  
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Tests whether the sample input starts with a run of word characters and
 * prints the result.
 */
public class Main {
  public static void main(String[] argv) throws Exception {
    // \G anchors the match at the end of the previous match; for the first
    // find() that is the start of the input.
    Pattern word = Pattern.compile("\\G\\w+");
    Matcher mat = word.matcher("this is a test 999");
    System.out.println(mat.find());
  }
}
//true





Parse an Apache log file with StringTokenizer

   
/*
 * Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
 * All rights reserved. Software written by Ian F. Darwin and others.
 * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * 
 * Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
 * cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
 * pioneering role in inventing and promulgating (and standardizing) the Java 
 * language and environment is gratefully acknowledged.
 * 
 * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
 * inventing predecessor languages C and C++ is also gratefully acknowledged.
 */
import java.util.*;
/**
 * Parse an Apache log file with StringTokenizer.
 * Prints each field of the sample entry {@code logEntryLine} on its own line.
 */
public class LogStrTok implements LogExample {
  public static void main(String argv[]) {
    StringTokenizer tok = new StringTokenizer(logEntryLine);
    int tokenCount = tok.countTokens();
    System.out.println("tokens = " + tokenCount);
    // The count is deliberately not compared against NUM_FIELDS:
    // StringTokenizer cannot count correctly when the delimiter is
    // changed between calls, as it is below.
    String hostname = tok.nextToken();
    System.out.println("Hostname: " + hostname);
    // Fields can only be skipped by reading them.
    tok.nextToken(); // first "-"
    tok.nextToken(); // second "-"
    String dateTime = tok.nextToken("]");
    System.out.println("Date/Time: " + dateTime);
    String request = tok.nextToken("\"");
    System.out.println("Request: " + request);
    tok.nextToken(" "); // switch back to space as the delimiter
    String response = tok.nextToken();
    System.out.println("Response: " + response);
    String byteCount = tok.nextToken();
    System.out.println("ByteCount: " + byteCount);
    String referer = tok.nextToken("\"");
    System.out.println("Referer: " + referer);
    tok.nextToken(" "); // switch back to space as the delimiter
    String userAgent = tok.nextToken("\"");
    System.out.println("User-Agent: " + userAgent);
  }
}
/**
 * Shared constants for the Apache log parsing demos.
 * Interface fields are implicitly {@code public static final}.
 */
interface LogExample {
  /** How many fields a well-formed entry must contain. */
  int NUM_FIELDS = 9;
  /** Sample log entry that the demos parse. */
  String logEntryLine = "123.45.67.89 - - [27/Oct/2000:09:27:09 -0400] \"GET /java/javaResources.html HTTP/1.0\" 200 10450 \"-\" \"Mozilla/4.6 [en] (X11; U; OpenBSD 2.8 i386; Nav)\"";
}





Print all the strings that match a given pattern from a file

   
/*
 * Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
 * All rights reserved. Software written by Ian F. Darwin and others.
 * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * 
 * Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
 * cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
 * pioneering role in inventing and promulgating (and standardizing) the Java 
 * language and environment is gratefully acknowledged.
 * 
 * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
 * inventing predecessor languages C and C++ is also gratefully acknowledged.
 */
import java.util.regex.*;
import java.io.*;
/**
 * Print all the strings that match a given pattern from a file.
 * The file read is {@code ReaderIter.java} in the current working directory.
 */
public class ReaderIter {
  /**
   * Prints every match of the pattern, one per line.
   *
   * @param args ignored
   * @throws IOException if the file cannot be opened or read
   */
  public static void main(String[] args) throws IOException {
    // The RE pattern: one letter followed by at least one lower-case letter
    Pattern patt = Pattern.compile("[A-Za-z][a-z]+");
    // A FileReader (see the I/O chapter)
    BufferedReader r = new BufferedReader(new FileReader("ReaderIter.java"));
    try {
      // For each line of input, try matching in it.
      String line;
      while ((line = r.readLine()) != null) {
        // For each match in the line, extract and print it.
        Matcher m = patt.matcher(line);
        while (m.find()) {
          // Simplest method would be m.group(0); the offsets are used
          // here to show start()/end().
          int start = m.start(0);
          int end = m.end(0);
          // Print whatever matched.
          System.out.println(line.substring(start, end));
        }
      }
    } finally {
      // Release the file handle even if reading fails.
      r.close();
    }
  }
}





Quick demo of Regular Expressions substitution

   
/*
 * Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
 * All rights reserved. Software written by Ian F. Darwin and others.
 * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * 
 * Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
 * cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
 * pioneering role in inventing and promulgating (and standardizing) the Java 
 * language and environment is gratefully acknowledged.
 * 
 * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
 * inventing predecessor languages C and C++ is also gratefully acknowledged.
 */
import java.util.regex.*;
/**
 * Quick demo of RE substitution: correct "demon" and other
 * spelling variants to the correct, non-satanic "daemon".
 * The same replacement is performed twice, once with
 * {@code replaceAll} and once with the append methods.
 * @author Ian F. Darwin, http://www.darwinsys.com/
 * @version $Id: ReplaceDemo.java,v 1.5 2004/02/09 03:33:42 ian Exp $
 */
public class ReplaceDemo {
  public static void main(String[] argv) {
    // Make an RE pattern to match almost any form (deamon, demon, etc.).
    String patt = "d[ae]{1,2}mon";  // i.e., 1 or 2 "a" or "e" any combo
    // A test input.
    String input = "Unix hath demons and deamons in it!";
    System.out.println("Input: " + input);
    // Run it from a RE instance and see that it works
    Pattern r = Pattern.compile(patt);
    Matcher m = r.matcher(input);
    System.out.println("ReplaceAll: " + m.replaceAll("daemon"));
    // Show the appendReplacement method; reset() rewinds the matcher
    // so that find() starts again from the beginning of the input.
    m.reset();
    StringBuffer sb = new StringBuffer();
    System.out.print("Append methods: ");
    while (m.find()) {
      // Copy the text before this match, then the word "daemon"
      m.appendReplacement(sb, "daemon");
    }
    m.appendTail(sb);            // copy remainder
    System.out.println(sb.toString());
  }
}





Regular Expression Replace

   
/* From http://java.sun.com/docs/books/tutorial/index.html */
/*
 * Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * -Redistribution of source code must retain the above copyright notice, this
 *  list of conditions and the following disclaimer.
 *
 * -Redistribution in binary form must reproduce the above copyright notice,
 *  this list of conditions and the following disclaimer in the documentation
 *  and/or other materials provided with the distribution.
 *
 * Neither the name of Sun Microsystems, Inc. or the names of contributors may
 * be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * This software is provided "AS IS," without a warranty of any kind. ALL
 * EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES, INCLUDING
 * ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE
 * OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED. SUN MICROSYSTEMS, INC. ("SUN")
 * AND ITS LICENSORS SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY LICENSEE
 * AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS
 * DERIVATIVES. IN NO EVENT WILL SUN OR ITS LICENSORS BE LIABLE FOR ANY LOST
 * REVENUE, PROFIT OR DATA, OR FOR DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL,
 * INCIDENTAL OR PUNITIVE DAMAGES, HOWEVER CAUSED AND REGARDLESS OF THE THEORY
 * OF LIABILITY, ARISING OUT OF THE USE OF OR INABILITY TO USE THIS SOFTWARE,
 * EVEN IF SUN HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
 *
 * You acknowledge that this software is not designed, licensed or intended
 * for use in the design, construction, operation or maintenance of any
 * nuclear facility.
 */
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Replaces every match of {@code a*b} in a sample string with a dash using
 * {@code appendReplacement}/{@code appendTail} and prints the result.
 */
public final class RegexTest {
  private static final String REGEX = "a*b";
  private static final String INPUT = "aabfooaabfooabfoob";
  private static final String REPLACE = "-";

  public static void main(String[] args) {
    Pattern p = Pattern.compile(REGEX);
    Matcher m = p.matcher(INPUT); // get a matcher object
    StringBuffer sb = new StringBuffer();
    while (m.find()) {
      m.appendReplacement(sb, REPLACE);
    }
    // Copy whatever follows the last match.
    m.appendTail(sb);
    System.out.println(sb.toString());
  }
}





Regular Expression Search and Replace Program

  
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Replaces every "a" in a sample string with "x" and prints the result.
 */
public class Main {
  public static void main(String[] args) {
    CharSequence inputStr = "a b c a b c";
    String patternStr = "a";
    String replacementStr = "x";
    Pattern pattern = Pattern.compile(patternStr);
    // Replace all occurrences of pattern in input
    Matcher matcher = pattern.matcher(inputStr);
    String output = matcher.replaceAll(replacementStr);
    System.out.println(output);
  }
}





Regular expression: Split Demo

   
// : c12:SplitDemo.java
// From "Thinking in Java, 3rd ed." (c) Bruce Eckel 2002
// www.BruceEckel.com. See copyright notice in CopyRight.txt.
import java.util.Arrays;
import java.util.regex.Pattern;
/**
 * Demonstrates {@code Pattern.split} with and without a limit, and the
 * equivalent {@code String.split} convenience method.
 */
public class SplitDemo {
  public static void main(String[] args) {
    String input = "This!!unusual use!!of exclamation!!points";
    // Compile once and reuse for both splits.
    Pattern doubleBang = Pattern.compile("!!");
    System.out.println(Arrays.asList(doubleBang.split(input)));
    // Only do the first three: the third piece keeps the rest of the input.
    System.out.println(Arrays.asList(doubleBang.split(input, 3)));
    System.out.println(Arrays.asList("Aha! String has a split() built in!"
        .split(" ")));
  }
} ///:~





Remove trailing white space from a string

  
/**
 * Strips the whitespace at the end of a padded sample string and prints it.
 */
public class Main {
  public static void main(String[] args) {
    String padded = "     a     ";
    // \s+$ matches the run of whitespace that reaches the end of the string;
    // leading whitespace is left alone.
    String rightTrimmed = padded.replaceAll("\\s+$", "");
    System.out.println("Text: " + rightTrimmed);
  }
}





Removing Duplicate Whitespace in a String

  
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Provides a helper that collapses runs of whitespace into single spaces.
 */
public class Main {
  /** One or more consecutive whitespace characters; compiled once and reused. */
  private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

  public static void main(String[] argv) throws Exception {
  }

  /**
   * Replaces every run of whitespace (spaces, tabs, line breaks, ...) in
   * the input with a single space character.
   *
   * @param inputStr text to normalize; must not be {@code null}
   * @return the text with each whitespace run collapsed to one space
   */
  public static CharSequence removeDuplicateWhitespace(CharSequence inputStr) {
    Matcher matcher = WHITESPACE_RUN.matcher(inputStr);
    return matcher.replaceAll(" ");
  }
}





Replacing String Tokenizer

   
// : c12:ReplacingStringTokenizer.java
// From "Thinking in Java, 3rd ed." (c) Bruce Eckel 2002
// www.BruceEckel.com. See copyright notice in CopyRight.txt.
import java.util.Arrays;
import java.util.StringTokenizer;
/**
 * Tokenizes a sentence first with {@code StringTokenizer} and then with
 * {@code String.split}, printing the tokens from both approaches.
 */
public class ReplacingStringTokenizer {
  public static void main(String[] args) {
    String input = "But I'm not dead yet! I feel happy!";
    StringTokenizer stoke = new StringTokenizer(input);
    while (stoke.hasMoreElements()) {
      System.out.println(stoke.nextToken());
    }
    System.out.println(Arrays.asList(input.split(" ")));
  }
} ///:~





Searching and Replacing with Nonconstant Values Using a Regular Expression

  
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class Main {
  /**
   * Finds every token made of letters followed by digits and rewrites it as
   * {@code found<token>}, leaving the rest of the input untouched.
   *
   * @param argv ignored
   */
  public static void main(String[] argv) throws Exception {
    CharSequence inputStr = "ab12 cd efg34 123";
    String patternStr = "([a-zA-Z]+[0-9]+)";
    Pattern pattern = Pattern.compile(patternStr);
    Matcher matcher = pattern.matcher(inputStr);
    StringBuffer buf = new StringBuffer();
    while (matcher.find()) {
      String replaceStr = "found<" + matcher.group() + ">";
      // Quote the computed text so '$' and '\' can never be read as group
      // references if the pattern is later widened.
      matcher.appendReplacement(buf, Matcher.quoteReplacement(replaceStr));
    }
    // Copy whatever follows the last match.
    matcher.appendTail(buf);
    String result = buf.toString();
    System.out.println(result);
  }
}





Simple split

   
/** Splits a sentence on an alternation of words and phrases and lists the pieces. */
public class Split {
  public static void main(String args[]) {
    final String splitPattern = "compromise|cooperate|concession|"
        + "conciliation|(finding the middle group)|(give and take)";
    final String statement = "I will not compromise. I will not "
        + "cooperate. There will be no concession, no conciliation, no "
        + "finding the middle group, and no give and take.";
    // Everything between two delimiter matches becomes one token.
    final String[] pieces = statement.split(splitPattern);
    System.out.println("REGEX PATTERN:\n" + splitPattern + "\n");
    System.out.println("STATEMENT:\n" + statement + "\n");
    System.out.println("\nTOKENS");
    for (String piece : pieces) {
      System.out.println(piece);
    }
  }
}





Split a String into a Java Array of Strings divided by a Regular Expression

   
/*
 * Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
 * All rights reserved. Software written by Ian F. Darwin and others.
 * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * 
 * Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
 * cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
 * pioneering role in inventing and promulgating (and standardizing) the Java 
 * language and environment is gratefully acknowledged.
 * 
 * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
 * inventing predecessor languages C and C++ is also gratefully acknowledged.
 */
import java.util.regex.*;
/**
 * Split a String into a Java array of Strings divided by a regular expression.
 */
public class Split {
  /**
   * Splits a sample sentence on the literal text "ian" and prints each piece
   * with its index, quoted so that leading spaces are visible.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    String[] x =
      Pattern.compile("ian").split(
        "the darwinian devonian explodian chicken");
    for (int i = 0; i < x.length; i++) {
      System.out.println(i + " \"" + x[i] + "\"");
    }
  }
}





Split the supplied content into lines, returning each line as an element in the returned list.

    
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
/*
 * JBoss DNA (http://www.jboss.org/dna)
 * See the COPYRIGHT.txt file distributed with this work for information
 * regarding copyright ownership.  Some portions may be licensed
 * to Red Hat, Inc. under one or more contributor license agreements.
* See the AUTHORS.txt file in the distribution for a full listing of 
* individual contributors.
 *
 * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA
 * is licensed to you under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * JBoss DNA is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this software; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
 */

/**
 * Helper routines for working with strings.
 */
public class StringUtil {

  /**
   * Break the given text into its individual lines. A line ends at "\n", optionally preceded by "\r".
   * Because String.split is used, empty lines at the very end of the text are not reported.
   * 
   * @param content the text to break apart; may be null
   * @return the lines in their original order; never null, and an empty immutable list when the text is null or empty
   */
  public static List<String> splitLines( final String content ) {
      if (content != null && content.length() > 0) {
          return Arrays.asList(content.split("[\\r]?\\n"));
      }
      return Collections.emptyList();
  }
}





Split-up string using regular expression

  
import java.util.regex.Pattern;
public class Main {
  /**
   * Splits a list of colour names separated by any mix of commas and
   * whitespace, then prints each name in quotes.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    // One delimiter is a run of commas and/or whitespace characters.
    String pattern = "[,\\s]+";
    String colours = "Red,White, Blue   Green        Yellow, Orange";
    Pattern splitter = Pattern.compile(pattern);
    String[] result = splitter.split(colours);
    for (String colour : result) {
      System.out.println("Colour = \"" + colour + "\"");
    }
  }
}





StringConvenience -- demonstrate java.lang.String convenience routine

   
/*
 * Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
 * All rights reserved. Software written by Ian F. Darwin and others.
 * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * 
 * Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
 * cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
 * pioneering role in inventing and promulgating (and standardizing) the Java 
 * language and environment is gratefully acknowledged.
 * 
 * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
 * inventing predecessor languages C and C++ is also gratefully acknowledged.
 */
/**
 * StringConvenience -- shows String.matches(), the one-call shortcut for
 * testing a whole string against a regular expression.
 * @author Ian F. Darwin
 * @version $Id: StringConvenience.java,v 1.2 2004/02/23 02:37:34 ian Exp $
 */
public class StringConvenience {
  public static void main(String[] argv) {
    final String regex = ".*Q[^u]\\d+\\..*";
    final String order = "Order QT300. Now!";
    // matches() succeeds only when the regex covers the entire string.
    final String report = order.matches(regex)
        ? order + " matches \"" + regex + "\""
        : "NO MATCH";
    System.out.println(report);
  }
}





String replace

   
/** Tightens a wordy paragraph by applying a fixed list of phrase substitutions. */
public class StyleSearchAndReplace {
  public static void main(String args[]) {
    final String statement = "The question as to whether the jab is"
        + " superior to the cross has been debated for some time in"
        + " boxing circles. However, it is my opinion that this"
        + " false dichotomy misses the point. I call your attention"
        + " to the fact that the best boxers often use a combination of"
        + " the two. I call your attention to the fact that Mohammed"
        + " Ali,the Greatest of the sport of boxing, used both. He had"
        + " a tremendous jab, yet used his cross effectively, often,"
        + " and well";
    // Each row is {phrase to find, replacement}; rows are applied top to bottom.
    final String[][] substitutions = {
        {"The question as to whether", "Whether"},
        {" of the sport of boxing", ""},
        {"amount of success", "success"},
        {"However, it is my opinion that this", "This"},
        {"a combination of the two", "both"},
        {"This is in spite of the fact that" + " the", "The"},
        {"I call your attention to the fact that", ""},
    };
    String newStmt = statement;
    for (String[] substitution : substitutions) {
      newStmt = newStmt.replaceAll(substitution[0], substitution[1]);
    }
    System.out.println("BEFORE:\n" + statement + "\n");
    System.out.println("AFTER:\n" + newStmt);
  }
}





String split

   
public class StyleSplitExample {
  public static void main(String args[]) {
    String phrase1 = "but simple justice, not charity";
    strengthenSentence(phrase1);
    String phrase2 = "but that I love Rome more, not that I love Caesar less";
    strengthenSentence(phrase2);
    String phrase3 = "ask what you can do for your country, ask not what your "
        + "country can do for you";
    strengthenSentence(phrase3);
  }

  /**
   * Swaps the two clauses of a sentence around its first comma and prints
   * the sentence before and after the swap.
   *
   * @param sentence the sentence to rearrange; must not be null
   * @return the second clause, a comma and a space, then the first clause;
   *         or null when the sentence does not split into at least two parts
   */
  public static String strengthenSentence(String sentence) {
    String splitPattern = ",";
    String[] tokens = sentence.split(splitPattern);
    // split() never returns null; a sentence with no comma yields fewer
    // than two tokens, so that is the real "no match" condition.
    if (tokens.length < 2) {
      String msg = "   NO MATCH: pattern:" + sentence
          + "\r\n             regex: " + splitPattern;
      System.out.println(msg);
      return null;
    }
    String retval = tokens[1] + ", " + tokens[0];
    System.out.println("BEFORE: " + sentence);
    System.out.println("AFTER : " + retval + "\n");
    return retval;
  }
}





Strip extra spaces in a XML string

    

/** Removes the whitespace that sits between adjacent XML tags. */
public class Main {
  public static void main(String[] args) {
    final String original = "<a>test 1</a>    <b>test 2</b> ";
    System.out.println(original);
    // Only whitespace bracketed by '>' and '<' is dropped; text inside
    // elements and the trailing blank are left alone.
    final String compacted = original.replaceAll(">\\s+<", "><");
    System.out.println(compacted);
  }
}





Use Matcher.appendReplacement() to match [a-zA-Z]+[0-9]+

  
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class Main {
  /**
   * Uses Matcher.appendReplacement() and appendTail() to wrap every token of
   * letters followed by digits in {@code found<...>}.
   *
   * @param argv ignored
   */
  public static void main(String[] argv) throws Exception {
    CharSequence inputStr = "ab12 cd efg34 asdf 123";
    String patternStr = "([a-zA-Z]+[0-9]+)";
    Pattern pattern = Pattern.compile(patternStr);
    Matcher matcher = pattern.matcher(inputStr);
    StringBuffer buf = new StringBuffer();
    while (matcher.find()) {
      String replaceStr = "found<" + matcher.group() + ">";
      // Quote the computed text so '$' and '\' are always taken literally.
      matcher.appendReplacement(buf, Matcher.quoteReplacement(replaceStr));
    }
    // Append the unmatched remainder of the input.
    matcher.appendTail(buf);
    String result = buf.toString();
    System.out.println(result);
  }
}
//found<ab12> cd found<efg34> asdf 123





Use replaceAll() to ignore case when replacing one substring with another

  
/** Replaces a substring regardless of letter case by using the (?i) inline flag. */
public class Main {
  public static void main(String[] argv) throws Exception {
    final String source = "Abc abc";
    // (?i) turns on case-insensitive matching for the rest of the pattern.
    final String replaced = source.replaceAll("(?i)abc", "DEF");
    System.out.println("After replacement:\n   " + replaced);
  }
}
/*
After replacement:
   DEF DEF
*/