Java Tutorial/Regular Expressions/Introduction

Материал из Java эксперт
Перейти к: навигация, поиск

A negative look behind

import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Prints the position and text of every occurrence of the pattern
 * {@code (and)} inside the word "candidate".
 *
 * NOTE(review): despite the class name, the pattern as written contains no
 * look-behind assertion; it is a plain capturing group. Confirm against the
 * original tutorial whether a {@code (?<!...)} prefix was lost.
 */
public class NegativeLookBehindExample {
  /**
   * Runs the example and writes one report per match to standard output.
   *
   * @param args ignored
   * @throws Exception never thrown by the current body; kept for compatibility
   */
  public static void main(String args[]) throws Exception {
    String regex = "(and)";
    Pattern pattern = Pattern.compile(regex);
    String candidate = "candidate";
    Matcher matcher = pattern.matcher(candidate);
    int counter = 0;
    while (matcher.find()) {
      // Wrap the match in colons so its exact extent is visible in the output.
      String tmp = ":" + matcher.group() + ":";
      System.out.println("counter = " + counter);
      counter++;
      System.out.println("start = " + matcher.start());
      System.out.println("end = " + matcher.end());
      System.out.println("tmp = " + tmp);
      System.out.println("candidate.length() = " + candidate.length() + "\n");
    }
  }
}
/**/



counter = 0
start = 1
end = 4
tmp = :and:
candidate.length() = 9


A negative look ahead

import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates a negative look-ahead: the pattern matches the text "First "
 * only when it is NOT immediately followed by "Second", and then requires a
 * capitalized word.
 */
public class MainClass {
  /**
   * Prints every match of the look-ahead pattern in the sample text.
   *
   * With the sample text as written, the only occurrence of "First " is
   * followed by "Second", so the look-ahead rejects it and nothing is printed.
   *
   * @param args ignored
   * @throws Exception never thrown by the current body; kept for compatibility
   */
  public static void main(String args[]) throws Exception {
    // "(?!Second)" asserts that the next characters are not "Second"
    // without consuming them.
    String regex = "First (?!Second)[A-Z]\\w+";
    Pattern pattern = Pattern.compile(regex);
    String candidate = "First Second asdf ";
    candidate += "John third, John second asdf, ";
    candidate += "John Fourth.";
    Matcher matcher = pattern.matcher(candidate);
    while (matcher.find()) {
      System.out.println("MATCH:" + matcher.group());
    }
  }
}





A positive look ahead

import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates a positive look-ahead: {@code (?=^255)} succeeds only when the
 * input starts with "255", after which {@code .*} consumes the whole line.
 */
public class MainClass {
  /**
   * Matches the sample address against the look-ahead pattern and prints the
   * matched text, or "not found" when the pattern does not match.
   *
   * @param args ignored
   */
  public static void main(String args[]) {
    String regex = "(?=^255).*";
    Pattern pattern = Pattern.compile(regex);
    String candidate = "255.0.0.1";
    Matcher matcher = pattern.matcher(candidate);
    // The look-ahead itself consumes nothing, so group() is whatever ".*" matched.
    String ip = "not found";
    if (matcher.find()) {
      ip = matcher.group();
    }
    System.out.println("ip: " + ip);
  }
}





A possessive qualifier

import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates a possessive quantifier. {@code \w++} grabs every word
 * character and never gives any back, so the following {@code \d\d} has
 * nothing left to match and the overall match fails.
 */
public class MainClass {
  /**
   * Tries the possessive pattern against "X99SuperJava" and prints either the
   * captured groups or "NO MATCHES", followed by "Done".
   *
   * @param args ignored
   */
  public static void main(String args[]) {
    String regex = "(\\w++)(\\d\\d)(\\w+)";
    Pattern pattern = Pattern.compile(regex);
    String candidate = "X99SuperJava";
    Matcher matcher = pattern.matcher(candidate);
    if (matcher.find()) {
      // Group 0 is the whole match; groups 1..3 are the parenthesized parts.
      for (int group = 0; group <= 3; group++) {
        System.out.println("GROUP " + group + ":" + matcher.group(group));
      }
    } else {
      System.out.println("NO MATCHES");
    }
    System.out.println("Done");
  }
}
/*
 */



NO MATCHES
Done


Escapes characters that have special meaning to regular expressions

/*
 * Static String formatting and query routines.
 * Copyright (C) 2001-2005 Stephen Ostermiller
 * http://ostermiller.org/contact.pl?regarding=Java+Utilities
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * See COPYING.TXT for details.
 */

import java.util.HashMap;
import java.util.regex.Pattern;
/**
 * Utilities for String formatting, manipulation, and queries.
 * More information about this class is available from .
 *
 * @author Stephen Ostermiller http://ostermiller.org/contact.pl?regarding=Java+Utilities
 * @since ostermillerutils 1.00.00
 */
public class StringHelper {
  /**
   * Escapes characters that have special meaning to
   * regular expressions, so the result can be embedded in a pattern and
   * matches the input literally.
   *
   * Every character other than [0-9a-zA-Z] is preceded by a backslash.
   * The Pattern documentation allows a backslash before any non-alphabetic
   * character, but treats a backslash before an alphabetic character that
   * is not a defined escape as an error, so letters and digits are left
   * untouched.
   *
   * @param s String to be escaped
   * @return escaped String; the same instance as s when nothing needed escaping
   * @throws NullPointerException if s is null.
   *
   * @since ostermillerutils 1.02.25
   */
  public static String escapeRegularExpressionLiteral(String s){
    int length = s.length();
    // Allocated lazily on the first character that needs a backslash, so a
    // string with nothing to escape is returned as-is without copying.
    StringBuilder sb = null;
    for (int i=0; i<length; i++){
      char c = s.charAt(i);
      if (!isAsciiLetterOrDigit(c)){
        if (sb == null){
          sb = new StringBuilder(length + 8);
          sb.append(s, 0, i);
        }
        sb.append('\\');
      }
      if (sb != null){
        sb.append(c);
      }
    }
    return (sb == null) ? s : sb.toString();
  }

  /**
   * Tells whether c is one of the characters that never need escaping.
   *
   * @param c character to test
   * @return true iff c is in [0-9A-Za-z]
   */
  private static boolean isAsciiLetterOrDigit(char c){
    return (c>='0' && c<='9') || (c>='A' && c<='Z') || (c>='a' && c<='z');
  }
}





Finding all words that start with an "a"

Define the matching pattern as a word boundary, a lowercase "a", any number of immediately trailing letters, numbers, or underscores, followed by a word boundary.



import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Finds every word in a sample sentence that starts with a lowercase "a".
 */
public class MainClass {
  /**
   * Prints the input, the pattern, and each matching word; prints
   * "NO MATCHES: " when nothing matched.
   *
   * @param args ignored
   * @throws Exception never thrown by the current body; kept for compatibility
   */
  public static void main(String args[]) throws Exception {
    String candidate = "applying a pattern.";
    // \b = word boundary, a = literal "a", \w* = rest of the word.
    String regex = "\\ba\\w*\\b";
    Pattern p = Pattern.compile(regex);
    Matcher m = p.matcher(candidate);
    System.out.println("INPUT: " + candidate);
    System.out.println("REGEX: " + regex + "\r\n");
    // Stays null when the loop body never runs, i.e. when there was no match.
    String val = null;
    while (m.find()) {
      val = m.group();
      System.out.println("MATCH: " + val);
    }
    if (val == null) {
      System.out.println("NO MATCHES: ");
    }
  }
}





Find the starting point of the second "Bond"

import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Locates the start index of the second occurrence of "Bond" in a sentence.
 */
public class MainClass {
  /**
   * Prints the sentence, then a caret marker line followed by the index at
   * which the second "Bond" starts.
   *
   * @param args ignored
   */
  public static void main(String args[]) {
    String candidateString = "My name is Bond. James Bond.";
    String matchHelper[] = { "          ^", "                      ^" };
    Pattern p = Pattern.compile("Bond");
    Matcher matcher = p.matcher(candidateString);
    // Each find() resumes after the previous match, so the first call
    // skips over the first "Bond" and the second call lands on the second.
    if (matcher.find() && matcher.find()) {
      int nextIndex = matcher.start();
      System.out.println(candidateString);
      System.out.println(matchHelper[1] + nextIndex);
    } else {
      System.out.println("Fewer than two matches found.");
    }
  }
}





Java Character Class

Character Class      Matches
\p{javaLowerCase}    Everything that Character.isLowerCase() matches
\p{javaUpperCase}    Everything that Character.isUpperCase() matches
\p{javaWhitespace}   Everything that Character.isWhitespace() matches
\p{javaMirrored}     Everything that Character.isMirrored() matches


Match a particular character a specified number of times.

There are two general ways the repetition operators work.

One class of operators is greedy, that is, they match as much as they can, until the end.

The other class is reluctant (or lazy), and matches only to the first chance they can terminate.

For example, the regular expression .*; matches any number of characters up to the last semicolon it finds.

To only match up to the first semicolon, the reluctant version .*?; must be used.

Greedy Operator    Description
X?                 Matches X zero or one time
X*                 Matches X zero or more times
X+                 Matches X one or more times
X{n}               Matches X exactly n times, where n is any number
X{n,}              Matches X at least n times
X{n,m}             Matches X at least n, but no more than m times

Reluctant (Lazy) Operator    Description
X??                          Matches X zero or one time
X*?                          Matches X zero or more times
X+?                          Matches X one or more times
X{n}?                        Matches X exactly n times, where n is any number
X{n,}?                       Matches X at least n times
X{n,m}?                      Matches X at least n, but no more than m times


Match Java source file and file and class name

import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Applies two patterns to a line of Java source: one that recognizes a class
 * declaration with no modifier in front of it, and one that recognizes the
 * same word written twice in a row.
 */
public class RegExpExample {
  /**
   * Checks the sample line against both patterns and reports what was found.
   *
   * @param args ignored
   */
  public static void main(String args[]) {
    // Optional leading whitespace, then "class", then the captured class name.
    String unadornedClassRE = "^\\s*class (\\w+)";
    // A word, whitespace, then the same word again via the \1 back-reference.
    String doubleIdentifierRE = "\\b(\\w+)\\s+\\1\\b";
    Pattern classPattern = Pattern.compile(unadornedClassRE);
    Pattern doublePattern = Pattern.compile(doubleIdentifierRE);
    String line = " class MainClass";
    Matcher classMatcher = classPattern.matcher(line);
    Matcher doubleMatcher = doublePattern.matcher(line);
    if (classMatcher.find()) {
      System.out.println("The class [" + classMatcher.group(1) + "] is not public");
    }
    while (doubleMatcher.find()) {
      System.out.println("The word \"" + doubleMatcher.group(1) + "\" occurs twice at position "
          + doubleMatcher.start());
    }
  }
}





Meta-characters predefined to match specific characters.

Meta-Character    Matches
\\                A single backslash
\0n               An octal value describing a character, where n is a number such that 0 <= n <= 7
\0nn              The character with octal value 0nn (0 <= n <= 7)
\0mnn             The character with octal value 0mnn (0 <= m <= 3, 0 <= n <= 7)
\xhh              The character with hexadecimal value hh (where 0 <= h <= F)
\uhhhh            The character with hexadecimal value hhhh (where 0 <= h <= F)
\t                A tab (character "\u0009")
\n                A newline (linefeed) ("\u000A")
\r                A carriage-return ("\u000D")
\f                A form-feed ("\u000C")
\a                A bell/beep character ("\u0007")
\e                An escape character ("\u001B")
\cx               The control character corresponding to x; for example, \cc is control-c
.                 Any single character


Meta-characters to match against certain string boundaries.

Meta-Character    Matches
^                 Beginning of the line
$                 End of the line
\b                A word boundary
\B                A non-word boundary
\A                The beginning of the input
\G                The end of the previous match
\Z                The end of the input before any line terminators (such as carriage-return or linefeed)
\z                The end of the input


Pattern helper

/*
 * Static String formatting and query routines.
 * Copyright (C) 2001-2005 Stephen Ostermiller
 * http://ostermiller.org/contact.pl?regarding=Java+Utilities
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * See COPYING.TXT for details.
 */

import java.util.HashMap;
import java.util.regex.Pattern;
/**
 * Utilities for String formatting, manipulation, and queries.
 * More information about this class is available from .
 *
 * @author Stephen Ostermiller http://ostermiller.org/contact.pl?regarding=Java+Utilities
 * @since ostermillerutils 1.00.00
 */
public class StringHelper {

  /**
   * Build a regular expression that is each of the terms or'd together.
   * Each term is escaped so it is matched literally, wrapped in its own
   * non-capturing group, and the whole alternation is wrapped in another
   * non-capturing group.
   *
   * @param terms a list of search terms.
   * @param sb place to build the regular expression.
   * @throws IllegalArgumentException if the length of terms is zero.
   *
   * @since ostermillerutils 1.02.25
   */
  private static void buildFindAnyPattern(String[] terms, StringBuilder sb){
    if (terms.length == 0) throw new IllegalArgumentException("There must be at least one term to find.");
    sb.append("(?:");
    for (int i=0; i<terms.length; i++){
      if (i>0) sb.append("|");
      sb.append("(?:");
      sb.append(escapeRegularExpressionLiteral(terms[i]));
      sb.append(")");
    }
    sb.append(")");
  }

  /**
   * Compile prefix + (alternation of the escaped terms) + suffix.
   * Shared by all the public pattern factories, which differ only in the
   * flags/anchors placed before and after the alternation.
   *
   * @param prefix inline flags and leading anchor or wildcard.
   * @param terms Array of search strings.
   * @param suffix trailing anchor or wildcard.
   * @return the compiled pattern.
   * @throws IllegalArgumentException if the length of terms is zero.
   */
  private static Pattern compileAnyPattern(String prefix, String[] terms, String suffix){
    StringBuilder sb = new StringBuilder(prefix);
    buildFindAnyPattern(terms, sb);
    sb.append(suffix);
    return Pattern.compile(sb.toString());
  }

  /**
   * Compile a pattern that will match a string if the string
   * contains any of the given terms.
   *
   * Usage:<br>
   * <code>boolean b = getContainsAnyPattern(terms).matcher(s).matches();</code>
   *
   * If multiple strings are matched against the same set of terms,
   * it is more efficient to reuse the pattern returned by this function.
   *
   * @param terms Array of search strings.
   * @return Compiled pattern that can be used to match a string to see if it contains any of the terms.
   * @throws IllegalArgumentException if the length of terms is zero.
   *
   * @since ostermillerutils 1.02.25
   */
  public static Pattern getContainsAnyPattern(String[] terms){
    // (?s) lets ".*" span line terminators on both sides of the term.
    return compileAnyPattern("(?s).*", terms, ".*");
  }

  /**
   * Compile a pattern that will match a string if the string
   * equals any of the given terms.
   *
   * Usage:<br>
   * <code>boolean b = getEqualsAnyPattern(terms).matcher(s).matches();</code>
   *
   * If multiple strings are matched against the same set of terms,
   * it is more efficient to reuse the pattern returned by this function.
   *
   * @param terms Array of search strings.
   * @return Compiled pattern that can be used to match a string to see if it equals any of the terms.
   * @throws IllegalArgumentException if the length of terms is zero.
   *
   * @since ostermillerutils 1.02.25
   */
  public static Pattern getEqualsAnyPattern(String[] terms){
    return compileAnyPattern("(?s)\\A", terms, "\\z");
  }

  /**
   * Compile a pattern that will match a string if the string
   * starts with any of the given terms.
   *
   * Usage:<br>
   * <code>boolean b = getStartsWithAnyPattern(terms).matcher(s).matches();</code>
   *
   * If multiple strings are matched against the same set of terms,
   * it is more efficient to reuse the pattern returned by this function.
   *
   * @param terms Array of search strings.
   * @return Compiled pattern that can be used to match a string to see if it starts with any of the terms.
   * @throws IllegalArgumentException if the length of terms is zero.
   *
   * @since ostermillerutils 1.02.25
   */
  public static Pattern getStartsWithAnyPattern(String[] terms){
    return compileAnyPattern("(?s)\\A", terms, ".*");
  }

  /**
   * Compile a pattern that will match a string if the string
   * ends with any of the given terms.
   *
   * Usage:<br>
   * <code>boolean b = getEndsWithAnyPattern(terms).matcher(s).matches();</code>
   *
   * If multiple strings are matched against the same set of terms,
   * it is more efficient to reuse the pattern returned by this function.
   *
   * @param terms Array of search strings.
   * @return Compiled pattern that can be used to match a string to see if it ends with any of the terms.
   * @throws IllegalArgumentException if the length of terms is zero.
   *
   * @since ostermillerutils 1.02.25
   */
  public static Pattern getEndsWithAnyPattern(String[] terms){
    return compileAnyPattern("(?s).*", terms, "\\z");
  }

  /**
   * Compile a pattern that will match a string if the string
   * contains any of the given terms.
   *
   * Case is ignored when matching using Unicode case rules.
   *
   * Usage:<br>
   * <code>boolean b = getContainsAnyIgnoreCasePattern(terms).matcher(s).matches();</code>
   *
   * If multiple strings are matched against the same set of terms,
   * it is more efficient to reuse the pattern returned by this function.
   *
   * @param terms Array of search strings.
   * @return Compiled pattern that can be used to match a string to see if it contains any of the terms.
   * @throws IllegalArgumentException if the length of terms is zero.
   *
   * @since ostermillerutils 1.02.25
   */
  public static Pattern getContainsAnyIgnoreCasePattern(String[] terms){
    // (?i) = case-insensitive, (?u) = Unicode-aware case folding.
    return compileAnyPattern("(?i)(?u)(?s).*", terms, ".*");
  }

  /**
   * Compile a pattern that will match a string if the string
   * equals any of the given terms.
   *
   * Case is ignored when matching using Unicode case rules.
   *
   * Usage:<br>
   * <code>boolean b = getEqualsAnyIgnoreCasePattern(terms).matcher(s).matches();</code>
   *
   * If multiple strings are matched against the same set of terms,
   * it is more efficient to reuse the pattern returned by this function.
   *
   * @param terms Array of search strings.
   * @return Compiled pattern that can be used to match a string to see if it equals any of the terms.
   * @throws IllegalArgumentException if the length of terms is zero.
   *
   * @since ostermillerutils 1.02.25
   */
  public static Pattern getEqualsAnyIgnoreCasePattern(String[] terms){
    return compileAnyPattern("(?i)(?u)(?s)\\A", terms, "\\z");
  }

  /**
   * Compile a pattern that will match a string if the string
   * starts with any of the given terms.
   *
   * Case is ignored when matching using Unicode case rules.
   *
   * Usage:<br>
   * <code>boolean b = getStartsWithAnyIgnoreCasePattern(terms).matcher(s).matches();</code>
   *
   * If multiple strings are matched against the same set of terms,
   * it is more efficient to reuse the pattern returned by this function.
   *
   * @param terms Array of search strings.
   * @return Compiled pattern that can be used to match a string to see if it starts with any of the terms.
   * @throws IllegalArgumentException if the length of terms is zero.
   *
   * @since ostermillerutils 1.02.25
   */
  public static Pattern getStartsWithAnyIgnoreCasePattern(String[] terms){
    return compileAnyPattern("(?i)(?u)(?s)\\A", terms, ".*");
  }

  /**
   * Compile a pattern that will match a string if the string
   * ends with any of the given terms.
   *
   * Case is ignored when matching using Unicode case rules.
   *
   * Usage:<br>
   * <code>boolean b = getEndsWithAnyIgnoreCasePattern(terms).matcher(s).matches();</code>
   *
   * If multiple strings are matched against the same set of terms,
   * it is more efficient to reuse the pattern returned by this function.
   *
   * @param terms Array of search strings.
   * @return Compiled pattern that can be used to match a string to see if it ends with any of the terms.
   * @throws IllegalArgumentException if the length of terms is zero.
   *
   * @since ostermillerutils 1.02.25
   */
  public static Pattern getEndsWithAnyIgnoreCasePattern(String[] terms){
    return compileAnyPattern("(?i)(?u)(?s).*", terms, "\\z");
  }

  /**
   * Tests to see if the given string contains any of the given terms.
   *
   * A single regular expression covering all the terms is compiled and
   * applied to the string.
   *
   * This is a convenience method that compiles the expression on every
   * call.  If multiple strings are tested against the same set of terms,
   * it is more efficient not to compile the regular expression multiple times.
   * @see #getContainsAnyPattern(String[])
   *
   * @param s String that may contain any of the given terms.
   * @param terms list of substrings that may be contained in the given string.
   * @return true iff one of the terms is a substring of the given string.
   * @throws IllegalArgumentException if the length of terms is zero.
   *
   * @since ostermillerutils 1.02.25
   */
  public static boolean containsAny(String s, String[] terms){
    return getContainsAnyPattern(terms).matcher(s).matches();
  }

  /**
   * Tests to see if the given string equals any of the given terms.
   *
   * A single regular expression covering all the terms is compiled and
   * applied to the string.
   *
   * This is a convenience method that compiles the expression on every
   * call.  If multiple strings are tested against the same set of terms,
   * it is more efficient not to compile the regular expression multiple times.
   * @see #getEqualsAnyPattern(String[])
   *
   * @param s String that may equal any of the given terms.
   * @param terms list of strings that may equal the given string.
   * @return true iff one of the terms is equal to the given string.
   * @throws IllegalArgumentException if the length of terms is zero.
   *
   * @since ostermillerutils 1.02.25
   */
  public static boolean equalsAny(String s, String[] terms){
    return getEqualsAnyPattern(terms).matcher(s).matches();
  }

  /**
   * Tests to see if the given string starts with any of the given terms.
   *
   * A single regular expression covering all the terms is compiled and
   * applied to the string.
   *
   * This is a convenience method that compiles the expression on every
   * call.  If multiple strings are tested against the same set of terms,
   * it is more efficient not to compile the regular expression multiple times.
   * @see #getStartsWithAnyPattern(String[])
   *
   * @param s String that may start with any of the given terms.
   * @param terms list of strings that the given string may start with.
   * @return true iff the given string starts with one of the given terms.
   * @throws IllegalArgumentException if the length of terms is zero.
   *
   * @since ostermillerutils 1.02.25
   */
  public static boolean startsWithAny(String s, String[] terms){
    return getStartsWithAnyPattern(terms).matcher(s).matches();
  }

  /**
   * Tests to see if the given string ends with any of the given terms.
   *
   * A single regular expression covering all the terms is compiled and
   * applied to the string.
   *
   * This is a convenience method that compiles the expression on every
   * call.  If multiple strings are tested against the same set of terms,
   * it is more efficient not to compile the regular expression multiple times.
   * @see #getEndsWithAnyPattern(String[])
   *
   * @param s String that may end with any of the given terms.
   * @param terms list of strings that the given string may end with.
   * @return true iff the given string ends with one of the given terms.
   * @throws IllegalArgumentException if the length of terms is zero.
   *
   * @since ostermillerutils 1.02.25
   */
  public static boolean endsWithAny(String s, String[] terms){
    return getEndsWithAnyPattern(terms).matcher(s).matches();
  }

  /**
   * Tests to see if the given string contains any of the given terms.
   *
   * Case is ignored when matching using Unicode case rules.
   *
   * This is a convenience method that compiles the expression on every
   * call.  If multiple strings are tested against the same set of terms,
   * it is more efficient not to compile the regular expression multiple times.
   * @see #getContainsAnyIgnoreCasePattern(String[])
   *
   * @param s String that may contain any of the given terms.
   * @param terms list of substrings that may be contained in the given string.
   * @return true iff one of the terms is a substring of the given string.
   * @throws IllegalArgumentException if the length of terms is zero.
   *
   * @since ostermillerutils 1.02.25
   */
  public static boolean containsAnyIgnoreCase(String s, String[] terms){
    return getContainsAnyIgnoreCasePattern(terms).matcher(s).matches();
  }

  /**
   * Tests to see if the given string equals any of the given terms.
   *
   * Case is ignored when matching using Unicode case rules.
   *
   * This is a convenience method that compiles the expression on every
   * call.  If multiple strings are tested against the same set of terms,
   * it is more efficient not to compile the regular expression multiple times.
   * @see #getEqualsAnyIgnoreCasePattern(String[])
   *
   * @param s String that may equal any of the given terms.
   * @param terms list of strings that may equal the given string.
   * @return true iff one of the terms is equal to the given string.
   * @throws IllegalArgumentException if the length of terms is zero.
   *
   * @since ostermillerutils 1.02.25
   */
  public static boolean equalsAnyIgnoreCase(String s, String[] terms){
    return getEqualsAnyIgnoreCasePattern(terms).matcher(s).matches();
  }

  /**
   * Tests to see if the given string starts with any of the given terms.
   *
   * Case is ignored when matching using Unicode case rules.
   *
   * This is a convenience method that compiles the expression on every
   * call.  If multiple strings are tested against the same set of terms,
   * it is more efficient not to compile the regular expression multiple times.
   * @see #getStartsWithAnyIgnoreCasePattern(String[])
   *
   * @param s String that may start with any of the given terms.
   * @param terms list of strings that the given string may start with.
   * @return true iff the given string starts with one of the given terms.
   * @throws IllegalArgumentException if the length of terms is zero.
   *
   * @since ostermillerutils 1.02.25
   */
  public static boolean startsWithAnyIgnoreCase(String s, String[] terms){
    return getStartsWithAnyIgnoreCasePattern(terms).matcher(s).matches();
  }

  /**
   * Tests to see if the given string ends with any of the given terms.
   *
   * Case is ignored when matching using Unicode case rules.
   *
   * This is a convenience method that compiles the expression on every
   * call.  If multiple strings are tested against the same set of terms,
   * it is more efficient not to compile the regular expression multiple times.
   * @see #getEndsWithAnyIgnoreCasePattern(String[])
   *
   * @param s String that may end with any of the given terms.
   * @param terms list of strings that the given string may end with.
   * @return true iff the given string ends with one of the given terms.
   * @throws IllegalArgumentException if the length of terms is zero.
   *
   * @since ostermillerutils 1.02.25
   */
  public static boolean endsWithAnyIgnoreCase(String s, String[] terms){
    return getEndsWithAnyIgnoreCasePattern(terms).matcher(s).matches();
  }

  /**
   * Escapes characters that have special meaning to
   * regular expressions, so the result can be embedded in a pattern and
   * matches the input literally.
   *
   * Every character other than [0-9a-zA-Z] is preceded by a backslash.
   * The Pattern documentation allows a backslash before any non-alphabetic
   * character, but treats a backslash before an alphabetic character that
   * is not a defined escape as an error, so letters and digits are left
   * untouched.
   *
   * @param s String to be escaped
   * @return escaped String; the same instance as s when nothing needed escaping
   * @throws NullPointerException if s is null.
   *
   * @since ostermillerutils 1.02.25
   */
  public static String escapeRegularExpressionLiteral(String s){
    int length = s.length();
    // Allocated lazily on the first character that needs a backslash, so a
    // string with nothing to escape is returned as-is without copying.
    StringBuilder sb = null;
    for (int i=0; i<length; i++){
      char c = s.charAt(i);
      boolean plain = (c>='0' && c<='9') || (c>='A' && c<='Z') || (c>='a' && c<='z');
      if (!plain){
        if (sb == null){
          sb = new StringBuilder(length + 8);
          sb.append(s, 0, i);
        }
        sb.append('\\');
      }
      if (sb != null){
        sb.append(c);
      }
    }
    return (sb == null) ? s : sb.toString();
  }
}





POSIX character classes and Java character classes

Character Class Meta-Character    Matches
\p{Lower}     Lowercase letter [a-z]
\p{Upper}     Uppercase letter [A-Z]
\p{ASCII}     All ASCII [\x00-\x7F]
\p{Alpha}     Any lowercase or uppercase letter
\p{Digit}     A digit [0-9]
\p{Alnum}     Any letter or digit
\p{Punct}     Punctuation [!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~]
\p{Graph}     A visible character: any letter, digit, or punctuation
\p{Print}     A printable character: \p{Graph} plus the space character
\p{Blank}     A space or tab [ \t]
\p{Cntrl}     A control character [\x00-\x1F\x7F]
\p{XDigit}    Hexadecimal digit [0-9a-fA-F]
\p{Space}     A whitespace character [ \t\n\x0B\f\r]


Read regular expression from console

/*
 * Copyright (c) 1995 - 2008 Sun Microsystems, Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   - Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *   - Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *
 *   - Neither the name of Sun Microsystems nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
import java.io.Console;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
/**
 * Interactive regular-expression test harness.
 *
 * <p>Repeatedly prompts on the system console for a regular expression and an
 * input string, then reports every match together with its start and end
 * index. When the regular expression is malformed, the details carried by the
 * {@link PatternSyntaxException} are printed and the program exits. The loop
 * also ends when the console reaches end of input.
 */
public class RegexTestHarness2 {
  /**
   * Runs the prompt/compile/match loop.
   *
   * @param args command-line arguments (not used)
   */
  public static void main(String[] args) {
    Console console = System.console();
    if (console == null) {
      // No interactive console is attached to the JVM (e.g. redirected I/O).
      System.err.println("No console.");
      System.exit(1);
    }
    while (true) {
      Matcher matcher;
      try {
        String regex = console.readLine("%nEnter your regex: ");
        if (regex == null) {
          // readLine returns null at end of stream; stop instead of
          // passing null to Pattern.compile.
          return;
        }
        Pattern pattern = Pattern.compile(regex);
        String input = console.readLine("Enter input string to search: ");
        if (input == null) {
          return;
        }
        matcher = pattern.matcher(input);
      } catch (PatternSyntaxException pse) {
        console.format("There is a problem with the regular expression!%n");
        console.format("The pattern in question is: %s%n", pse.getPattern());
        console.format("The description is: %s%n", pse.getDescription());
        console.format("The message is: %s%n", pse.getMessage());
        console.format("The index is: %s%n", pse.getIndex());
        System.exit(0);
        // Never reached at run time; lets the compiler see that 'matcher'
        // is always assigned below.
        return;
      }
      boolean found = false;
      while (matcher.find()) {
        console.format("I found the text \"%s\" starting at "
            + "index %d and ending at index %d.%n", matcher.group(), matcher
            .start(), matcher.end());
        found = true;
      }
      if (!found) {
        console.format("No match found.%n");
      }
    }
  }
}





Regex Test Harness

/*
 * Copyright (c) 1995 - 2008 Sun Microsystems, Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   - Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *   - Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *
 *   - Neither the name of Sun Microsystems nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
import java.io.Console;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Minimal interactive regular-expression test harness.
 *
 * <p>Repeatedly prompts on the system console for a regular expression and an
 * input string, then reports every match together with its start and end
 * index. A malformed regular expression is not caught here, so the exception
 * thrown by {@link Pattern#compile(String)} ends the program. The loop also
 * ends when the console reaches end of input.
 */
public class RegexTestHarness {
  /**
   * Runs the prompt/compile/match loop.
   *
   * @param args command-line arguments (not used)
   */
  public static void main(String[] args) {
    Console console = System.console();
    if (console == null) {
      // No interactive console is attached to the JVM (e.g. redirected I/O).
      System.err.println("No console.");
      System.exit(1);
    }
    while (true) {
      String regex = console.readLine("%nEnter your regex: ");
      if (regex == null) {
        // readLine returns null at end of stream; stop instead of
        // passing null to Pattern.compile.
        return;
      }
      Pattern pattern = Pattern.compile(regex);
      String input = console.readLine("Enter input string to search: ");
      if (input == null) {
        return;
      }
      Matcher matcher = pattern.matcher(input);
      boolean found = false;
      while (matcher.find()) {
        console.format("I found the text \"%s\" starting at "
            + "index %d and ending at index %d.%n", matcher.group(), matcher
            .start(), matcher.end());
        found = true;
      }
      if (!found) {
        console.format("No match found.%n");
      }
    }
  }
}





Regular expression languages also have character classes.

Character classes specify a list of possible characters that can match any single character in the string you want to match.

  1. Using the expression [^012], any single character except 0, 1, and 2 is matched.
  2. You can specify character ranges using the dash.
  3. The character class [a-z] matches any single lowercase letter.
  4. [^a-z] matches any character except a lowercase letter.
  5. Use [0-9] to match a single digit.
  6. Use [0-3] to match a 0, 1, 2, or 3.
  7. Use [a-zA-Z] to match any single letter.

Character Class Meta-Character    Matches
.     Any single character
\d    A digit [0-9]
\D    A nondigit [^0-9]
\s    A whitespace character [ \t\n\x0B\f\r]
\S    A nonwhitespace character [^\s]
\w    A word character [a-zA-Z_0-9]
\W    A nonword character [^\w]


Simple validation using the Pattern and Matcher objects

import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
/**
 * Simple validation example: compiles the pattern {@code Java \d} and prints
 * whether the candidate string {@code "Java 4"} contains a match.
 */
public class MainClass {
  /**
   * Compiles the pattern, searches the candidate string and prints the
   * boolean result of {@link Matcher#find()} to standard output.
   *
   * @param args command-line arguments (not used)
   */
  public static void main(String[] args) {
    Pattern p;
    try {
      p = Pattern.compile("Java \\d");
    } catch (PatternSyntaxException pex) {
      pex.printStackTrace();
      System.exit(0);
      // Never reached at run time; lets the compiler see that 'p' is
      // always assigned below.
      return;
    }
    String candidate = "Java 4";
    // Pattern.matcher never returns null, so no null check is needed.
    Matcher m = p.matcher(candidate);
    System.out.println(m.find());
  }
}