Java/Regular Expressions/Basic Regular Expressions

Материал из Java эксперт
Версия от 06:00, 1 июня 2010; Admin (обсуждение | вклад) (1 версия)
(разн.) ← Предыдущая | Текущая версия (разн.) | Следующая → (разн.)
Перейти к: навигация, поиск

Character Class Matches

  
\p{javaLowerCase}               Everything that Character.isLowerCase() matches
\p{javaUpperCase}               Everything that Character.isUpperCase() matches
\p{javaWhitespace}              Everything that Character.isWhitespace() matches
\p{javaMirrored}                Everything that Character.isMirrored() matches





Character classes specify a list of possible characters

  
Character Class Meta-Character            Matches
.                                         Any single character
\d                                        A digit [0-9]
\D                                        A nondigit [^0-9]
\s                                        A whitespace character [ \t\n\x0B\f\r]
\S                                        A nonwhitespace character[^\s]
\w                                        A word character  [a-zA-Z_0-9]
\W                                        A nonword character [^\w]





demonstrate Regular Expressions: Match -> group()

<img alt="demonstrate Regular Expressions: Match -&gt; group()" src="http://www.jexp.ru/Code/JavaImages/REmatch.PNG">



 
/*
 * Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002.
 * All rights reserved. Software written by Ian F. Darwin and others.
 * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS""
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * 
 * Java, the Duke mascot, and all variants of Sun"s Java "steaming coffee
 * cup" logo are trademarks of Sun Microsystems. Sun"s, and James Gosling"s,
 * pioneering role in inventing and promulgating (and standardizing) the Java 
 * language and environment is gratefully acknowledged.
 * 
 * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
 * inventing predecessor languages C and C++ is also gratefully acknowledged.
 */
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * REmatch -- demonstrate RE Match -> group()
 * 
 * @author Ian F. Darwin, http://www.darwinsys.ru/
 * @version $Id: REmatch.java,v 1.5 2004/02/28 02:21:26 ian Exp $
 */
public class REmatch {
  /**
   * Searches a fixed sample line for the pattern {@code Q[^u]\d+\.} and prints
   * the matched text, or {@code NO MATCH} when the pattern is not found.
   *
   * @param argv command-line arguments; not used
   */
  public static void main(String[] argv) {
    // 'Q', then any character except 'u', then one or more digits, then a literal dot.
    String patt = "Q[^u]\\d+\\.";
    Pattern r = Pattern.compile(patt);
    String line = "Order QT300. Now!";
    Matcher m = r.matcher(line);
    if (m.find()) {
      // group(0) is the entire text matched by the find() that just succeeded.
      System.out.println(patt + " matches \"" + m.group(0) + "\" in \""
          + line + "\"");
    } else {
      System.out.println("NO MATCH");
    }
  }
}





Displays directory listing using regular expressions

 
// : c12:DirList.java
// Displays directory listing using regular expressions.
// {Args: "D.*\.java"}
// From "Thinking in Java, 3rd ed." (c) Bruce Eckel 2002
// www.BruceEckel.ru. See copyright notice in CopyRight.txt.
import java.io.File;
import java.io.FilenameFilter;
import java.util.Arrays;
import java.util.Comparator;
import java.util.regex.Pattern;
/**
 * Prints the entries of the current directory, sorted with
 * AlphabeticComparator, optionally restricted by a regular expression.
 */
public class DirList {
  /**
   * Lists the current directory, one entry per line.
   *
   * @param args if non-empty, {@code args[0]} is a regular expression handed
   *             to DirFilter; with no arguments every entry is listed
   */
  public static void main(String[] args) {
    File path = new File(".");
    String[] list;
    if (args.length == 0) {
      list = path.list();
    } else {
      list = path.list(new DirFilter(args[0]));
    }
    if (list == null) {
      // File.list() returns null when the path is not a directory or an
      // I/O error occurs; sorting null would throw NullPointerException.
      System.err.println("Cannot list directory: " + path.getAbsolutePath());
      return;
    }
    Arrays.sort(list, new AlphabeticComparator());
    for (String name : list) {
      System.out.println(name);
    }
  }
}
/**
 * FilenameFilter that accepts a name when its last path component is matched
 * in full by a regular expression.
 */
class DirFilter implements FilenameFilter {
  private final Pattern pattern;

  /**
   * @param regex regular expression the whole file name must match
   * @throws java.util.regex.PatternSyntaxException if {@code regex} is invalid
   */
  public DirFilter(String regex) {
    pattern = Pattern.compile(regex);
  }

  /**
   * Tests a single directory entry.
   *
   * @param dir  directory containing the entry; not consulted
   * @param name entry name, possibly carrying path information
   * @return true when the bare file name matches the pattern entirely
   */
  @Override
  public boolean accept(File dir, String name) {
    // Strip path information, then require a full match (matches(), not find()).
    return pattern.matcher(new File(name).getName()).matches();
  }
} ///:~
/**
 * Orders strings alphabetically without regard to case by comparing their
 * lower-cased forms.
 */
class AlphabeticComparator implements Comparator<Object> {
  /**
   * Compares two strings after converting both to lower case.
   *
   * @param o1 first value; must be a String
   * @param o2 second value; must be a String
   * @return negative, zero or positive as the lower-cased {@code o1} sorts
   *         before, equal to, or after the lower-cased {@code o2}
   * @throws ClassCastException if either argument is not a String
   */
  @Override
  public int compare(Object o1, Object o2) {
    String s1 = (String) o1;
    String s2 = (String) o2;
    return s1.toLowerCase().compareTo(s2.toLowerCase());
  }
} ///:~





Greedy Operator Description

  
X?                        Matches X zero or one time
X*                        Matches X zero or more times
X+                        Matches X one or more times
X{n}                      Matches X exactly n times, where n is any number
X{n,}                     Matches X at least n times
X{n,m}                    Matches X at least n, but no more than m times





Like Regular Expression Demo in a TextField

 
/*
 * Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002.
 * All rights reserved. Software written by Ian F. Darwin and others.
 * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS""
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * 
 * Java, the Duke mascot, and all variants of Sun"s Java "steaming coffee
 * cup" logo are trademarks of Sun Microsystems. Sun"s, and James Gosling"s,
 * pioneering role in inventing and promulgating (and standardizing) the Java 
 * language and environment is gratefully acknowledged.
 * 
 * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
 * inventing predecessor languages C and C++ is also gratefully acknowledged.
 */
import java.awt.BorderLayout;
import java.awt.Container;
import javax.swing.JFrame;
import javax.swing.JTextArea;
/* Like REDemo but shows the groups in a TextField
 */
public class REDemo2 extends REDemo {
  /** Text area that receives the matched text or the per-group listing. */
  JTextArea logTextArea;
  
  /** "main program" method - construct and show */
  public static void main(String[] av) {
    JFrame f = new JFrame("REDemo2");
    f.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
    REDemo2 comp = new REDemo2();
    Container cp = f.getContentPane();
    cp.add(comp, BorderLayout.NORTH);
    cp.add(comp.logTextArea, BorderLayout.SOUTH);
    f.pack();
    f.setVisible(true);
  }

  /** Builds the inherited UI and adds a 10-row by 40-column log text area. */
  REDemo2() {
    super();
    logTextArea = new JTextArea(10,40);
    add(logTextArea);
  }

  /**
   * Runs the superclass match and, when it succeeds, writes the matched text
   * into {@link #logTextArea}.
   *
   * NOTE(review): pattern, matcher, stringTF, match, find, findAll and
   * setMatches are not declared in this class; presumably they are inherited
   * from REDemo -- confirm against that class.
   *
   * @return true when something was matched and logged, false otherwise
   */
  protected boolean tryMatch() {
    if (pattern == null) {
      return false;
    }
    logTextArea.setText("");
    if (!super.tryMatch()) {
      return false;
    }
    int n = matcher.groupCount();
    // Start over on the current input text before matching again here.
    matcher.reset(stringTF.getText());
    if (match.isSelected() && matcher.matches()) {
      logTextArea.setText(matcher.group());
      return true;
    }
    if (find.isSelected() && matcher.find()) {
      logTextArea.setText(matcher.group());
      return true;
    }
    if (findAll.isSelected()) {
      int i;
      for (i = 0; i < n; i++) {
        // Matcher.group(int) throws IllegalStateException after a failed
        // find(), so stop as soon as no further match exists.
        if (!matcher.find()) {
          break;
        }
        logTextArea.append(i + ": " + matcher.group(i) + "\n");
      }
      if (i > 0) {
        return true;
      }
    }
    setMatches(false);
    return false;
  }
}





Matcher and Pattern demo

 
/*
 * Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002.
 * All rights reserved. Software written by Ian F. Darwin and others.
 * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS""
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * 
 * Java, the Duke mascot, and all variants of Sun"s Java "steaming coffee
 * cup" logo are trademarks of Sun Microsystems. Sun"s, and James Gosling"s,
 * pioneering role in inventing and promulgating (and standardizing) the Java 
 * language and environment is gratefully acknowledged.
 * 
 * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
 * inventing predecessor languages C and C++ is also gratefully acknowledged.
 */

import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates Matcher.lookingAt() and group(0) with the greedy pattern
 * {@code d.*ian} on a fixed sentence.
 */
public class GetParen0 {
  /**
   * Prints the text matched at the start of the sample sentence, or
   * {@code NO MATCH} if the pattern does not match there.
   *
   * @param args command-line arguments; not used
   */
  public static void main(String[] args) {
    Pattern myRE = Pattern.compile("d.*ian");
    Matcher matcher = myRE
        .matcher("darwinian pterodactyls soared over the devonian space");
    // group() is only legal after a successful match, so check the result.
    if (!matcher.lookingAt()) {
      System.out.println("NO MATCH");
      return;
    }
    // ".*" is greedy, so the match extends to the last "ian" in the input.
    String result = matcher.group(0);
    System.out.println(result);
  }
}





Matcher and Pattern demo 2

 
/*
 * Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002.
 * All rights reserved. Software written by Ian F. Darwin and others.
 * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS""
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * 
 * Java, the Duke mascot, and all variants of Sun"s Java "steaming coffee
 * cup" logo are trademarks of Sun Microsystems. Sun"s, and James Gosling"s,
 * pioneering role in inventing and promulgating (and standardizing) the Java 
 * language and environment is gratefully acknowledged.
 * 
 * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
 * inventing predecessor languages C and C++ is also gratefully acknowledged.
 */
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates capture groups: splits "Bananas 123" into a word and a number
 * with the pattern {@code (\w+)\s(\d+)}.
 */
public class GetParen1 {
  /**
   * Prints the two captured groups, or {@code NO MATCH} if the pattern does
   * not match at the start of the sample text.
   *
   * @param args command-line arguments; not used
   */
  public static void main(String[] args) {
    Pattern patt = Pattern.compile("(\\w+)\\s(\\d+)");
    Matcher matcher = patt.matcher("Bananas 123");
    // group(n) is only legal after a successful match, so check the result.
    if (!matcher.lookingAt()) {
      System.out.println("NO MATCH");
      return;
    }
    System.out.println("Name: " + matcher.group(1));
    System.out.println("Number: " + matcher.group(2));
  }
}





Match SQL string

 
/* 
 * 
 * The ObjectStyle Group Software License, version 1.1
 * ObjectStyle Group - http://objectstyle.org/
 * 
 * Copyright (c) 2002-2005, Andrei (Andrus) Adamchik and individual authors
 * of the software. All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 
 * 3. The end-user documentation included with the redistribution, if any,
 *    must include the following acknowlegement:
 *    "This product includes software developed by independent contributors
 *    and hosted on ObjectStyle Group web site (http://objectstyle.org/)."
 *    Alternately, this acknowlegement may appear in the software itself,
 *    if and wherever such third-party acknowlegements normally appear.
 * 
 * 4. The names "ObjectStyle Group" and "Cayenne" must not be used to endorse
 *    or promote products derived from this software without prior written
 *    permission. For written permission, email
 *    "andrus at objectstyle dot org".
 * 
 * 5. Products derived from this software may not be called "ObjectStyle"
 *    or "Cayenne", nor may "ObjectStyle" or "Cayenne" appear in their
 *    names without prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED ``AS IS"" AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE OBJECTSTYLE GROUP OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * 
 * 
 * This software consists of voluntary contributions made by many
 * individuals and hosted on ObjectStyle Group web site.  For more
 * information on the ObjectStyle Group, please see
 * <http://objectstyle.org/>.
 */
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * A collection of utility methods related to regular expressions processing.
 * 
 * @since 1.2
 * @author Andrus Adamchik
 */
class RegexUtil {
    /** Matches a single backslash character. */
    static final Pattern BACKSLASH = Pattern.compile("\\\\");
    /** Matches a single literal dot. */
    static final Pattern DOT = Pattern.compile("\\.");

    /**
     * Replaces all backslashes with forward slashes "/". Convenience method to
     * convert path Strings to URI format.
     *
     * @param string text to convert; may be null
     * @return the converted text, the same instance when it holds no
     *         backslash, or null for null input
     */
    static String substBackslashes(String string) {
        if (string == null) {
            return null;
        }
        Matcher matcher = BACKSLASH.matcher(string);
        return matcher.find() ? matcher.replaceAll("/") : string;
    }

    /**
     * Returns package name for the Java class as a path separated with forward slash
     * ("/"). Method is used to lookup resources that are located in package
     * subdirectories. For example, a String "a/b/c" will be returned for class name
     * "a.b.c.ClassName".
     *
     * @param className fully qualified class name; may be null
     * @return the package path, or an empty string when the name is null or
     *         contains no dot
     */
    static String getPackagePath(String className) {
        if (className == null) {
            return "";
        }
        Matcher matcher = DOT.matcher(className);
        if (!matcher.find()) {
            return "";
        }
        String path = matcher.replaceAll("/");
        // Drop the trailing simple class name, keeping only the package part.
        return path.substring(0, path.lastIndexOf("/"));
    }

    /**
     * Converts a SQL-style pattern to a valid Perl regular expression. E.g.:
     * <p>
     * <code>"billing_%"</code> will become <code>^billing_.*$</code>
     * <p>
     * <code>"user?"</code> will become <code>^user.?$</code>
     * <p>
     * Only the characters '.', '/', '$' and '^' are escaped; any other regex
     * metacharacter in the input is copied through unchanged.
     *
     * @param pattern SQL-style pattern; must be non-null and non-empty
     * @return the anchored regular expression
     * @throws NullPointerException if {@code pattern} is null
     * @throws IllegalArgumentException if {@code pattern} is empty
     */
    static String sqlPatternToRegex(String pattern) {
        if (pattern == null) {
            throw new NullPointerException("Null pattern.");
        }
        if (pattern.length() == 0) {
            throw new IllegalArgumentException("Empty pattern.");
        }
        StringBuilder buffer = new StringBuilder();
        // convert * into regex syntax
        // e.g. abc*x becomes ^abc.*x$
        // or abc?x becomes ^abc.?x$
        buffer.append("^");
        for (int j = 0; j < pattern.length(); j++) {
            char nextChar = pattern.charAt(j);
            if (nextChar == '%') {
                // SQL '%' is handled exactly like '*'.
                nextChar = '*';
            }
            if (nextChar == '*' || nextChar == '?') {
                // Prefix the quantifier with '.' so it applies to "any character".
                buffer.append('.');
            }
            // escape special chars
            else if (nextChar == '.'
                    || nextChar == '/'
                    || nextChar == '$'
                    || nextChar == '^') {
                buffer.append('\\');
            }
            buffer.append(nextChar);
        }
        buffer.append("$");
        return buffer.toString();
    }

    /** Not instantiable: this class only holds static helpers. */
    private RegexUtil() {
        super();
    }
}





Match the Q[^u] pattern against strings from command line

 
/*
 * Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002.
 * All rights reserved. Software written by Ian F. Darwin and others.
 * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS""
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * 
 * Java, the Duke mascot, and all variants of Sun"s Java "steaming coffee
 * cup" logo are trademarks of Sun Microsystems. Sun"s, and James Gosling"s,
 * pioneering role in inventing and promulgating (and standardizing) the Java 
 * language and environment is gratefully acknowledged.
 * 
 * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
 * inventing predecessor languages C and C++ is also gratefully acknowledged.
 */
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Match the "Q[^u]" pattern against strings from command line.
 * 
 * @author Ian F. Darwin, http://www.darwinsys.ru/
 * @version $Id: RE_QnotU_Args.java,v 1.4 2004/02/09 03:33:42 ian Exp $
 */
public class RE_QnotU_Args {
  /**
   * Tests whether the fixed string "RE_QnotU_Args" starts with the pattern
   * {@code ^Q[^u]\d+\.} and prints the outcome.
   *
   * @param argv command-line arguments; not used by the code as written
   */
  public static void main(String[] argv) {
    String patt = "^Q[^u]\\d+\\.";
    Pattern r = Pattern.compile(patt);
    Matcher m = r.matcher("RE_QnotU_Args");
    // lookingAt() anchors at the start of the input but need not consume all of it.
    boolean found = m.lookingAt();
    System.out.println(patt + (found ? " matches " : " doesn't match ")
        + "RE_QnotU_Args");
  }
}





Meta-characters to match against certain string boundaries

  
Meta-Character            Matches
^                         Beginning of the line
$                         End of the line
\b                        A word boundary
\B                        A nonword boundary
\A                        The beginning of the input
\G                        The end of the previous match
\Z                        The end of the input before any line terminators (such as carriage-return or linefeed)
\z                        The end of the input





POSIX character classes and Java character classes

  
Character Class Meta-Character               Matches
\p{Lower}                                    Lowercase letter [a-z]
\p{Upper}                                    Uppercase letter [A-Z]
\p{ASCII}                                    All ASCII [\x00-\x7F]
\p{Alpha}                                    Any lowercase or uppercase letter
\p{Digit}                                    A digit [0-9]
\p{Alnum}                                    Any letter or digit
\p{Punct}                                    Punctuation [!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~]
\p{Graph}                                    A visible character: any letter, digit, or punctuation
\p{Print}                                    A printable character: \p{Graph} or the space character [\p{Graph}\x20]
\p{Blank}                                    A space or tab [ \t]
\p{Cntrl}                                    A control character [\x00-\x1F\x7F]
\p{XDigit}                                   Hexadecimal digit  [0-9a-fA-F]
\p{Space}                                    A whitespace character [ \t\n\x0B\f\r]





Regular Expressions in Action

 
/*
 * Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002.
 * All rights reserved. Software written by Ian F. Darwin and others.
 * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS""
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * 
 * Java, the Duke mascot, and all variants of Sun"s Java "steaming coffee
 * cup" logo are trademarks of Sun Microsystems. Sun"s, and James Gosling"s,
 * pioneering role in inventing and promulgating (and standardizing) the Java 
 * language and environment is gratefully acknowledged.
 * 
 * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
 * inventing predecessor languages C and C++ is also gratefully acknowledged.
 */
import java.io.BufferedReader;
import java.io.FileWriter;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Graph of a book's sales rank on a given bookshop site.
 * 
 * @author Ian F. Darwin, http://www.darwinsys.ru/, Java Cookbook author,
 *         originally translated fairly literally from Perl into Java.
 * @author Patrick Killelea <p@patrick.net>: original Perl version, from the 2nd
 *         edition of his book "Web Performance Tuning".
 * @version $Id: BookRank.java,v 1.8 2004/03/20 20:48:03 ian Exp $
 */
public class BookRank {
  /** File that accumulates one "date rank" line per successful run. */
  public final static String DATA_FILE = "book.sales";
  /** Image file gnuplot is told to write the graph to. */
  public final static String GRAPH_FILE = "book.png";

  /**
   * Grab the sales rank off the web page and log it, then feed a plot script
   * for all logged data to gnuplot.
   *
   * @param args command-line arguments; not used by the code as written
   * @throws Exception on any network, file or process failure
   */
  public static void main(String[] args) throws Exception {
    // NOTE(review): nothing is ever loaded into this Properties object, so
    // every lookup below yields its default value.
    Properties p = new Properties();
    String title = p.getProperty("title", "NO TITLE IN PROPERTIES");
    // The url must have the "isbn=" at the very end, or otherwise
    // be amenable to being string-catted to, like the default.
    String url = p.getProperty("url", "http://test.ing/test.cgi?isbn=");
    // The 10-digit ISBN for the book.
    String isbn = p.getProperty("isbn", "0000000000");
    // The RE pattern (MUST have ONE capture group for the number)
    String pattern = p.getProperty("pattern", "Rank: (\\d+)");
    // Looking for something like this in the input:
    //   <b>QuickBookShop.web Sales Rank: </b>
    //   26,252
    //   </font><br>
    Pattern r = Pattern.compile(pattern);
    // Open the URL and read it as a single long string, so the RE can match
    // across multiple lines. The reader is closed even if reading fails.
    StringBuilder page = new StringBuilder();
    try (BufferedReader is = new BufferedReader(new InputStreamReader(new URL(
        url + isbn).openStream()))) {
      String line;
      while ((line = is.readLine()) != null) {
        page.append(line).append('\n');
      }
    }
    String input = page.toString();
    // If found, append to sales data file.
    Matcher m = r.matcher(input);
    if (m.find()) {
      // Same layout as `date +"%m %d %H %M %S %Y"`; HH is the 24-hour clock,
      // matching the %H in the gnuplot timefmt below.
      String date = new SimpleDateFormat("MM dd HH mm ss yyyy ").format(new Date());
      // Paren 1 is the digits (and maybe ','s) that matched; remove comma
      String rank = m.group(1).replace(",", "");
      try (PrintWriter pw = new PrintWriter(new FileWriter(DATA_FILE, true))) {
        pw.println(date + rank);
      }
    } else {
      System.err.println("WARNING: pattern `" + pattern
          + "' did not match in `" + url + isbn + "'!");
    }
    // Whether current data found or not, draw the graph, using
    // external plotting program against all historical data.
    // Could use gnuplot, R, any other math/graph program.
    // Better yet: use one of the Java plotting APIs.
    String gnuplot_cmd = "set term png\n" + "set output \"" + GRAPH_FILE
        + "\"\n" + "set xdata time\n"
        + "set ylabel \"Book sales rank\"\n" + "set bmargin 3\n"
        + "set logscale y\n" + "set yrange [1:60000] reverse\n"
        + "set timefmt \"%m %d %H %M %S %Y\"\n" + "plot \"" + DATA_FILE
        + "\" using 1:7 title \"" + title + "\" with lines\n";
    Process proc = new ProcessBuilder("/usr/local/bin/gnuplot").start();
    // The script is sent on gnuplot's standard input; closing signals end of input.
    try (PrintWriter gp = new PrintWriter(proc.getOutputStream())) {
      gp.print(gnuplot_cmd);
    }
  }
}





Reluctant (Lazy) Operator Description

  
X??                              Matches X zero or one time
X*?                              Matches X zero or more times
X+?                              Matches X one or more times
X{n}?                            Matches X exactly n times, where n is any number
X{n,}?                           Matches X at least n times
X{n,m}?                          Matches X at least n, but no more than m times





Show case control using Regular Expressions class.

 
/*
 * Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002.
 * All rights reserved. Software written by Ian F. Darwin and others.
 * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS""
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * 
 * Java, the Duke mascot, and all variants of Sun"s Java "steaming coffee
 * cup" logo are trademarks of Sun Microsystems. Sun"s, and James Gosling"s,
 * pioneering role in inventing and promulgating (and standardizing) the Java 
 * language and environment is gratefully acknowledged.
 * 
 * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
 * inventing predecessor languages C and C++ is also gratefully acknowledged.
 */
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Show case control using the regular expression API: the same pattern is
 * compiled once with {@link Pattern#CASE_INSENSITIVE} and once without, and
 * both compiled patterns are tried against the same input.
 * 
 * @author Ian F. Darwin, http://www.darwinsys.com/
 * @version $Id: CaseMatch.java,v 1.4 2004/02/09 03:33:41 ian Exp $
 */
public class CaseMatch {
  /**
   * Prints whether the sample pattern matches the beginning of the sample
   * input, first ignoring case and then case-sensitively.
   * 
   * @param argv command-line arguments (unused)
   */
  public static void main(String[] argv) {
    // Lower-case 'q', one character other than 'u', one or more digits,
    // then a literal dot -- all anchored at the start of the input.
    String pattern = "^q[^u]\\d+\\.";
    String input = "QA777. is the next flight. It is on time.";
    Pattern reCaseInsens = Pattern.compile(pattern,
        Pattern.CASE_INSENSITIVE);
    Pattern reCaseSens = Pattern.compile(pattern);
    boolean found;
    Matcher m;
    m = reCaseInsens.matcher(input); // Get matcher with case-insens flag
    found = m.lookingAt(); // will match any case, so 'Q' satisfies 'q'
    System.out.println("IGNORE_CASE match " + found);
    m = reCaseSens.matcher(input); // Get matcher w/o case-insens flag
    found = m.lookingAt(); // will match case-sensitively, so 'Q' fails
    System.out.println("MATCH_NORMAL match was " + found);
  }
}





Simple example of using Regular Expressions class.

 
/*
 * Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
 * All rights reserved. Software written by Ian F. Darwin and others.
 * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * 
 * Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
 * cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
 * pioneering role in inventing and promulgating (and standardizing) the Java 
 * language and environment is gratefully acknowledged.
 * 
 * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
 * inventing predecessor languages C and C++ is also gratefully acknowledged.
 */
import java.util.regex.Pattern;
/**
 * Simple example of using the regular expression API: compile one pattern,
 * test it against the start of one input string, and report the result.
 * 
 * @author Ian F. Darwin, http://www.darwinsys.com/
 * @version $Id: RESimple.java,v 1.4 2004/02/28 02:14:11 ian Exp $
 */
public class RESimple {
  /**
   * Prints the pattern and the input, both in single quotes, joined by
   * either " matches " or " doesn't match ".
   * 
   * @param argv command-line arguments (unused)
   */
  public static void main(String[] argv) {
    // Upper-case 'Q', one character other than 'u', one or more digits,
    // then a literal dot -- all anchored at the start of the input.
    String pattern = "^Q[^u]\\d+\\.";
    String input = "QA777. is the next flight. It is on time.";
    Pattern p = Pattern.compile(pattern);
    // lookingAt() only requires the match to begin at the start of the
    // input; the rest of the sentence may follow.
    boolean found = p.matcher(input).lookingAt();
    System.out.println("'" + pattern + "'"
        + (found ? " matches '" : " doesn't match '") + input + "'");
  }
}





Standalone Swing GUI application for demonstrating Regular expressions.

 
/*
 * Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
 * All rights reserved. Software written by Ian F. Darwin and others.
 * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * 
 * Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
 * cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
 * pioneering role in inventing and promulgating (and standardizing) the Java 
 * language and environment is gratefully acknowledged.
 * 
 * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
 * inventing predecessor languages C and C++ is also gratefully acknowledged.
 */

import java.awt.GridLayout;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import javax.swing.ButtonGroup;
import javax.swing.JCheckBox;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.JPanel;
import javax.swing.JRadioButton;
import javax.swing.JTextField;
import javax.swing.event.ChangeEvent;
import javax.swing.event.ChangeListener;
import javax.swing.event.DocumentEvent;
import javax.swing.event.DocumentListener;
/**
 * Standalone Swing GUI application for demonstrating REs. The panel holds a
 * pattern field, an input-string field, a "Syntax OK?" check box, a result
 * field and three radio buttons selecting how the pattern is applied
 * (whole-string match, first find, or count of all finds). <br/>TODO: Show the
 * entire match, and $1 and up as captures that matched.
 * 
 * @author Ian Darwin, http://www.darwinsys.com/
 * @version $Id$
 */
public class REDemo extends JPanel {
  /** The most recently compiled pattern, or null if the pattern text is invalid. */
  protected Pattern pattern;
  /** Matcher created from {@link #pattern}; reset against the input on every try. */
  protected Matcher matcher;
  /** Text fields holding the pattern source and the string to test. */
  protected JTextField patternTF, stringTF;
  /** Selected when the current pattern text compiled without a syntax error. */
  protected JCheckBox compiledOK;
  /** Mode selectors: whole-string match, first find, count all finds. */
  protected JRadioButton match, find, findAll;
  /** Shows "Yes", "No", or the number of matches found. */
  protected JTextField matchesTF;

  /** "main program" method - construct and show */
  public static void main(String[] av) {
    JFrame f = new JFrame("REDemo");
    f.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
    REDemo comp = new REDemo();
    f.setContentPane(comp);
    f.pack();
    f.setLocation(200, 200);
    f.setVisible(true);
  }

  /** Construct the REDemo object including its GUI */
  public REDemo() {
    super();

    // Top row: pattern entry plus the syntax indicator.
    JPanel top = new JPanel();
    top.add(new JLabel("Pattern:", JLabel.RIGHT));
    patternTF = new JTextField(20);
    patternTF.getDocument().addDocumentListener(new PatternListener());
    top.add(patternTF);
    top.add(new JLabel("Syntax OK?"));
    compiledOK = new JCheckBox();
    top.add(compiledOK);

    // Any state change on a mode button re-runs the match in the new mode.
    ChangeListener cl = new ChangeListener() {
      public void stateChanged(ChangeEvent ce) {
        tryMatch();
      }
    };

    // Bottom row: mutually exclusive match modes.
    JPanel switchPane = new JPanel();
    ButtonGroup bg = new ButtonGroup();
    match = new JRadioButton("Match");
    match.setSelected(true);
    match.addChangeListener(cl);
    bg.add(match);
    switchPane.add(match);
    find = new JRadioButton("Find");
    find.addChangeListener(cl);
    bg.add(find);
    switchPane.add(find);
    findAll = new JRadioButton("Find All");
    findAll.addChangeListener(cl);
    bg.add(findAll);
    switchPane.add(findAll);

    // Middle row: input string plus the result display.
    JPanel strPane = new JPanel();
    strPane.add(new JLabel("String:", JLabel.RIGHT));
    stringTF = new JTextField(20);
    stringTF.getDocument().addDocumentListener(new StringListener());
    strPane.add(stringTF);
    strPane.add(new JLabel("Matches:"));
    matchesTF = new JTextField(3);
    strPane.add(matchesTF);

    setLayout(new GridLayout(0, 1, 5, 5));
    add(top);
    add(strPane);
    add(switchPane);
  }

  /**
   * Display a yes/no result.
   * 
   * @param b true to show "Yes", false to show "No"
   */
  protected void setMatches(boolean b) {
    if (b) {
      matchesTF.setText("Yes");
    } else {
      matchesTF.setText("No");
    }
  }

  /**
   * Display a match count.
   * 
   * @param n the number of matches to show
   */
  protected void setMatches(int n) {
    matchesTF.setText(Integer.toString(n));
  }

  /**
   * Compile the text of the pattern field. On success {@link #pattern} and
   * {@link #matcher} are replaced and the "Syntax OK?" box is selected; on a
   * syntax error {@link #pattern} is left null and the box is cleared.
   */
  protected void tryCompile() {
    pattern = null;
    try {
      pattern = Pattern.compile(patternTF.getText());
      matcher = pattern.matcher("");
      compiledOK.setSelected(true);
    } catch (PatternSyntaxException ex) {
      // Expected while the user is part-way through typing a pattern;
      // the check box is the only error report.
      compiledOK.setSelected(false);
    }
  }

  /**
   * Apply the current pattern to the text of the string field, in the mode
   * chosen by the radio buttons, and display the outcome.
   * 
   * @return true if the pattern matched in the selected mode; false if it
   *         did not or if there is no valid pattern
   */
  protected boolean tryMatch() {
    if (pattern == null) {
      // Without a usable pattern nothing can match; show that rather than
      // leaving the previous result on screen.
      setMatches(false);
      return false;
    }
    matcher.reset(stringTF.getText());
    if (match.isSelected() && matcher.matches()) {
      setMatches(true);
      return true;
    }
    if (find.isSelected() && matcher.find()) {
      setMatches(true);
      return true;
    }
    if (findAll.isSelected()) {
      int i = 0;
      while (matcher.find()) {
        ++i;
      }
      if (i > 0) {
        setMatches(i);
        return true;
      }
    }
    setMatches(false);
    return false;
  }

  /** Recompile after a pattern edit, then refresh the displayed result. */
  private void patternChanged() {
    tryCompile();
    tryMatch();
  }

  /**
   * Any change to the pattern tries to compile the result, then re-runs the
   * match so the displayed result always reflects the current pattern.
   */
  class PatternListener implements DocumentListener {
    public void changedUpdate(DocumentEvent ev) {
      patternChanged();
    }
    public void insertUpdate(DocumentEvent ev) {
      patternChanged();
    }
    public void removeUpdate(DocumentEvent ev) {
      patternChanged();
    }
  }

  /** Any change to the input string tries to match the result */
  class StringListener implements DocumentListener {
    public void changedUpdate(DocumentEvent ev) {
      tryMatch();
    }
    public void insertUpdate(DocumentEvent ev) {
      tryMatch();
    }
    public void removeUpdate(DocumentEvent ev) {
      tryMatch();
    }
  }
}