Java/Regular Expressions/Basic Regular Expressions

Материал из Java эксперт
Перейти к: навигация, поиск

Character Class Matches

   <source lang="java">
 

\p{javaLowerCase}    Everything that Character.isLowerCase() matches
\p{javaUpperCase}    Everything that Character.isUpperCase() matches
\p{javaWhitespace}   Everything that Character.isWhitespace() matches
\p{javaMirrored}     Everything that Character.isMirrored() matches

 </source>
   
  
 
  



Character classes specify a list of possible characters

   <source lang="java">
 

Character Class Meta-Character    Matches
.     Any single character
\d    A digit [0-9]
\D    A nondigit [^0-9]
\s    A whitespace character [ \t\n\x0B\f\r]
\S    A nonwhitespace character [^\s]
\w    A word character [a-zA-Z_0-9]
\W    A nonword character [^\w]

 </source>
   
  
 
  



demonstrate Regular Expressions: Match -> group()

<img alt="demonstrate Regular Expressions: Match -> group()" src="http://www.jexp.ru/Code/JavaImages/REmatch.PNG">



   <source lang="java">

/*

* Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* 
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java 
* language and environment is gratefully acknowledged.
* 
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/

import java.util.regex.Matcher; import java.util.regex.Pattern; /**

* REmatch -- demonstrate RE Match -> group()
* 
* @author Ian F. Darwin, http://www.darwinsys.ru/
* @version $Id: REmatch.java,v 1.5 2004/02/28 02:21:26 ian Exp $
*/

public class REmatch {

  /**
   * Searches a sample order line for the first occurrence of the pattern
   * {@code Q[^u]\d+\.} (a "Q" not followed by "u", then one or more digits
   * and a literal dot) and prints the text matched by the whole expression.
   *
   * @param argv command-line arguments; not used
   */
  public static void main(String[] argv) {
    String patt = "Q[^u]\\d+\\.";
    Pattern r = Pattern.compile(patt);
    String line = "Order QT300. Now!";
    Matcher m = r.matcher(line);
    // find() scans forward through the input, so the match may start anywhere.
    if (m.find()) {
      // group(0) is the text matched by the entire pattern.
      System.out.println(patt + " matches \"" + m.group(0) + "\" in \""
          + line + "\"");
    } else {
      System.out.println("NO MATCH");
    }
  }

}


 </source>
   
  
 
  



Displays directory listing using regular expressions

   <source lang="java">

// : c12:DirList.java
// Displays directory listing using regular expressions.
// {Args: "D.*\.java"}
// From "Thinking in Java, 3rd ed." (c) Bruce Eckel 2002
// www.BruceEckel.com. See copyright notice in CopyRight.txt.
import java.io.File;
import java.io.FilenameFilter;
import java.util.Arrays;
import java.util.Comparator;
import java.util.regex.Pattern;

/**
 * Prints the names of the entries in the current directory, sorted
 * case-insensitively. When a command-line argument is given it is used as a
 * regular expression and only names that match it entirely are listed.
 */
public class DirList {

  /**
   * @param args optional; {@code args[0]} is a regular expression that a
   *        file name must match in full to be listed
   */
  public static void main(String[] args) {
    File path = new File(".");
    String[] list;
    if (args.length == 0) {
      list = path.list();
    } else {
      list = path.list(new DirFilter(args[0]));
    }
    // File.list() returns null when the path is not a directory or an I/O
    // error occurs; sorting null would fail with a NullPointerException.
    if (list == null) {
      System.err.println("Cannot list directory: " + path.getAbsolutePath());
      return;
    }
    Arrays.sort(list, new AlphabeticComparator());
    for (int i = 0; i < list.length; i++) {
      System.out.println(list[i]);
    }
  }

}

/** Accepts file names that match a regular expression in their entirety. */
class DirFilter implements FilenameFilter {

  private Pattern pattern;

  /**
   * @param regex regular expression a file name must fully match
   */
  public DirFilter(String regex) {
    pattern = Pattern.compile(regex);
  }

  /**
   * @param dir  directory containing the file; not consulted
   * @param name file name, possibly with leading path components
   * @return true when the last path component of {@code name} matches the
   *         whole pattern
   */
  public boolean accept(File dir, String name) {
    // Strip path information, search for regex:
    return pattern.matcher(new File(name).getName()).matches();
  }

} ///:~

/**
 * Orders strings alphabetically, ignoring case. The raw Comparator type is
 * kept so the compare(Object, Object) signature stays as callers expect.
 */
class AlphabeticComparator implements Comparator {

  /**
   * @param o1 first value; must be a String
   * @param o2 second value; must be a String
   * @return negative, zero or positive as the lower-cased {@code o1} sorts
   *         before, equal to or after the lower-cased {@code o2}
   * @throws ClassCastException if either argument is not a String
   */
  public int compare(Object o1, Object o2) {
    String s1 = (String) o1;
    String s2 = (String) o2;
    return s1.toLowerCase().compareTo(s2.toLowerCase());
  }

} ///:~


 </source>
   
  
 
  



Greedy Operator Description

   <source lang="java">
 

X?        Matches X zero or one time
X*        Matches X zero or more times
X+        Matches X one or more times
X{n}      Matches X exactly n times, where n is any number
X{n,}     Matches X at least n times
X{n,m}    Matches X at least n, but no more than m times

 </source>
   
  
 
  



Like Regular Expression Demo in a TextField

   <source lang="java">

/*

* Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* 
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java 
* language and environment is gratefully acknowledged.
* 
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/

import java.awt.BorderLayout; import java.awt.Container; import javax.swing.JFrame; import javax.swing.JTextArea; /* Like REDemo but shows the groups in a TextField

*/

/**
 * Variant of REDemo that also writes the matched text into a text area.
 *
 * NOTE(review): pattern, matcher, stringTF, match, find, findAll and
 * setMatches() are not declared in this class; presumably they are inherited
 * from REDemo -- confirm against that class.
 */
public class REDemo2 extends REDemo {

  /** Text area that receives the matched text. */
  JTextArea logTextArea;

  /** "main program" method - construct and show */
  public static void main(String[] av) {
    JFrame f = new JFrame("REDemo2");
    f.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
    REDemo2 comp = new REDemo2();
    Container cp = f.getContentPane();
    cp.add(comp, BorderLayout.NORTH);
    cp.add(comp.logTextArea, BorderLayout.SOUTH);
    f.pack();
    f.setVisible(true);
  }

  /** Builds the base component and adds a 10-row by 40-column log area. */
  REDemo2() {
    super();
    logTextArea = new JTextArea(10, 40);
    add(logTextArea);
  }

  /**
   * Runs the superclass match attempt and, when it succeeds, re-runs the
   * matcher over the text of stringTF in the selected mode, writing the
   * matched text to the log area.
   *
   * @return true when text was written to the log area; false when there is
   *         no pattern, the superclass attempt failed, or nothing matched
   */
  protected boolean tryMatch() {
    if (pattern == null) {
      return false;
    }
    logTextArea.setText("");
    if (!super.tryMatch()) {
      return false;
    }
    int n = matcher.groupCount();
    matcher.reset(stringTF.getText());
    if (match.isSelected() && matcher.matches()) {
      logTextArea.setText(matcher.group());
      return true;
    }
    if (find.isSelected() && matcher.find()) {
      logTextArea.setText(matcher.group());
      return true;
    }
    if (findAll.isSelected()) {
      int i;
      for (i = 0; i < n; i++) {
        // Stop once the input has no further match: calling group() after a
        // failed find() throws IllegalStateException.
        if (!matcher.find()) {
          break;
        }
        logTextArea.append(i + ": " + matcher.group(i) + "\n");
      }
      // i counts the lines appended above.
      if (i > 0) {
        return true;
      }
    }
    setMatches(false);
    return false;
  }

}


 </source>
   
  
 
  



Matcher and Pattern demo

   <source lang="java">

/*

* Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* 
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java 
* language and environment is gratefully acknowledged.
* 
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Shows that group(0) returns the text matched by the whole pattern, and that
 * the greedy {@code .*} extends the match to the last "ian" in the input.
 */
public class GetParen0 {

  /**
   * @param args command-line arguments; not used
   */
  public static void main(String[] args) {
    Pattern myRE = Pattern.compile("d.*ian");
    Matcher matcher = myRE
        .matcher("darwinian pterodactyls soared over the devonian space");
    // lookingAt() anchors the match at the start of the input but, unlike
    // matches(), does not require the whole input to be consumed. The
    // literal input above is known to match, so the result is not checked.
    matcher.lookingAt();
    String result = matcher.group(0);
    System.out.println(result);
  }

}


 </source>
   
  
 
  



Matcher and Pattern demo 2

   <source lang="java">

/*

* Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* 
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java 
* language and environment is gratefully acknowledged.
* 
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Shows how parenthesised capture groups are read back: group(1) is the word
 * before the whitespace character and group(2) is the run of digits after it.
 */
public class GetParen1 {

  /**
   * @param args command-line arguments; not used
   */
  public static void main(String[] args) {
    Pattern patt = Pattern.compile("(\\w+)\\s(\\d+)");
    Matcher matcher = patt.matcher("Bananas 123");
    // The literal input above is known to match, so the result of
    // lookingAt() is not checked before the groups are read.
    matcher.lookingAt();
    System.out.println("Name: " + matcher.group(1));
    System.out.println("Number: " + matcher.group(2));
  }

}


 </source>
   
  
 
  



Match SQL string

   <source lang="java">

/*

* 
* The ObjectStyle Group Software License, version 1.1
* ObjectStyle Group - http://objectstyle.org/
* 
* Copyright (c) 2002-2005, Andrei (Andrus) Adamchik and individual authors
* of the software. All rights reserved.
* 
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in
*    the documentation and/or other materials provided with the
*    distribution.
* 
* 3. The end-user documentation included with the redistribution, if any,
*    must include the following acknowlegement:
*    "This product includes software developed by independent contributors
*    and hosted on ObjectStyle Group web site (http://objectstyle.org/)."
*    Alternately, this acknowlegement may appear in the software itself,
*    if and wherever such third-party acknowlegements normally appear.
* 
* 4. The names "ObjectStyle Group" and "Cayenne" must not be used to endorse
*    or promote products derived from this software without prior written
*    permission. For written permission, email
*    "andrus at objectstyle dot org".
* 
* 5. Products derived from this software may not be called "ObjectStyle"
*    or "Cayenne", nor may "ObjectStyle" or "Cayenne" appear in their
*    names without prior written permission.
* 
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED.  IN NO EVENT SHALL THE OBJECTSTYLE GROUP OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* 
* 
* This software consists of voluntary contributions made by many
* individuals and hosted on ObjectStyle Group web site.  For more
* information on the ObjectStyle Group, please see
* <http://objectstyle.org/>.
*/

import java.util.regex.Matcher; import java.util.regex.Pattern; /**

* A collection of utility methods related to regular expressions processing.
* 
* @since 1.2
* @author Andrus Adamchik
*/

class RegexUtil {

    /** Matches a single backslash character. */
    static final Pattern BACKSLASH = Pattern.compile("\\\\");

    /** Matches a single literal dot. */
    static final Pattern DOT = Pattern.compile("\\.");

    /**
     * Replaces all backslashes "\" with forward slashes "/". Convenience method to
     * convert path Strings to URI format.
     *
     * @param string text to convert; may be null
     * @return null for null input, the same String instance when it contains no
     *         backslash, otherwise a copy with every backslash replaced by "/"
     */
    static String substBackslashes(String string) {
        if (string == null) {
            return null;
        }

        Matcher matcher = BACKSLASH.matcher(string);
        // replaceAll() resets the matcher first, so the find() probe is safe.
        return matcher.find() ? matcher.replaceAll("/") : string;
    }

    /**
     * Returns package name for the Java class as a path separated with forward slash
     * ("/"). Method is used to lookup resources that are located in package
     * subdirectories. For example, a String "a/b/c" will be returned for class name
     * "a.b.c.ClassName".
     *
     * @param className fully qualified class name; may be null
     * @return the package part with dots replaced by "/", or an empty String when
     *         className is null or contains no dot
     */
    static String getPackagePath(String className) {
        if (className == null) {
            return "";
        }

        Matcher matcher = DOT.matcher(className);
        if (matcher.find()) {
            String path = matcher.replaceAll("/");
            // Drop the last segment, which is the simple class name.
            return path.substring(0, path.lastIndexOf("/"));
        }
        else {
            return "";
        }
    }

    /**
     * Converts a SQL-style pattern to a valid Perl regular expression. E.g.:
     * <p>
     * <code>"billing_%"</code> will become <code>^billing_.*$</code>
     * <p>
     * <code>"user?"</code> will become <code>^user.?$</code>
     *
     * @param pattern SQL-style pattern; must be neither null nor empty
     * @return the pattern anchored with "^" and "$", with "%" and "*" turned into
     *         ".*", "?" into ".?", and ".", "/", "$", "^" backslash-escaped
     * @throws NullPointerException if pattern is null
     * @throws IllegalArgumentException if pattern is empty
     */
    static String sqlPatternToRegex(String pattern) {
        if (pattern == null) {
            throw new NullPointerException("Null pattern.");
        }

        if (pattern.length() == 0) {
            throw new IllegalArgumentException("Empty pattern.");
        }

        StringBuffer buffer = new StringBuffer();

        // convert * into regex syntax
        // e.g. abc*x becomes ^abc.*x$
        // or   abc?x becomes ^abc.?x$
        buffer.append("^");
        for (int j = 0; j < pattern.length(); j++) {
            char nextChar = pattern.charAt(j);
            // "%" is handled exactly like "*".
            if (nextChar == '%') {
                nextChar = '*';
            }

            if (nextChar == '*' || nextChar == '?') {
                buffer.append('.');
            }
            // escape special chars
            // NOTE(review): only these four characters are escaped; other regex
            // metacharacters are copied through unchanged.
            else if (nextChar == '.'
                    || nextChar == '/'
                    || nextChar == '$'
                    || nextChar == '^') {
                buffer.append('\\');
            }

            buffer.append(nextChar);
        }

        buffer.append("$");
        return buffer.toString();
    }

    /** Not instantiable: every member is static. */
    private RegexUtil() {
        super();
    }
}
// </source>

Match the Q[^u] pattern against strings from command line

   <source lang="java">

/*

* Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* 
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java 
* language and environment is gratefully acknowledged.
* 
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/

import java.util.regex.Matcher; import java.util.regex.Pattern; /**

* Match the "Q[^u]" pattern against strings from the command line.
* 
* @author Ian F. Darwin, http://www.darwinsys.ru/
* @version $Id: RE_QnotU_Args.java,v 1.4 2004/02/09 03:33:42 ian Exp $
*/

public class RE_QnotU_Args {

  /** Sample input checked when no command-line arguments are supplied. */
  private static final String DEFAULT_INPUT = "RE_QnotU_Args";

  /**
   * Reports, for each input string, whether it starts with a match of
   * {@code ^Q[^u]\d+\.}.
   *
   * @param argv strings to test; when empty, the built-in sample
   *        "RE_QnotU_Args" is tested instead
   */
  public static void main(String[] argv) {
    String patt = "^Q[^u]\\d+\\.";
    Pattern r = Pattern.compile(patt);
    // Fall back to the sample so that a run without arguments still prints
    // one line.
    String[] inputs = argv.length == 0 ? new String[] { DEFAULT_INPUT } : argv;
    for (int i = 0; i < inputs.length; i++) {
      Matcher m = r.matcher(inputs[i]);
      boolean found = m.lookingAt();
      System.out.println(patt + (found ? " matches " : " doesn't match ")
          + inputs[i]);
    }
  }

}


 </source>
   
  
 
  



Meta-characters to match against certain string boundaries

   <source lang="java">
 

Meta-Character    Matches
^     Beginning of the line
$     End of the line
\b    A word boundary
\B    A nonword boundary
\A    The beginning of the input
\G    The end of the previous match
\Z    The end of the input before any line terminators (such as carriage-return or linefeed)
\z    The end of the input

 </source>
   
  
 
  



POSIX character classes and Java character classes

   <source lang="java">
 

Character Class Meta-Character    Matches
\p{Lower}     Lowercase letter [a-z]
\p{Upper}     Uppercase letter [A-Z]
\p{ASCII}     All ASCII [\x00-\x7F]
\p{Alpha}     Any lowercase or uppercase letter
\p{Digit}     A digit [0-9]
\p{Alnum}     Any letter or digit
\p{Punct}     Punctuation [!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~]
\p{Graph}     A visible character: any letter, digit, or punctuation
\p{Print}     A printable character; same as \p{Graph}
\p{Blank}     A space or tab [ \t]
\p{Cntrl}     A control character [\x00-\x1F\x7F]
\p{XDigit}    Hexadecimal digit [0-9a-fA-F]
\p{Space}     A whitespace character [ \t\n\x0B\f\r]

 </source>
   
  
 
  



Regular Expressions in Action

   <source lang="java">

/*

* Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* 
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java 
* language and environment is gratefully acknowledged.
* 
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/

import java.io.BufferedReader; import java.io.FileWriter; import java.io.InputStream; import java.io.InputStreamReader; import java.io.PrintWriter; import java.net.URL; import java.text.SimpleDateFormat; import java.util.Date; import java.util.Properties; import java.util.regex.Matcher; import java.util.regex.Pattern; /**

* Graph of a book's sales rank on a given bookshop site.
* 
* @author Ian F. Darwin, http://www.darwinsys.ru/, Java Cookbook author,
*         originally translated fairly literally from Perl into Java.
* @author Patrick Killelea <p@patrick.net>: original Perl version, from the 2nd
*         edition of his book "Web Performance Tuning".
* @version $Id: BookRank.java,v 1.8 2004/03/20 20:48:03 ian Exp $
*/

public class BookRank {

  /** File that one "date rank" line is appended to on every successful run. */
  public final static String DATA_FILE = "book.sales";

  /** Image file that gnuplot is told to write the graph to. */
  public final static String GRAPH_FILE = "book.png";

  /**
   * Grab the sales rank off the web page and log it, then ask gnuplot to
   * redraw the graph from all logged data.
   *
   * @param args command-line arguments; not used
   * @throws Exception if the URL cannot be opened, the data file cannot be
   *         written, or gnuplot cannot be started
   */
  public static void main(String[] args) throws Exception {
    // NOTE(review): nothing is loaded into these Properties, so every
    // getProperty() call below returns its default value.
    Properties p = new Properties();
    String title = p.getProperty("title", "NO TITLE IN PROPERTIES");
    // The url must have the "isbn=" at the very end, or otherwise
    // be amenable to being string-catted to, like the default.
    String url = p.getProperty("url", "http://test.ing/test.cgi?isbn=");
    // The 10-digit ISBN for the book.
    String isbn = p.getProperty("isbn", "0000000000");
    // The RE pattern (MUST have ONE capture group for the number)
    String pattern = p.getProperty("pattern", "Rank: (\\d+)");
    // Looking for something like this in the input:
    //   QuickBookShop.web Sales Rank:
    //   26,252
    //   </font>
    Pattern r = Pattern.compile(pattern);

    // Open the URL and get a Reader from it.
    BufferedReader is = new BufferedReader(new InputStreamReader(new URL(
        url + isbn).openStream()));
    String input;
    try {
      // Read the URL looking for the rank information, as
      // a single long string, so can match RE across multi-lines.
      // NOTE(review): the reader is never read from; a fixed placeholder
      // string is matched instead. Presumably a helper that drained the
      // reader was removed from this listing -- confirm.
      input = "input from console";
      // System.out.println(input);
    } finally {
      // Release the connection even though nothing was read from it.
      is.close();
    }

    // If found, append to sales data file.
    Matcher m = r.matcher(input);
    if (m.find()) {
      PrintWriter pw = new PrintWriter(new FileWriter(DATA_FILE, true));
      try {
        // Same layout as `date +"%m %d %H %M %S %Y"`. HH is the 24-hour
        // field, matching the %H in the gnuplot timefmt set below; the
        // 12-hour "hh" would log afternoon samples as morning ones.
        String date =
            new SimpleDateFormat("MM dd HH mm ss yyyy ").format(new Date());
        // Paren 1 is the digits (and maybe ','s) that matched; remove comma
        Matcher noComma = Pattern.compile(",").matcher(m.group(1));
        pw.println(date + noComma.replaceAll(""));
      } finally {
        pw.close();
      }
    } else {
      System.err.println("WARNING: pattern `" + pattern
          + "' did not match in `" + url + isbn + "'!");
    }

    // Whether current data found or not, draw the graph, using
    // external plotting program against all historical data.
    // Could use gnuplot, R, any other math/graph program.
    // Better yet: use one of the Java plotting APIs.
    String gnuplot_cmd =
        "set term png\n"
        + "set output \"" + GRAPH_FILE + "\"\n"
        + "set xdata time\n"
        + "set ylabel \"Book sales rank\"\n"
        + "set bmargin 3\n"
        + "set logscale y\n"
        + "set yrange [1:60000] reverse\n"
        + "set timefmt \"%m %d %H %M %S %Y\"\n"
        + "plot \"" + DATA_FILE + "\" using 1:7 title \"" + title
        + "\" with lines\n";
    Process proc = Runtime.getRuntime().exec("/usr/local/bin/gnuplot");
    // The plot commands are fed to gnuplot on its standard input.
    PrintWriter gp = new PrintWriter(proc.getOutputStream());
    gp.print(gnuplot_cmd);
    gp.close();
  }

}


 </source>
   
  
 
  



Reluctant (Lazy) Operator Description

   <source lang="java">
 

X??        Matches X zero or one time
X*?        Matches X zero or more times
X+?        Matches X one or more times
X{n}?      Matches X exactly n times, where n is any number
X{n,}?     Matches X at least n times
X{n,m}?    Matches X at least n, but no more than m times

 </source>
   
  
 
  



Show case control using Regular Expressions class.

   <source lang="java">

/*

* Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* 
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java 
* language and environment is gratefully acknowledged.
* 
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/

import java.util.regex.Matcher; import java.util.regex.Pattern; /**

* Show case control using RE class.
* 
* @author Ian F. Darwin, http://www.darwinsys.ru/
* @version $Id: CaseMatch.java,v 1.4 2004/02/09 03:33:41 ian Exp $
*/

public class CaseMatch {

  /**
   * Compiles the same lower-case pattern twice, once with
   * {@link Pattern#CASE_INSENSITIVE} and once without, and prints whether
   * each version matches the start of an upper-case input.
   *
   * @param argv command-line arguments; not used
   */
  public static void main(String[] argv) {
    String pattern = "^q[^u]\\d+\\.";
    String input = "QA777. is the next flight. It is on time.";
    Pattern reCaseInsens = Pattern.compile(pattern,
        Pattern.CASE_INSENSITIVE);
    Pattern reCaseSens = Pattern.compile(pattern);
    boolean found;
    Matcher m;
    m = reCaseInsens.matcher(input); // will match any case
    found = m.lookingAt(); // will match any case
    System.out.println("IGNORE_CASE match " + found);
    m = reCaseSens.matcher(input); // Get matcher w/o case-insens flag
    found = m.lookingAt(); // will match case-sensitively
    System.out.println("MATCH_NORMAL match was " + found);
  }

}


 </source>
   
  
 
  



Simple example of using Regular Expressions class.

   <source lang="java">

/*

* Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* 
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java 
* language and environment is gratefully acknowledged.
* 
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/

import java.util.regex.Pattern; /**

* Simple example of using RE class.
* 
* @author Ian F. Darwin, http://www.darwinsys.com/
* @version $Id: RESimple.java,v 1.4 2004/02/28 02:14:11 ian Exp $
*/

public class RESimple {

  /**
   * Compiles a fixed regular expression, tests whether a fixed sample sentence
   * begins with a match for it, and prints the outcome to standard output.
   *
   * @param argv command-line arguments; not used
   */
  public static void main(String[] argv) {
    // "Q", then any one character except "u", then one or more digits, then a literal dot.
    String pattern = "^Q[^u]\\d+\\.";
    String input = "QA777. is the next flight. It is on time.";
    Pattern p = Pattern.compile(pattern);
    // lookingAt() anchors at the start of the input but need not consume all of it.
    boolean found = p.matcher(input).lookingAt();
    System.out.println("'" + pattern + "'"
        + (found ? " matches '" : " doesn't match '") + input + "'");
  }

}


 </source>
   
  
 
  



Standalone Swing GUI application for demonstrating Regular expressions.

   <source lang="java">

/*

* Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* 
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java 
* language and environment is gratefully acknowledged.
* 
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/

import java.awt.GridLayout; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; import javax.swing.ButtonGroup; import javax.swing.JCheckBox; import javax.swing.JFrame; import javax.swing.JLabel; import javax.swing.JPanel; import javax.swing.JRadioButton; import javax.swing.JTextField; import javax.swing.event.ChangeEvent; import javax.swing.event.ChangeListener; import javax.swing.event.DocumentEvent; import javax.swing.event.DocumentListener; /**

* Standalone Swing GUI application for demonstrating REs.
* TODO: Show the entire match, and $1 and up as captures that matched.
*
* @author Ian Darwin, http://www.darwinsys.com/
* @version #Id$
*/

public class REDemo extends JPanel {

  /** Most recently compiled pattern; null until the pattern text has compiled successfully. */
  protected Pattern pattern;
  /** Matcher obtained from {@link #pattern}; reset against the input string on each attempt. */
  protected Matcher matcher;
  /** Text fields holding the regular expression and the string it is tested against. */
  protected JTextField patternTF, stringTF;
  /** Selected when the current pattern text compiled without a syntax error. */
  protected JCheckBox compiledOK;
  /** Mode selectors: match the whole input, find one occurrence, or count all occurrences. */
  protected JRadioButton match, find, findAll;
  /** Shows "Yes" or "No", or the occurrence count when "Find All" finds at least one. */
  protected JTextField matchesTF;

  /**
   * "main program" method - construct and show.
   *
   * @param av command-line arguments; not used
   */
  public static void main(String[] av) {
    // Swing components must be created and shown on the event dispatch thread.
    javax.swing.SwingUtilities.invokeLater(new Runnable() {
      public void run() {
        JFrame f = new JFrame("REDemo");
        f.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        REDemo comp = new REDemo();
        f.setContentPane(comp);
        f.pack();
        f.setLocation(200, 200);
        f.setVisible(true);
      }
    });
  }

  /** Construct the REDemo object including its GUI. */
  public REDemo() {
    super();

    // Row 1: pattern entry plus the "Syntax OK?" indicator.
    JPanel top = new JPanel();
    top.add(new JLabel("Pattern:", JLabel.RIGHT));
    patternTF = new JTextField(20);
    patternTF.getDocument().addDocumentListener(new PatternListener());
    top.add(patternTF);
    top.add(new JLabel("Syntax OK?"));
    compiledOK = new JCheckBox();
    top.add(compiledOK);

    // Switching the mode re-evaluates the current pattern against the current string.
    ChangeListener cl = new ChangeListener() {
      public void stateChanged(ChangeEvent ce) {
        tryMatch();
      }
    };

    // Row 3: mutually exclusive mode buttons; "Match" is the initial selection.
    JPanel switchPane = new JPanel();
    ButtonGroup bg = new ButtonGroup();
    match = new JRadioButton("Match");
    match.setSelected(true);
    match.addChangeListener(cl);
    bg.add(match);
    switchPane.add(match);
    find = new JRadioButton("Find");
    find.addChangeListener(cl);
    bg.add(find);
    switchPane.add(find);
    findAll = new JRadioButton("Find All");
    findAll.addChangeListener(cl);
    bg.add(findAll);
    switchPane.add(findAll);

    // Row 2: input string entry plus the result display.
    JPanel strPane = new JPanel();
    strPane.add(new JLabel("String:", JLabel.RIGHT));
    stringTF = new JTextField(20);
    stringTF.getDocument().addDocumentListener(new StringListener());
    strPane.add(stringTF);
    strPane.add(new JLabel("Matches:"));
    matchesTF = new JTextField(3);
    strPane.add(matchesTF);

    // Single column, one row per sub-panel, 5 pixel gaps.
    setLayout(new GridLayout(0, 1, 5, 5));
    add(top);
    add(strPane);
    add(switchPane);
  }

  /**
   * Shows a yes/no result in the "Matches" field.
   *
   * @param b true to display "Yes", false to display "No"
   */
  protected void setMatches(boolean b) {
    if (b) {
      matchesTF.setText("Yes");
    } else {
      matchesTF.setText("No");
    }
  }

  /**
   * Shows a match count in the "Matches" field.
   *
   * @param n number of occurrences to display
   */
  protected void setMatches(int n) {
    matchesTF.setText(Integer.toString(n));
  }

  /**
   * Compiles the text of the pattern field, records success or failure in the
   * "Syntax OK?" check box, and then refreshes the match result so that it
   * always reflects the pattern currently shown.
   */
  protected void tryCompile() {
    pattern = null;
    try {
      pattern = Pattern.compile(patternTF.getText());
      matcher = pattern.matcher("");
      compiledOK.setSelected(true);
    } catch (PatternSyntaxException ex) {
      // Expected while the user is mid-edit; reported through the check box only.
      compiledOK.setSelected(false);
    }
    tryMatch();
  }

  /**
   * Applies the compiled pattern to the text of the string field using the
   * selected mode and displays the outcome.
   *
   * @return true if the pattern is valid and matched in the selected mode,
   *         false otherwise
   */
  protected boolean tryMatch() {
    if (pattern == null) {
      // No usable pattern: do not leave a result from an earlier pattern on screen.
      setMatches(false);
      return false;
    }
    matcher.reset(stringTF.getText());
    if (match.isSelected() && matcher.matches()) {
      setMatches(true);
      return true;
    }
    if (find.isSelected() && matcher.find()) {
      setMatches(true);
      return true;
    }
    if (findAll.isSelected()) {
      int i = 0;
      while (matcher.find()) {
        ++i;
      }
      if (i > 0) {
        setMatches(i);
        return true;
      }
    }
    setMatches(false);
    return false;
  }

  /** Any change to the pattern tries to compile the result. */
  class PatternListener implements DocumentListener {
    public void changedUpdate(DocumentEvent ev) {
      tryCompile();
    }

    public void insertUpdate(DocumentEvent ev) {
      tryCompile();
    }

    public void removeUpdate(DocumentEvent ev) {
      tryCompile();
    }
  }

  /** Any change to the input string tries to match the result. */
  class StringListener implements DocumentListener {
    public void changedUpdate(DocumentEvent ev) {
      tryMatch();
    }

    public void insertUpdate(DocumentEvent ev) {
      tryMatch();
    }

    public void removeUpdate(DocumentEvent ev) {
      tryMatch();
    }
  }

}


 </source>