Java/Regular Expressions/Basic Regular Expressions
Версия от 18:01, 31 мая 2010; (обсуждение)
Содержание
- 1 Character Class Matches
- 2 Character classes specify a list of possible characters
- 3 demonstrate Regular Expressions: Match -> group()
- 4 Displays directory listing using regular expressions
- 5 Greedy Operator Description
- 6 Like Regular Expression Demo in a TextField
- 7 Matcher and Pattern demo
- 8 Matcher and Pattern demo 2
- 9 Match SQL string
- 10 Match the Q[^u] pattern against strings from command line
- 11 Meta-characters to match against certain string boundaries
- 12 POSIX character classes and Java character classes
- 13 Regular Expressions in Action
- 14 Reluctant (Lazy) Operator Description
- 15 Show case control using Regular Expressions class.
- 16 Simple example of using Regular Expressions class.
- 17 Standalone Swing GUI application for demonstrating Regular expressions.
Character Class Matches
\p{javaLowerCase} Everything that Character.isLowerCase() matches
\p{javaUpperCase} Everything that Character.isUpperCase() matches
\p{javaWhitespace} Everything that Character.isWhitespace() matches
\p{javaMirrored} Everything that Character.isMirrored() matches
Character classes specify a list of possible characters
Character Class Meta-Character Matches
. Any single character
\d A digit [0-9]
\D A nondigit [^0-9]
\s A whitespace character [ \t\n\x0B\f\r]
\S A nonwhitespace character [^\s]
\w A word character [a-zA-Z_0-9]
\W A nonword character [^\w]
demonstrate Regular Expressions: Match -> group()
<img alt="REmatch -- demonstrate RE Match -> group()" src="http://www.jexp.ru/Code/JavaImages/REmatch.PNG">
/*
* Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java
* language and environment is gratefully acknowledged.
*
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * REmatch -- demonstrate RE Match -> group()
 *
 * @author Ian F. Darwin, http://www.darwinsys.ru/
 * @version $Id: REmatch.java,v 1.5 2004/02/28 02:21:26 ian Exp $
 */
public class REmatch {
    /**
     * Searches a fixed sample line for the pattern and prints the text that
     * matched (group 0), or "NO MATCH" when the pattern is not found.
     *
     * @param argv command-line arguments; not used
     */
    public static void main(String[] argv) {
        // 'Q', then any character other than 'u', then digits, then a literal dot.
        String patt = "Q[^u]\\d+\\.";
        Pattern r = Pattern.compile(patt);
        String line = "Order QT300. Now!";
        Matcher m = r.matcher(line);
        // find() scans anywhere in the line; group(0) is the whole matched text.
        if (m.find()) {
            System.out.println(patt + " matches \"" + m.group(0) + "\" in \""
                    + line + "\"");
        } else {
            System.out.println("NO MATCH");
        }
    }
}
Displays directory listing using regular expressions
// : c12:DirList.java
// Displays directory listing using regular expressions.
// {Args: "D.*\.java"}
// From "Thinking in Java, 3rd ed." (c) Bruce Eckel 2002
// www.BruceEckel.ru. See copyright notice in CopyRight.txt.
import java.io.File;
import java.io.FilenameFilter;
import java.util.Arrays;
import java.util.Comparator;
import java.util.regex.Pattern;
/**
 * Prints the entries of the current directory in case-insensitive
 * alphabetical order, one per line.
 */
public class DirList {
    /**
     * Lists the current directory.
     *
     * @param args optional; when present, args[0] is a regular expression and
     *             only names accepted by a DirFilter built from it are listed
     */
    public static void main(String[] args) {
        File path = new File(".");
        String[] list;
        if (args.length == 0) {
            list = path.list();
        } else {
            list = path.list(new DirFilter(args[0]));
        }
        // File.list() returns null when the path is not a directory or an I/O
        // error occurs; sorting null would throw a NullPointerException.
        if (list == null) {
            System.err.println("Cannot list directory: " + path);
            return;
        }
        Arrays.sort(list, new AlphabeticComparator());
        for (int i = 0; i < list.length; i++) {
            System.out.println(list[i]);
        }
    }
}
/**
 * FilenameFilter that accepts a file when its name, with any leading path
 * information removed, matches a regular expression in full.
 */
class DirFilter implements FilenameFilter {
    private Pattern pattern;

    /**
     * @param regex regular expression that the whole file name must match
     */
    public DirFilter(String regex) {
        pattern = Pattern.compile(regex);
    }

    /**
     * @param dir  directory containing the file; not used
     * @param name candidate file name
     * @return true when the last path component of name matches the pattern
     */
    public boolean accept(File dir, String name) {
        // Strip path information, search for regex:
        return pattern.matcher(new File(name).getName()).matches();
    }
} ///:~
/**
 * Orders Strings alphabetically, ignoring case, by comparing their
 * lower-cased forms.
 */
class AlphabeticComparator implements Comparator {
    /**
     * @param o1 first value; must be a String
     * @param o2 second value; must be a String
     * @return negative, zero or positive as o1 sorts before, equal to or
     *         after o2 once both are lower-cased
     * @throws ClassCastException if either argument is not a String
     */
    public int compare(Object o1, Object o2) {
        String s1 = (String) o1;
        String s2 = (String) o2;
        return s1.toLowerCase().compareTo(s2.toLowerCase());
    }
} ///:~
Greedy Operator Description
X? Matches X zero or one time
X* Matches X zero or more times
X+ Matches X one or more times
X{n} Matches X exactly n times, where n is any number
X{n,} Matches X at least n times
X{n,m} Matches X at least n, but no more than m times
Like Regular Expression Demo in a TextField
/*
* Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java
* language and environment is gratefully acknowledged.
*
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/
import java.awt.BorderLayout;
import java.awt.Container;
import javax.swing.JFrame;
import javax.swing.JTextArea;
/**
 * Like REDemo, but also shows the matched text in a text area.
 *
 * NOTE(review): pattern, matcher, stringTF, match, find, findAll and
 * setMatches() are not declared here; they are presumably inherited from
 * REDemo -- confirm against that class.
 */
public class REDemo2 extends REDemo {
    /** Text area that receives the matched text. */
    JTextArea logTextArea;

    /** "main program" method - construct and show */
    public static void main(String[] av) {
        JFrame f = new JFrame("REDemo2");
        f.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        REDemo2 comp = new REDemo2();
        Container cp = f.getContentPane();
        cp.add(comp, BorderLayout.NORTH);
        // The constructor already added logTextArea to comp; adding it here
        // moves it to the bottom of the frame's content pane instead.
        cp.add(comp.logTextArea, BorderLayout.SOUTH);
        f.pack();
        f.setVisible(true);
    }

    /** Builds the inherited UI and adds a 10x40 log text area. */
    REDemo2() {
        super();
        logTextArea = new JTextArea(10, 40);
        add(logTextArea);
    }

    /**
     * Runs the superclass match attempt and, when it succeeds, re-runs the
     * matcher on the input text and writes the matched text to the log area.
     *
     * @return true when something matched and was logged, false otherwise
     */
    protected boolean tryMatch() {
        if (pattern == null) {
            return false;
        }
        logTextArea.setText("");
        if (!super.tryMatch()) {
            return false;
        }
        int n = matcher.groupCount();
        matcher.reset(stringTF.getText());
        if (match.isSelected() && matcher.matches()) {
            logTextArea.setText(matcher.group());
            return true;
        }
        if (find.isSelected() && matcher.find()) {
            logTextArea.setText(matcher.group());
            return true;
        }
        if (findAll.isSelected()) {
            int i;
            for (i = 0; i < n; i++) {
                // group() throws IllegalStateException after a failed find(),
                // so stop as soon as the input has no further match.
                if (!matcher.find()) {
                    break;
                }
                logTextArea.append(i + ": " + matcher.group(i) + "\n");
            }
            if (i > 0) {
                return true;
            }
        }
        setMatches(false);
        return false;
    }
}
Matcher and Pattern demo
/*
* Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java
* language and environment is gratefully acknowledged.
*
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Matcher and Pattern demo: prints the text matched by a greedy pattern at
 * the start of a sample sentence.
 */
public class GetParen0 {
    /**
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        Pattern myRE = Pattern.compile("d.*ian");
        Matcher matcher = myRE
                .matcher("darwinian pterodactyls soared over the devonian space");
        // lookingAt() anchors the match at the start of the input; group(0)
        // may only be read after a successful match.
        if (matcher.lookingAt()) {
            String result = matcher.group(0);
            System.out.println(result);
        } else {
            System.out.println("NO MATCH");
        }
    }
}
Matcher and Pattern demo 2
/*
* Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java
* language and environment is gratefully acknowledged.
*
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Matcher and Pattern demo 2: extracts two capture groups (a word and a
 * number) from a sample string and prints them.
 */
public class GetParen1 {
    /**
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // Group 1: a run of word characters; group 2: a run of digits.
        Pattern patt = Pattern.compile("(\\w+)\\s(\\d+)");
        Matcher matcher = patt.matcher("Bananas 123");
        // Groups may only be read after a successful match.
        if (matcher.lookingAt()) {
            System.out.println("Name: " + matcher.group(1));
            System.out.println("Number: " + matcher.group(2));
        } else {
            System.out.println("NO MATCH");
        }
    }
}
Match SQL string
/*
*
* The ObjectStyle Group Software License, version 1.1
* ObjectStyle Group - http://objectstyle.org/
*
* Copyright (c) 2002-2005, Andrei (Andrus) Adamchik and individual authors
* of the software. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution, if any,
* must include the following acknowlegement:
* "This product includes software developed by independent contributors
* and hosted on ObjectStyle Group web site (http://objectstyle.org/)."
* Alternately, this acknowlegement may appear in the software itself,
* if and wherever such third-party acknowlegements normally appear.
*
* 4. The names "ObjectStyle Group" and "Cayenne" must not be used to endorse
* or promote products derived from this software without prior written
* permission. For written permission, email
* "andrus at objectstyle dot org".
*
* 5. Products derived from this software may not be called "ObjectStyle"
* or "Cayenne", nor may "ObjectStyle" or "Cayenne" appear in their
* names without prior written permission.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE OBJECTSTYLE GROUP OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*
* This software consists of voluntary contributions made by many
* individuals and hosted on ObjectStyle Group web site. For more
* information on the ObjectStyle Group, please see
* <http://objectstyle.org/>.
*/
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * A collection of utility methods related to regular expressions processing.
 *
 * @since 1.2
 * @author Andrus Adamchik
 */
class RegexUtil {
    /** Matches a single backslash character. */
    static final Pattern BACKSLASH = Pattern.compile("\\\\");
    /** Matches a single literal dot. */
    static final Pattern DOT = Pattern.compile("\\.");

    /**
     * Replaces all backslashes "\" with forward slashes "/". Convenience method to
     * convert path Strings to URI format.
     *
     * @param string text to convert; may be null
     * @return the converted text, the same String when it contains no
     *         backslash, or null when the argument is null
     */
    static String substBackslashes(String string) {
        if (string == null) {
            return null;
        }
        Matcher matcher = BACKSLASH.matcher(string);
        // In a replacement string a backslash escapes the next character, so
        // "\\/" inserts a plain "/".
        return matcher.find() ? matcher.replaceAll("\\/") : string;
    }

    /**
     * Returns package name for the Java class as a path separated with forward slash
     * ("/"). Method is used to lookup resources that are located in package
     * subdirectories. For example, a String "a/b/c" will be returned for class name
     * "a.b.c.ClassName".
     *
     * @param className fully qualified class name; may be null
     * @return the package path, or an empty String when className is null or
     *         contains no dot
     */
    static String getPackagePath(String className) {
        if (className == null) {
            return "";
        }
        Matcher matcher = DOT.matcher(className);
        if (matcher.find()) {
            String path = matcher.replaceAll("\\/");
            // Drop the trailing "/ClassName" component.
            return path.substring(0, path.lastIndexOf("/"));
        }
        else {
            return "";
        }
    }

    /**
     * Converts a SQL-style pattern to a valid Perl regular expression. E.g.:
     * <p>
     * <code>"billing_%"</code> will become <code>^billing_.*$</code>
     * <p>
     * <code>"user?"</code> will become <code>^user.?$</code>
     *
     * @param pattern SQL-style pattern; must be non-null and non-empty
     * @return the pattern translated and anchored with ^ and $
     * @throws NullPointerException if pattern is null
     * @throws IllegalArgumentException if pattern is empty
     */
    static String sqlPatternToRegex(String pattern) {
        if (pattern == null) {
            throw new NullPointerException("Null pattern.");
        }
        if (pattern.length() == 0) {
            throw new IllegalArgumentException("Empty pattern.");
        }
        StringBuffer buffer = new StringBuffer();
        // convert * into regex syntax
        // e.g. abc*x becomes ^abc.*x$
        // or abc?x becomes ^abc.?x$
        buffer.append("^");
        for (int j = 0; j < pattern.length(); j++) {
            char nextChar = pattern.charAt(j);
            // '%' is handled exactly like '*'.
            if (nextChar == '%') {
                nextChar = '*';
            }
            // Wildcards become quantifiers on "any character".
            if (nextChar == '*' || nextChar == '?') {
                buffer.append(".");
            }
            // escape special chars
            else if (nextChar == '.'
                    || nextChar == '/'
                    || nextChar == '$'
                    || nextChar == '^') {
                buffer.append("\\");
            }
            buffer.append(nextChar);
        }
        buffer.append("$");
        return buffer.toString();
    }

    /** Not instantiable: the class only holds static helpers. */
    private RegexUtil() {
        super();
    }
}
Match the Q[^u] pattern against strings from command line
/*
* Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java
* language and environment is gratefully acknowledged.
*
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Match the "Q[^u]" pattern against a string.
 *
 * NOTE(review): the title says the strings come from the command line, but
 * this version matches only the fixed text "RE_QnotU_Args" and ignores argv.
 *
 * @author Ian F. Darwin, http://www.darwinsys.ru/
 * @version $Id: RE_QnotU_Args.java,v 1.4 2004/02/09 03:33:42 ian Exp $
 */
public class RE_QnotU_Args {
    /**
     * Prints whether the pattern matches at the start of the fixed text.
     *
     * @param argv command-line arguments; not used
     */
    public static void main(String[] argv) {
        String patt = "^Q[^u]\\d+\\.";
        Pattern r = Pattern.compile(patt);
        Matcher m = r.matcher("RE_QnotU_Args");
        // lookingAt() requires the match to begin at the start of the input.
        boolean found = m.lookingAt();
        System.out.println(patt + (found ? " matches " : " doesn't match ")
                + "RE_QnotU_Args");
    }
}
Meta-characters to match against certain string boundaries
Meta-Character Matches
^ Beginning of the line
$ End of the line
\b A word boundary
\B A nonword boundary
\A The beginning of the input
\G The end of the previous match
\Z The end of the input before any line terminators (such as carriage-return or linefeed)
\z The end of the input
POSIX character classes and Java character classes
Character Class Meta-Character Matches
\p{Lower} Lowercase letter [a-z]
\p{Upper} Uppercase letter [A-Z]
\p{ASCII} All ASCII [\x00-\x7F]
\p{Alpha} Any lowercase or uppercase letter
\p{Digit} A digit [0-9]
\p{Alnum} Any letter or digit
\p{Punct} Punctuation [!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~]
\p{Graph} A visible character: any letter, digit, or punctuation
\p{Print} A printable character: \p{Graph} plus the space character [\p{Graph}\x20]
\p{Blank} A space or tab [ \t]
\p{Cntrl} A control character [\x00-\x1F\x7F]
\p{XDigit} Hexadecimal digit [0-9a-fA-F]
\p{Space} A whitespace character [ \t\n\x0B\f\r]
Regular Expressions in Action
/*
* Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java
* language and environment is gratefully acknowledged.
*
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/
import java.io.BufferedReader;
import java.io.FileWriter;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Graph of a book's sales rank on a given bookshop site.
 *
 * @author Ian F. Darwin, http://www.darwinsys.ru/, Java Cookbook author,
 *         originally translated fairly literally from Perl into Java.
 * @author Patrick Killelea <p@patrick.net>: original Perl version, from the 2nd
 *         edition of his book "Web Performance Tuning".
 * @version $Id: BookRank.java,v 1.8 2004/03/20 20:48:03 ian Exp $
 */
public class BookRank {
    /** File that accumulates one "date rank" line per successful run. */
    public final static String DATA_FILE = "book.sales";
    /** Image file that gnuplot is told to write. */
    public final static String GRAPH_FILE = "book.png";

    /**
     * Grab the sales rank off the web page and log it, then ask gnuplot to
     * redraw the graph from all logged data.
     *
     * @param args command-line arguments; not used
     * @throws Exception if the URL cannot be read, the data file cannot be
     *                   written, or gnuplot cannot be started
     */
    public static void main(String[] args) throws Exception {
        // No properties are loaded here, so every lookup yields its default.
        Properties p = new Properties();
        String title = p.getProperty("title", "NO TITLE IN PROPERTIES");
        // The url must have the "isbn=" at the very end, or otherwise
        // be amenable to being string-catted to, like the default.
        String url = p.getProperty("url", "http://test.ing/test.cgi?isbn=");
        // The 10-digit ISBN for the book.
        String isbn = p.getProperty("isbn", "0000000000");
        // The RE pattern (MUST have ONE capture group for the number)
        String pattern = p.getProperty("pattern", "Rank: (\\d+)");
        // Looking for something like this in the input:
        // <b>QuickBookShop.web Sales Rank: </b>
        // 26,252
        // </font><br>
        Pattern r = Pattern.compile(pattern);
        // Open the URL and get a Reader from it.
        BufferedReader is = new BufferedReader(new InputStreamReader(new URL(
                url + isbn).openStream()));
        // Read the URL looking for the rank information, as
        // a single long string, so can match RE across multi-lines.
        StringBuffer page = new StringBuffer();
        try {
            String line;
            while ((line = is.readLine()) != null) {
                page.append(line).append('\n');
            }
        } finally {
            // Always release the network connection.
            is.close();
        }
        String input = page.toString();
        // System.out.println(input);
        // If found, append to sales data file.
        Matcher m = r.matcher(input);
        if (m.find()) {
            PrintWriter pw = new PrintWriter(new FileWriter(DATA_FILE, true));
            try {
                // Same layout as `date +"%m %d %H %M %S %Y"`: HH is the 24-hour
                // clock, which is what the gnuplot timefmt below (%H) expects.
                String date = new SimpleDateFormat("MM dd HH mm ss yyyy ")
                        .format(new Date());
                // Paren 1 is the digits (and maybe ','s) that matched; remove comma
                Matcher noComma = Pattern.compile(",").matcher(m.group(1));
                pw.println(date + noComma.replaceAll(""));
            } finally {
                pw.close();
            }
        } else {
            System.err.println("WARNING: pattern `" + pattern
                    + "' did not match in `" + url + isbn + "'!");
        }
        // Whether current data found or not, draw the graph, using
        // external plotting program against all historical data.
        // Could use gnuplot, R, any other math/graph program.
        // Better yet: use one of the Java plotting APIs.
        // Column 7 of the data file is the rank; columns 1-6 are the date.
        String gnuplot_cmd = "set term png\n" + "set output \"" + GRAPH_FILE
                + "\"\n" + "set xdata time\n"
                + "set ylabel \"Book sales rank\"\n" + "set bmargin 3\n"
                + "set logscale y\n" + "set yrange [1:60000] reverse\n"
                + "set timefmt \"%m %d %H %M %S %Y\"\n" + "plot \"" + DATA_FILE
                + "\" using 1:7 title \"" + title + "\" with lines\n";
        Process proc = Runtime.getRuntime().exec("/usr/local/bin/gnuplot");
        // The commands are fed to gnuplot on its standard input.
        PrintWriter gp = new PrintWriter(proc.getOutputStream());
        gp.print(gnuplot_cmd);
        gp.close();
    }
}
Reluctant (Lazy) Operator Description
X?? Matches X zero or one time
X*? Matches X zero or more times
X+? Matches X one or more times
X{n}? Matches X exactly n times, where n is any number
X{n,}? Matches X at least n times
X{n,m}? Matches X at least n, but no more than m times
Show case control using Regular Expressions class.
/*
* Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java
* language and environment is gratefully acknowledged.
*
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Show case control using RE class.
 *
 * @author Ian F. Darwin, http://www.darwinsys.ru/
 * @version $Id: CaseMatch.java,v 1.4 2004/02/09 03:33:41 ian Exp $
 */
public class CaseMatch {
    /**
     * Matches the same lower-case pattern against an upper-case input twice,
     * once with CASE_INSENSITIVE and once without, and prints both results.
     *
     * @param argv command-line arguments; not used
     */
    public static void main(String[] argv) {
        String pattern = "^q[^u]\\d+\\.";
        String input = "QA777. is the next flight. It is on time.";
        Pattern reCaseInsens = Pattern.compile(pattern,
                Pattern.CASE_INSENSITIVE);
        Pattern reCaseSens = Pattern.compile(pattern);
        boolean found;
        Matcher m;
        m = reCaseInsens.matcher(input); // will match any case
        found = m.lookingAt(); // will match any case
        System.out.println("IGNORE_CASE match " + found);
        m = reCaseSens.matcher(input); // Get matcher w/o case-insens flag
        found = m.lookingAt(); // will match case-sensitively
        System.out.println("MATCH_NORMAL match was " + found);
    }
}
Simple example of using Regular Expressions class.
/*
* Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java
* language and environment is gratefully acknowledged.
*
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/
import java.util.regex.Pattern;
/**
 * Simple example of using RE class.
 *
 * @author Ian F. Darwin, http://www.darwinsys.ru/
 * @version $Id: RESimple.java,v 1.4 2004/02/28 02:14:11 ian Exp $
 */
public class RESimple {
    /**
     * Prints whether the pattern matches at the start of a fixed input line,
     * with the pattern and the input each shown in single quotes.
     *
     * @param argv command-line arguments; not used
     */
    public static void main(String[] argv) {
        String pattern = "^Q[^u]\\d+\\.";
        String input = "QA777. is the next flight. It is on time.";
        Pattern p = Pattern.compile(pattern);
        // lookingAt() requires the match to begin at the start of the input.
        boolean found = p.matcher(input).lookingAt();
        System.out.println("'" + pattern + "'"
                + (found ? " matches '" : " doesn't match '") + input + "'");
    }
}
Standalone Swing GUI application for demonstrating Regular expressions.
/*
* Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java
* language and environment is gratefully acknowledged.
*
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/
import java.awt.GridLayout;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import javax.swing.ButtonGroup;
import javax.swing.JCheckBox;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.JPanel;
import javax.swing.JRadioButton;
import javax.swing.JTextField;
import javax.swing.SwingUtilities;
import javax.swing.event.ChangeEvent;
import javax.swing.event.ChangeListener;
import javax.swing.event.DocumentEvent;
import javax.swing.event.DocumentListener;
/**
 * Standalone Swing GUI application for demonstrating regular expressions.
 * <p>
 * The panel shows a pattern field with a "Syntax OK?" check box, an input
 * string field with a small result field, and three radio buttons ("Match",
 * "Find", "Find All") that select how the pattern is applied to the string.
 * The result is refreshed whenever the pattern, the string, or the selected
 * mode changes.
 * <br/>TODO: Show the entire match, and $1 and up as captures that matched.
 *
 * @author Ian Darwin, http://www.darwinsys.ru/
 * @version $Id$
 */
public class REDemo extends JPanel {
    /** The most recently compiled pattern, or {@code null} if the pattern text does not compile. */
    protected Pattern pattern;
    /** Matcher created from {@link #pattern}; reset against the input string on every match attempt. */
    protected Matcher matcher;
    /** Text fields holding the pattern and the string to test it against. */
    protected JTextField patternTF, stringTF;
    /** Selected while the current pattern text compiles. */
    protected JCheckBox compiledOK;
    /** Mode selectors: whole-string match, first occurrence, or count of all occurrences. */
    protected JRadioButton match, find, findAll;
    /** Shows "Yes", "No", or a match count. */
    protected JTextField matchesTF;

    /**
     * "main program" method - construct and show.
     *
     * @param av command-line arguments; not used
     */
    public static void main(String[] av) {
        // Swing components must be created and shown on the event dispatch thread.
        SwingUtilities.invokeLater(new Runnable() {
            @Override
            public void run() {
                JFrame f = new JFrame("REDemo");
                f.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
                REDemo comp = new REDemo();
                f.setContentPane(comp);
                f.pack();
                f.setLocation(200, 200);
                f.setVisible(true);
            }
        });
    }

    /** Construct the REDemo object including its GUI. */
    public REDemo() {
        super();

        // Row 1: pattern entry and syntax indicator.
        JPanel top = new JPanel();
        top.add(new JLabel("Pattern:", JLabel.RIGHT));
        patternTF = new JTextField(20);
        patternTF.getDocument().addDocumentListener(new PatternListener());
        top.add(patternTF);
        top.add(new JLabel("Syntax OK?"));
        compiledOK = new JCheckBox();
        top.add(compiledOK);

        // Re-run the match whenever any of the mode buttons changes state.
        ChangeListener cl = new ChangeListener() {
            @Override
            public void stateChanged(ChangeEvent ce) {
                tryMatch();
            }
        };

        // Row 3: mutually exclusive mode buttons.
        JPanel switchPane = new JPanel();
        ButtonGroup bg = new ButtonGroup();
        match = new JRadioButton("Match");
        match.setSelected(true);
        match.addChangeListener(cl);
        bg.add(match);
        switchPane.add(match);
        find = new JRadioButton("Find");
        find.addChangeListener(cl);
        bg.add(find);
        switchPane.add(find);
        findAll = new JRadioButton("Find All");
        findAll.addChangeListener(cl);
        bg.add(findAll);
        switchPane.add(findAll);

        // Row 2: input string and result.
        JPanel strPane = new JPanel();
        strPane.add(new JLabel("String:", JLabel.RIGHT));
        stringTF = new JTextField(20);
        stringTF.getDocument().addDocumentListener(new StringListener());
        strPane.add(stringTF);
        strPane.add(new JLabel("Matches:"));
        matchesTF = new JTextField(3);
        strPane.add(matchesTF);

        setLayout(new GridLayout(0, 1, 5, 5));
        add(top);
        add(strPane);
        add(switchPane);
    }

    /**
     * Shows a boolean result in the result field.
     *
     * @param b {@code true} to display "Yes", {@code false} to display "No"
     */
    protected void setMatches(boolean b) {
        if (b) {
            matchesTF.setText("Yes");
        } else {
            matchesTF.setText("No");
        }
    }

    /**
     * Shows a match count in the result field.
     *
     * @param n the number to display
     */
    protected void setMatches(int n) {
        matchesTF.setText(Integer.toString(n));
    }

    /**
     * Compiles the text of the pattern field. On success {@link #pattern} and
     * {@link #matcher} are replaced and the "Syntax OK?" box is selected; on a
     * syntax error {@link #pattern} is left {@code null} and the box is cleared.
     */
    protected void tryCompile() {
        pattern = null;
        try {
            pattern = Pattern.compile(patternTF.getText());
            matcher = pattern.matcher("");
            compiledOK.setSelected(true);
        } catch (PatternSyntaxException ex) {
            // Expected while the user is part-way through typing a pattern;
            // the check box is the error report.
            compiledOK.setSelected(false);
        }
    }

    /**
     * Applies the current pattern to the text of the string field using the
     * selected mode and displays the outcome.
     *
     * @return {@code true} if the string matched (or, in "Find All" mode,
     *         matched at least once); {@code false} otherwise, including when
     *         there is no valid pattern
     */
    protected boolean tryMatch() {
        if (pattern == null) {
            // No valid pattern: clear the field so a result computed for an
            // earlier pattern is not left on screen.
            matchesTF.setText("");
            return false;
        }
        matcher.reset(stringTF.getText());
        if (match.isSelected() && matcher.matches()) {
            setMatches(true);
            return true;
        }
        if (find.isSelected() && matcher.find()) {
            setMatches(true);
            return true;
        }
        if (findAll.isSelected()) {
            // Count successive matches by calling find() until it fails.
            int i = 0;
            while (matcher.find()) {
                ++i;
            }
            if (i > 0) {
                setMatches(i);
                return true;
            }
        }
        setMatches(false);
        return false;
    }

    /**
     * Any change to the pattern tries to compile the result, then re-runs the
     * match so the displayed result always belongs to the pattern on screen.
     */
    class PatternListener implements DocumentListener {
        @Override
        public void changedUpdate(DocumentEvent ev) {
            patternEdited();
        }

        @Override
        public void insertUpdate(DocumentEvent ev) {
            patternEdited();
        }

        @Override
        public void removeUpdate(DocumentEvent ev) {
            patternEdited();
        }

        /** Recompiles the pattern and refreshes the match result. */
        private void patternEdited() {
            tryCompile();
            tryMatch();
        }
    }

    /** Any change to the input string tries to match the result. */
    class StringListener implements DocumentListener {
        @Override
        public void changedUpdate(DocumentEvent ev) {
            tryMatch();
        }

        @Override
        public void insertUpdate(DocumentEvent ev) {
            tryMatch();
        }

        @Override
        public void removeUpdate(DocumentEvent ev) {
            tryMatch();
        }
    }
}