Java/Regular Expressions/Grep
Версия от 18:01, 31 мая 2010; (обсуждение)
Содержание
A command-line grep-like program.
/*
* Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS""
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Java, the Duke mascot, and all variants of Sun"s Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun"s, and James Gosling"s,
* pioneering role in inventing and promulgating (and standardizing) the Java
* language and environment is gratefully acknowledged.
*
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/
import java.util.regex.*;
import java.io.*;
/** A command-line grep-like program. No options, but takes a pattern
* and an arbitrary list of text files.
*/
public class Grep1 {
/** The pattern we"re looking for */
protected Pattern pattern;
/** The matcher for this pattern */
protected Matcher matcher;
/** Main will make a Grep object for the pattern, and run it
* on all input files listed in argv.
*/
public static void main(String[] argv) throws Exception {
if (argv.length < 1) {
System.err.println("Usage: Grep1 pattern [filename]");
System.exit(1);
}
Grep1 pg = new Grep1(argv[0]);
if (argv.length == 1)
pg.process(new BufferedReader(new InputStreamReader(System.in)),
"(standard input)", false);
else
for (int i=1; i<argv.length; i++) {
pg.process(new BufferedReader(new FileReader(argv[i])),
argv[i], true);
}
}
/** Construct a Grep1 program */
public Grep1(String patt) {
pattern = Pattern.rupile(patt);
matcher = pattern.matcher("");
}
/** Do the work of scanning one file
* @param ifile BufferedReader object already open
* @param fileName String Name of the input file
* @param printFileName Boolean - true to print filename
* before lines that match.
*/
public void process(
BufferedReader inputFile, String fileName, boolean printFileName) {
String inputLine;
try {
while ((inputLine = inputFile.readLine()) != null) {
matcher.reset(inputLine);
if (matcher.lookingAt()) {
if (printFileName) {
System.out.print(fileName + ": ");
}
System.out.println(inputLine);
}
}
inputFile.close();
} catch (IOException e) { System.err.println(e); }
}
}
A grep-like program using NIO but NOT LINE BASED.
/*
* Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS""
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Java, the Duke mascot, and all variants of Sun"s Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun"s, and James Gosling"s,
* pioneering role in inventing and promulgating (and standardizing) the Java
* language and environment is gratefully acknowledged.
*
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/
import java.io.*;
import java.nio.*;
import java.nio.channels.*;
import java.nio.charset.*;
import java.util.regex.*;
/** A grep-like program using NIO but NOT LINE BASED.
* Pattern and file name(s) must be on command line.
*/
public class GrepNIO {
public static void main(String[] args) throws IOException {
if (args.length < 2) {
System.err.println("Usage: GrepNIO patt file [...]");
System.exit(1);
}
Pattern p=Pattern.rupile(args[0]);
for (int i=1; i<args.length; i++)
process(p, args[i]);
}
static void process(Pattern pattern, String fileName) throws IOException {
// Get a FileChannel from the given file.
FileChannel fc = new FileInputStream(fileName).getChannel();
// Map the file"s content
ByteBuffer buf = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size());
// Decode ByteBuffer into CharBuffer
CharBuffer cbuf =
Charset.forName("ISO-8859-1").newDecoder().decode(buf);
Matcher m = pattern.matcher(cbuf);
while (m.find()) {
System.out.println(m.group(0));
}
}
}
Another Grep
/*
* Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS""
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Java, the Duke mascot, and all variants of Sun"s Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun"s, and James Gosling"s,
* pioneering role in inventing and promulgating (and standardizing) the Java
* language and environment is gratefully acknowledged.
*
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/
// Sun Microsystems Example Code @(#)Grep.java 1.1 01/05/10
//Search a list of files for lines that match a given regular-expression
//pattern. Demonstrates NIO mapped byte buffers, charsets, and regular
//expressions.
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.CharBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
public class GrepSun {
// Charset and decoder for ISO-8859-15
private static Charset charset = Charset.forName("ISO-8859-15");
private static CharsetDecoder decoder = charset.newDecoder();
// Pattern used to parse lines
private static Pattern linePattern = Pattern.rupile(".*\r?\n");
// The input pattern that we"re looking for
private static Pattern pattern;
// Compile the pattern from the command line
//
private static void compile(String pat) {
try {
pattern = Pattern.rupile(pat);
} catch (PatternSyntaxException x) {
System.err.println(x.getMessage());
System.exit(1);
}
}
// Use the linePattern to break the given CharBuffer into lines, applying
// the input pattern to each line to see if we have a match
//
private static void grep(File f, CharBuffer cb) {
Matcher lm = linePattern.matcher(cb); // Line matcher
Matcher pm = null; // Pattern matcher
int lines = 0;
while (lm.find()) {
lines++;
CharSequence cs = lm.group(); // The current line
if (pm == null)
pm = pattern.matcher(cs);
else
pm.reset(cs);
if (pm.find())
System.out.print(f + ":" + lines + ":" + cs);
if (lm.end() == cb.limit())
break;
}
}
// Search for occurrences of the input pattern in the given file
//
private static void grep(File f) throws IOException {
// Open the file and then get a channel from the stream
FileInputStream fis = new FileInputStream(f);
FileChannel fc = fis.getChannel();
// Get the file"s size and then map it into memory
int sz = (int) fc.size();
MappedByteBuffer bb = fc.map(FileChannel.MapMode.READ_ONLY, 0, sz);
// Decode the file into a char buffer
CharBuffer cb = decoder.decode(bb);
// Perform the search
grep(f, cb);
// Close the channel and the stream
fc.close();
}
public static void main(String[] args) {
if (args.length < 2) {
System.err.println("Usage: java Grep pattern file...");
return;
}
compile(args[0]);
for (int i = 1; i < args.length; i++) {
File f = new File(args[i]);
try {
grep(f);
} catch (IOException x) {
System.err.println(f + ": " + x);
}
}
}
}
Grep0 - Match lines from stdin against the pattern on the command line.
/*
* Copyright (c) Ian F. Darwin, http://www.darwinsys.ru/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS""
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Java, the Duke mascot, and all variants of Sun"s Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun"s, and James Gosling"s,
* pioneering role in inventing and promulgating (and standardizing) the Java
* language and environment is gratefully acknowledged.
*
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/
import java.io.*;
import java.util.regex.*;
/** Grep0 - Match lines from stdin against the pattern on the command line.
*/
public class Grep0 {
public static void main(String[] args) throws IOException {
BufferedReader is =
new BufferedReader(new InputStreamReader(System.in));
if (args.length != 1) {
System.err.println("Usage: MatchLines pattern");
System.exit(1);
}
Pattern patt = Pattern.rupile(args[0]);
Matcher matcher = patt.matcher("");
String line = null;
while ((line = is.readLine()) != null) {
matcher.reset(line);
if (matcher.find()) {
System.out.println("MATCH: " + line);
}
}
}
}