Java/Regular Expressions/Grep
Contents
A command-line grep-like program.
<source lang="java">
/*
 * Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
 * All rights reserved. Software written by Ian F. Darwin and others.
 * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
 * cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
 * pioneering role in inventing and promulgating (and standardizing) the Java
 * language and environment is gratefully acknowledged.
 *
 * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
 * inventing predecessor languages C and C++ is also gratefully acknowledged.
 */
import java.util.regex.*; import java.io.*; /** A command-line grep-like program. No options, but takes a pattern
* and an arbitrary list of text files. */
public class Grep1 {
    /** The pattern we're looking for. */
    protected Pattern pattern;
    /** The matcher for this pattern; reset and reused for every input line. */
    protected Matcher matcher;

    /**
     * Makes a Grep1 object for the pattern in {@code argv[0]} and runs it on
     * every file named in the remaining arguments, or on standard input when
     * no file names are given.
     *
     * @param argv the pattern followed by zero or more file names
     * @throws Exception if a named file cannot be opened
     */
    public static void main(String[] argv) throws Exception {
        if (argv.length < 1) {
            System.err.println("Usage: Grep1 pattern [filename]");
            System.exit(1);
        }

        Grep1 pg = new Grep1(argv[0]);

        if (argv.length == 1) {
            pg.process(new BufferedReader(new InputStreamReader(System.in)),
                    "(standard input)", false);
        } else {
            for (int i = 1; i < argv.length; i++) {
                pg.process(new BufferedReader(new FileReader(argv[i])), argv[i], true);
            }
        }
    }

    /**
     * Constructs a Grep1 program for the given regular expression.
     *
     * @param patt the regular expression to search for
     */
    public Grep1(String patt) {
        pattern = Pattern.compile(patt);
        matcher = pattern.matcher("");
    }

    /**
     * Scans one input, printing every line in which the pattern occurs.
     * The reader is always closed before this method returns; I/O errors are
     * reported on standard error rather than propagated.
     *
     * @param inputFile BufferedReader object already open
     * @param fileName name of the input, used as the output prefix
     * @param printFileName true to print {@code fileName + ": "} before
     *        each matching line
     */
    public void process(BufferedReader inputFile, String fileName, boolean printFileName) {
        try {
            String inputLine;
            while ((inputLine = inputFile.readLine()) != null) {
                matcher.reset(inputLine);
                // find() matches anywhere in the line, as grep does;
                // lookingAt() would only accept a match anchored at column 0.
                if (matcher.find()) {
                    if (printFileName) {
                        System.out.print(fileName + ": ");
                    }
                    System.out.println(inputLine);
                }
            }
        } catch (IOException e) {
            System.err.println(e);
        } finally {
            // Close even when readLine() failed, so the reader is not leaked.
            try {
                inputFile.close();
            } catch (IOException e) {
                System.err.println(e);
            }
        }
    }
}
</source>
A grep-like program using NIO but NOT LINE BASED.
<source lang="java">
/*
 * Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
 * All rights reserved. Software written by Ian F. Darwin and others.
 * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
 * cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
 * pioneering role in inventing and promulgating (and standardizing) the Java
 * language and environment is gratefully acknowledged.
 *
 * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
 * inventing predecessor languages C and C++ is also gratefully acknowledged.
 */
import java.io.*; import java.nio.*; import java.nio.channels.*; import java.nio.charset.*; import java.util.regex.*; /** A grep-like program using NIO but NOT LINE BASED.
* Pattern and file name(s) must be on command line. */
public class GrepNIO {

    /**
     * Compiles the pattern in {@code args[0]} and searches each remaining
     * argument as a file.
     *
     * @param args the pattern followed by one or more file names
     * @throws IOException if a file cannot be opened, mapped or decoded
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 2) {
            System.err.println("Usage: GrepNIO patt file [...]");
            System.exit(1);
        }

        Pattern p = Pattern.compile(args[0]);
        for (int i = 1; i < args.length; i++) {
            process(p, args[i]);
        }
    }

    /**
     * Prints every occurrence of the pattern in the given file, one match per
     * output line. The file is treated as a single character sequence, so only
     * the matched text is printed, not the line that contains it.
     *
     * @param pattern the compiled pattern to search for
     * @param fileName path of the file to search
     * @throws IOException if the file cannot be opened, mapped or decoded
     */
    static void process(Pattern pattern, String fileName) throws IOException {
        FileInputStream in = new FileInputStream(fileName);
        try {
            // Get a FileChannel from the given file.
            FileChannel fc = in.getChannel();

            // Map the file's content
            ByteBuffer buf = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size());

            // Decode ByteBuffer into CharBuffer
            CharBuffer cbuf = Charset.forName("ISO-8859-1").newDecoder().decode(buf);

            Matcher m = pattern.matcher(cbuf);
            while (m.find()) {
                System.out.println(m.group(0));
            }
        } finally {
            // Closing the stream also closes its channel, so no descriptor
            // is leaked for each file processed.
            in.close();
        }
    }
}
</source>
Another Grep
<source lang="java">
/*
 * Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
 * All rights reserved. Software written by Ian F. Darwin and others.
 * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
 * cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
 * pioneering role in inventing and promulgating (and standardizing) the Java
 * language and environment is gratefully acknowledged.
 *
 * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
 * inventing predecessor languages C and C++ is also gratefully acknowledged.
 */
// Sun Microsystems Example Code @(#)Grep.java 1.1 01/05/10
// Search a list of files for lines that match a given regular-expression
// pattern. Demonstrates NIO mapped byte buffers, charsets, and regular
// expressions.
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.CharBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

public class GrepSun {

    // Charset and decoder for ISO-8859-15
    private static Charset charset = Charset.forName("ISO-8859-15");
    private static CharsetDecoder decoder = charset.newDecoder();

    // Pattern used to parse lines: a newline-terminated line, or a final
    // non-empty run of characters reaching the end of input, so that a last
    // line without a trailing newline is still searched.
    private static Pattern linePattern = Pattern.compile(".*\r?\n|.+\\z");

    // The input pattern that we're looking for
    private static Pattern pattern;

    /**
     * Compiles the pattern from the command line into {@link #pattern}.
     * Prints the syntax error and exits with status 1 if it is invalid.
     *
     * @param pat the regular expression text
     */
    private static void compile(String pat) {
        try {
            pattern = Pattern.compile(pat);
        } catch (PatternSyntaxException x) {
            System.err.println(x.getMessage());
            System.exit(1);
        }
    }

    /**
     * Uses the linePattern to break the given CharBuffer into lines, applying
     * the input pattern to each line and printing {@code file:lineNumber:line}
     * for every line that contains a match.
     *
     * @param f the file being searched, used only as the output prefix
     * @param cb the decoded contents of the file
     */
    private static void grep(File f, CharBuffer cb) {
        Matcher lm = linePattern.matcher(cb); // Line matcher
        Matcher pm = null;                    // Pattern matcher
        int lines = 0;
        while (lm.find()) {
            lines++;
            CharSequence cs = lm.group();     // The current line
            if (pm == null) {
                pm = pattern.matcher(cs);
            } else {
                pm.reset(cs);
            }
            if (pm.find()) {
                // cs normally carries its own line terminator.
                System.out.print(f + ":" + lines + ":" + cs);
                if (cs.charAt(cs.length() - 1) != '\n') {
                    // Unterminated final line: finish the output line ourselves.
                    System.out.println();
                }
            }
            if (lm.end() == cb.limit()) {
                break;
            }
        }
    }

    /**
     * Searches for occurrences of the input pattern in the given file.
     *
     * @param f the file to search
     * @throws IOException if the file cannot be opened, mapped or decoded, or
     *         is too large to map into a single buffer
     */
    private static void grep(File f) throws IOException {
        // Open the file and then get a channel from the stream
        FileInputStream fis = new FileInputStream(f);
        try {
            FileChannel fc = fis.getChannel();

            // Get the file's size and then map it into memory. The size is
            // checked as a long: casting to int would silently truncate
            // files larger than Integer.MAX_VALUE bytes.
            long sz = fc.size();
            if (sz > Integer.MAX_VALUE) {
                throw new IOException("file too large to map (" + sz + " bytes)");
            }
            MappedByteBuffer bb = fc.map(FileChannel.MapMode.READ_ONLY, 0, sz);

            // Decode the file into a char buffer
            CharBuffer cb = decoder.decode(bb);

            // Perform the search
            grep(f, cb);
        } finally {
            // Close the stream (and with it the channel) even on failure
            fis.close();
        }
    }

    /**
     * Compiles the pattern in {@code args[0]} and searches each remaining
     * argument as a file, reporting per-file I/O errors on standard error and
     * continuing with the next file.
     *
     * @param args the pattern followed by one or more file names
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            System.err.println("Usage: java Grep pattern file...");
            return;
        }
        compile(args[0]);
        for (int i = 1; i < args.length; i++) {
            File f = new File(args[i]);
            try {
                grep(f);
            } catch (IOException x) {
                System.err.println(f + ": " + x);
            }
        }
    }
}
</source>
Grep0 - Match lines from stdin against the pattern on the command line.
<source lang="java">
/*
 * Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
 * All rights reserved. Software written by Ian F. Darwin and others.
 * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
 * cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
 * pioneering role in inventing and promulgating (and standardizing) the Java
 * language and environment is gratefully acknowledged.
 *
 * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
 * inventing predecessor languages C and C++ is also gratefully acknowledged.
 */
import java.io.*; import java.util.regex.*; /** Grep0 - Match lines from stdin against the pattern on the command line.
*/
public class Grep0 {

    /**
     * Reads standard input line by line and prints, prefixed with
     * {@code "MATCH: "}, every line in which the pattern occurs.
     *
     * @param args exactly one element: the regular expression to search for
     * @throws IOException if reading standard input fails
     */
    public static void main(String[] args) throws IOException {
        // Validate the arguments first so no reader is created on a usage error.
        if (args.length != 1) {
            System.err.println("Usage: MatchLines pattern");
            System.exit(1);
        }

        Pattern patt = Pattern.compile(args[0]);
        // One matcher is created up front and reset for every line.
        Matcher matcher = patt.matcher("");

        BufferedReader is = new BufferedReader(new InputStreamReader(System.in));
        String line;
        while ((line = is.readLine()) != null) {
            matcher.reset(line);
            if (matcher.find()) {
                System.out.println("MATCH: " + line);
            }
        }
    }
}
</source>