Java/Regular Expressions/Grep

Материал из Java эксперт
Перейти к: навигация, поиск

A command-line grep-like program.

   <source lang="java">

/*

* Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* 
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java 
* language and environment is gratefully acknowledged.
* 
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/

import java.util.regex.*; import java.io.*; /** A command-line grep-like program. No options, but takes a pattern

* and an arbitrary list of text files.
*/

public class Grep1 {

  /** The pattern we're looking for. */
  protected Pattern pattern;
  /** The matcher for this pattern; it is reset against each input line in turn. */
  protected Matcher matcher;

  /**
   * Makes a Grep1 object for the pattern in argv[0] and runs it on every
   * file named in the remaining arguments, or on standard input when no
   * file name is given.
   *
   * @param argv the pattern, optionally followed by file names
   * @throws Exception if an input file cannot be opened
   */
  public static void main(String[] argv) throws Exception {
    if (argv.length < 1) {
      System.err.println("Usage: Grep1 pattern [filename]");
      System.exit(1);
    }
    Grep1 pg = new Grep1(argv[0]);
    if (argv.length == 1) {
      pg.process(new BufferedReader(new InputStreamReader(System.in)),
          "(standard input)", false);
    } else {
      for (int i = 1; i < argv.length; i++) {
        pg.process(new BufferedReader(new FileReader(argv[i])), argv[i], true);
      }
    }
  }

  /**
   * Constructs a Grep1 program for one regular expression.
   *
   * @param patt the regular expression to search for
   */
  public Grep1(String patt) {
    pattern = Pattern.compile(patt);
    // Start with an empty input; process() re-targets the matcher per line.
    matcher = pattern.matcher("");
  }

  /**
   * Does the work of scanning one file, printing each matching line to
   * standard output. The reader is always closed before this method returns,
   * even when reading fails; I/O errors are reported on standard error.
   *
   * @param inputFile BufferedReader object already open
   * @param fileName String name of the input file
   * @param printFileName true to print the file name before lines that match
   */
  public void process(
      BufferedReader inputFile, String fileName, boolean printFileName) {
    try {
      try {
        String inputLine;
        while ((inputLine = inputFile.readLine()) != null) {
          matcher.reset(inputLine);
          // lookingAt() anchors the match at the start of the line, so only
          // lines that BEGIN with a match are reported.
          if (matcher.lookingAt()) {
            if (printFileName) {
              System.out.print(fileName + ": ");
            }
            System.out.println(inputLine);
          }
        }
      } finally {
        // Release the reader even if readLine() threw part-way through.
        inputFile.close();
      }
    } catch (IOException e) {
      System.err.println(e);
    }
  }

}


      </source>
   
  
 
  



A grep-like program using NIO but NOT LINE BASED.

   <source lang="java">

/*

* Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* 
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java 
* language and environment is gratefully acknowledged.
* 
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/

import java.io.*; import java.nio.*; import java.nio.channels.*; import java.nio.charset.*; import java.util.regex.*; /** A grep-like program using NIO but NOT LINE BASED.

* Pattern and file name(s) must be on command line.
*/

public class GrepNIO {

  /**
   * Compiles the pattern in args[0] and searches every file named in the
   * remaining arguments.
   *
   * @param args the pattern followed by one or more file names
   * @throws IOException if a file cannot be opened, mapped or decoded
   */
  public static void main(String[] args) throws IOException {
    if (args.length < 2) {
      System.err.println("Usage: GrepNIO patt file [...]");
      System.exit(1);
    }
    Pattern p = Pattern.compile(args[0]);
    for (int i = 1; i < args.length; i++) {
      process(p, args[i]);
    }
  }

  /**
   * Prints every match of the pattern found in one file, one match per
   * output line. The whole file is treated as a single character sequence,
   * so this prints the matched text itself rather than the enclosing line.
   *
   * @param pattern the compiled pattern to search for
   * @param fileName path of the file to search
   * @throws IOException if the file cannot be opened, mapped or decoded
   */
  static void process(Pattern pattern, String fileName) throws IOException {
    FileInputStream in = new FileInputStream(fileName);
    try {
      // Get a FileChannel from the given file.
      FileChannel fc = in.getChannel();
      // Map the file's content.
      ByteBuffer buf = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size());
      // Decode the ByteBuffer into a CharBuffer.
      CharBuffer cbuf =
          Charset.forName("ISO-8859-1").newDecoder().decode(buf);
      Matcher m = pattern.matcher(cbuf);
      while (m.find()) {
        System.out.println(m.group(0));
      }
    } finally {
      // Closing the stream also closes its channel, so no file descriptor
      // is leaked per processed file, even when mapping or decoding fails.
      in.close();
    }
  }

}


      </source>
   
  
 
  



Another Grep

   <source lang="java">

/*

* Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* 
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java 
* language and environment is gratefully acknowledged.
* 
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/

// Sun Microsystems Example Code @(#)Grep.java 1.1 01/05/10
// Search a list of files for lines that match a given regular-expression
// pattern. Demonstrates NIO mapped byte buffers, charsets, and regular
// expressions.
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.CharBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

public class GrepSun {

  // Charset and decoder for ISO-8859-15
  private static final Charset charset = Charset.forName("ISO-8859-15");
  private static final CharsetDecoder decoder = charset.newDecoder();

  // Pattern used to parse lines: a newline-terminated line, or a final
  // line that reaches end of input without a terminating newline.
  private static final Pattern linePattern = Pattern.compile(".*\r?\n|.+\\z");

  // The input pattern that we're looking for
  private static Pattern pattern;

  // Compile the pattern from the command line; on a syntax error, print
  // the error message and exit with status 1.
  //
  private static void compile(String pat) {
    try {
      pattern = Pattern.compile(pat);
    } catch (PatternSyntaxException x) {
      System.err.println(x.getMessage());
      System.exit(1);
    }
  }

  // Use the linePattern to break the given CharBuffer into lines, applying
  // the input pattern to each line to see if we have a match. Matching
  // lines are printed as "file:lineNumber:line".
  //
  private static void grep(File f, CharBuffer cb) {
    Matcher lm = linePattern.matcher(cb); // Line matcher
    Matcher pm = null; // Pattern matcher, created lazily and then reused
    int lines = 0;
    while (lm.find()) {
      lines++;
      String line = lm.group(); // The current line, terminator included
      if (pm == null) {
        pm = pattern.matcher(line);
      } else {
        pm.reset(line);
      }
      if (pm.find()) {
        System.out.print(f + ":" + lines + ":" + line);
        // An unterminated final line carries no newline of its own.
        if (!line.endsWith("\n")) {
          System.out.println();
        }
      }
    }
  }

  // Search for occurrences of the input pattern in the given file
  //
  private static void grep(File f) throws IOException {
    // Open the file and then get a channel from the stream
    FileInputStream fis = new FileInputStream(f);
    try {
      FileChannel fc = fis.getChannel();
      // Get the file's size and then map it into memory. A single mapping
      // cannot exceed Integer.MAX_VALUE bytes, so report oversized files
      // explicitly instead of silently truncating the size to an int.
      long sz = fc.size();
      if (sz > Integer.MAX_VALUE) {
        throw new IOException("file too large to map: " + sz + " bytes");
      }
      MappedByteBuffer bb = fc.map(FileChannel.MapMode.READ_ONLY, 0, sz);
      // Decode the file into a char buffer
      CharBuffer cb = decoder.decode(bb);
      // Perform the search
      grep(f, cb);
    } finally {
      // Close the stream (and with it the channel) even if the search failed
      fis.close();
    }
  }

  // Compile the pattern in args[0] and search each remaining argument as a
  // file; a failure on one file is reported and the next file is tried.
  //
  public static void main(String[] args) {
    if (args.length < 2) {
      System.err.println("Usage: java Grep pattern file...");
      return;
    }
    compile(args[0]);
    for (int i = 1; i < args.length; i++) {
      File f = new File(args[i]);
      try {
        grep(f);
      } catch (IOException x) {
        System.err.println(f + ": " + x);
      }
    }
  }

}

      </source>
   
  
 
  



Grep0 - Match lines from stdin against the pattern on the command line.

   <source lang="java">

/*

* Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
*    notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
*    notice, this list of conditions and the following disclaimer in the
*    documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* 
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java 
* language and environment is gratefully acknowledged.
* 
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/

import java.io.*; import java.util.regex.*; /** Grep0 - Match lines from stdin against the pattern on the command line.

*/

public class Grep0 {

  /**
   * Reads standard input line by line and prints, prefixed with "MATCH: ",
   * every line in which the pattern given as the single command-line
   * argument is found anywhere.
   *
   * @param args exactly one element: the regular expression to look for
   * @throws IOException if reading standard input fails
   */
  public static void main(String[] args) throws IOException {
    // Validate the arguments before touching standard input.
    if (args.length != 1) {
      System.err.println("Usage: Grep0 pattern");
      System.exit(1);
    }
    Pattern patt = Pattern.compile(args[0]);
    // One matcher, reset for each line, instead of a new one per line.
    Matcher matcher = patt.matcher("");
    BufferedReader is =
        new BufferedReader(new InputStreamReader(System.in));
    String line = null;
    while ((line = is.readLine()) != null) {
      matcher.reset(line);
      if (matcher.find()) {
        System.out.println("MATCH: " + line);
      }
    }
  }

}

      </source>