Java/Regular Expressions/Grep

Материал из Java эксперт
Перейти к: навигация, поиск

A command-line grep-like program.

/*
 * Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
 * All rights reserved. Software written by Ian F. Darwin and others.
 * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * 
 * Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
 * cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
 * pioneering role in inventing and promulgating (and standardizing) the Java 
 * language and environment is gratefully acknowledged.
 * 
 * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
 * inventing predecessor languages C and C++ is also gratefully acknowledged.
 */
import java.util.regex.*;
import java.io.*;
/** A command-line grep-like program. No options, but takes a pattern
 * and an arbitrary list of text files.
 */
public class Grep1 {
  /** The compiled pattern we're looking for. */
  protected Pattern pattern;
  /** The matcher for this pattern; it is reset against each input line. */
  protected Matcher matcher;

  /**
   * Makes a Grep1 for the pattern in argv[0] and runs it on every file
   * named in the remaining arguments, or on standard input when no file
   * is named.
   * @param argv the pattern followed by zero or more file names
   * @throws Exception if the pattern is invalid or a file cannot be opened
   */
  public static void main(String[] argv) throws Exception {
    if (argv.length < 1) {
      System.err.println("Usage: Grep1 pattern [filename]");
      System.exit(1);
    }
    Grep1 pg = new Grep1(argv[0]);
    if (argv.length == 1) {
      pg.process(new BufferedReader(new InputStreamReader(System.in)),
        "(standard input)", false);
    } else {
      for (int i = 1; i < argv.length; i++) {
        pg.process(new BufferedReader(new FileReader(argv[i])),
          argv[i], true);
      }
    }
  }

  /**
   * Construct a Grep1 program.
   * @param patt the regular expression to search for
   * @throws PatternSyntaxException if patt is not a valid regular expression
   */
  public Grep1(String patt) {
    pattern = Pattern.compile(patt);
    // One matcher is created up front and re-targeted with reset() per line.
    matcher = pattern.matcher("");
  }

  /**
   * Do the work of scanning one file: print every line in which the
   * pattern occurs anywhere. The reader is always closed before this
   * method returns; an IOException is reported on System.err rather
   * than propagated.
   * @param inputFile BufferedReader object already open
   * @param fileName String Name of the input file
   * @param printFileName Boolean - true to print filename
   * before lines that match.
   */
  public void process(
    BufferedReader inputFile, String fileName, boolean printFileName) {
    try {
      try {
        String inputLine;
        while ((inputLine = inputFile.readLine()) != null) {
          matcher.reset(inputLine);
          // find() matches anywhere in the line, as grep does;
          // lookingAt() would only match at the start of the line.
          if (matcher.find()) {
            if (printFileName) {
              System.out.print(fileName + ": ");
            }
            System.out.println(inputLine);
          }
        }
      } finally {
        // Close even when readLine() fails, so the reader is not leaked.
        inputFile.close();
      }
    } catch (IOException e) {
      System.err.println(e);
    }
  }
}





A grep-like program using NIO but NOT LINE BASED.

/*
 * Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
 * All rights reserved. Software written by Ian F. Darwin and others.
 * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * 
 * Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
 * cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
 * pioneering role in inventing and promulgating (and standardizing) the Java 
 * language and environment is gratefully acknowledged.
 * 
 * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
 * inventing predecessor languages C and C++ is also gratefully acknowledged.
 */
import java.io.*;
import java.nio.*;
import java.nio.channels.*;
import java.nio.charset.*;
import java.util.regex.*;
/** A grep-like program using NIO but NOT LINE BASED.
 * Pattern and file name(s) must be on command line.
 */
public class GrepNIO {
  /**
   * Compiles the pattern in args[0] and searches each remaining argument
   * as a file name.
   * @param args the pattern followed by one or more file names
   * @throws IOException if a file cannot be opened, mapped or decoded
   */
  public static void main(String[] args) throws IOException {
    if (args.length < 2) {
      System.err.println("Usage: GrepNIO patt file [...]");
      System.exit(1);
    }
    Pattern p = Pattern.compile(args[0]);
    for (int i = 1; i < args.length; i++) {
      process(p, args[i]);
    }
  }

  /**
   * Prints every match of the pattern found in the named file, one match
   * per output line. The whole file is decoded into a single CharBuffer,
   * so matching is not line based: only the matched text is printed, not
   * the line that contains it.
   * @param pattern the compiled pattern to search for
   * @param fileName the file to search
   * @throws IOException if the file cannot be opened, mapped or decoded
   */
  static void process(Pattern pattern, String fileName) throws IOException {
    FileInputStream in = new FileInputStream(fileName);
    try {
      // Get a FileChannel from the given file.
      FileChannel fc = in.getChannel();
      // Map the file's content
      ByteBuffer buf = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size());
      // Decode ByteBuffer into CharBuffer
      CharBuffer cbuf =
        Charset.forName("ISO-8859-1").newDecoder().decode(buf);
      Matcher m = pattern.matcher(cbuf);
      while (m.find()) {
        System.out.println(m.group(0));
      }
    } finally {
      // Closing the stream also closes its channel; without this every
      // processed file leaked an open descriptor.
      in.close();
    }
  }
}





Another Grep

/*
 * Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
 * All rights reserved. Software written by Ian F. Darwin and others.
 * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * 
 * Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
 * cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
 * pioneering role in inventing and promulgating (and standardizing) the Java 
 * language and environment is gratefully acknowledged.
 * 
 * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
 * inventing predecessor languages C and C++ is also gratefully acknowledged.
 */
// Sun Microsystems Example Code @(#)Grep.java 1.1 01/05/10
//Search a list of files for lines that match a given regular-expression
//pattern. Demonstrates NIO mapped byte buffers, charsets, and regular
//expressions.
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.CharBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
/**
 * Searches a list of files for lines that match a regular expression,
 * printing each matching line as file:lineNumber:line. Each file is
 * memory-mapped and decoded as ISO-8859-15 before being searched.
 */
public class GrepSun {
  // Charset and decoder for ISO-8859-15
  private static Charset charset = Charset.forName("ISO-8859-15");
  private static CharsetDecoder decoder = charset.newDecoder();
  // Pattern used to parse lines. It requires a line terminator, so a
  // final line that does not end in a newline is never examined.
  private static Pattern linePattern = Pattern.compile(".*\r?\n");
  // The input pattern that we're looking for
  private static Pattern pattern;

  // Compile the pattern from the command line; on a syntax error the
  // message is printed to System.err and the program exits with status 1.
  //
  private static void compile(String pat) {
    try {
      pattern = Pattern.compile(pat);
    } catch (PatternSyntaxException x) {
      System.err.println(x.getMessage());
      System.exit(1);
    }
  }

  // Use the linePattern to break the given CharBuffer into lines, applying
  // the input pattern to each line to see if we have a match
  //
  private static void grep(File f, CharBuffer cb) {
    Matcher lm = linePattern.matcher(cb); // Line matcher
    Matcher pm = null; // Pattern matcher
    int lines = 0;
    while (lm.find()) {
      lines++;
      CharSequence cs = lm.group(); // The current line
      // Create the pattern matcher on first use, then re-target it.
      if (pm == null) {
        pm = pattern.matcher(cs);
      } else {
        pm.reset(cs);
      }
      // cs still carries its line terminator, hence print, not println.
      if (pm.find()) {
        System.out.print(f + ":" + lines + ":" + cs);
      }
      if (lm.end() == cb.limit()) {
        break;
      }
    }
  }

  // Search for occurrences of the input pattern in the given file
  //
  private static void grep(File f) throws IOException {
    // Open the file and then get a channel from the stream
    FileInputStream fis = new FileInputStream(f);
    try {
      FileChannel fc = fis.getChannel();
      // Get the file's size and then map it into memory. A single mapping
      // cannot exceed Integer.MAX_VALUE bytes; report that explicitly
      // instead of silently truncating the size with an int cast.
      long sz = fc.size();
      if (sz > Integer.MAX_VALUE) {
        throw new IOException("file too large to map: " + sz + " bytes");
      }
      MappedByteBuffer bb = fc.map(FileChannel.MapMode.READ_ONLY, 0, sz);
      // Decode the file into a char buffer
      CharBuffer cb = decoder.decode(bb);
      // Perform the search
      grep(f, cb);
    } finally {
      // Close the stream (and with it the channel) even when mapping or
      // decoding fails.
      fis.close();
    }
  }

  /**
   * Compiles args[0] as the pattern and searches every remaining argument
   * as a file. An I/O failure on one file is reported on System.err and
   * the remaining files are still searched.
   * @param args the pattern followed by one or more file names
   */
  public static void main(String[] args) {
    if (args.length < 2) {
      System.err.println("Usage: java Grep pattern file...");
      return;
    }
    compile(args[0]);
    for (int i = 1; i < args.length; i++) {
      File f = new File(args[i]);
      try {
        grep(f);
      } catch (IOException x) {
        System.err.println(f + ": " + x);
      }
    }
  }
}





Grep0 - Match lines from stdin against the pattern on the command line.

/*
 * Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
 * All rights reserved. Software written by Ian F. Darwin and others.
 * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * 
 * Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
 * cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
 * pioneering role in inventing and promulgating (and standardizing) the Java 
 * language and environment is gratefully acknowledged.
 * 
 * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
 * inventing predecessor languages C and C++ is also gratefully acknowledged.
 */
import java.io.*;
import java.util.regex.*;
/** Grep0 - Match lines from stdin against the pattern on the command line.
 */
public class Grep0 {
  /**
   * Reads standard input line by line and prints, prefixed with
   * "MATCH: ", every line in which the pattern occurs.
   * @param args exactly one element: the regular expression to search for
   * @throws IOException if reading standard input fails
   */
  public static void main(String[] args) throws IOException {
    // Validate the arguments before touching standard input.
    if (args.length != 1) {
      System.err.println("Usage: Grep0 pattern");
      System.exit(1);
    }
    Pattern patt = Pattern.compile(args[0]);
    // One matcher is created up front and re-targeted with reset() per line.
    Matcher matcher = patt.matcher("");
    BufferedReader is =
      new BufferedReader(new InputStreamReader(System.in));
    String line = null;
    while ((line = is.readLine()) != null) {
      matcher.reset(line);
      if (matcher.find()) {
        System.out.println("MATCH: " + line);
      }
    }
  }
}