Java Tutorial/File/Reader

Материал из Java эксперт
Перейти к: навигация, поиск

An InputStream backed by a Reader

/*
 * Java Network Programming, Second Edition
 * Merlin Hughes, Michael Shoffner, Derek Hamner
 * Manning Publications Company; ISBN 188477749X
 *
 * http://nitric.ru/jnp/
 *
 * Copyright (c) 1997-1999 Merlin Hughes, Michael Shoffner, Derek Hamner;
 * all rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE ABOVE NAMED AUTHORS "AS IS" AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS, THEIR
 * PUBLISHER OR THEIR EMPLOYERS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
/**
 * An InputStream backed by a Reader
 */
public class ReaderInputStream extends InputStream {
  protected Reader reader;
  protected ByteArrayOutputStream byteArrayOut;
  protected Writer writer;
  protected char[] chars;
  protected byte[] buffer;
  protected int index, length;
  /**
   * Constructor to supply a Reader
   *
   * @param reader - the Reader used by the InputStream
   */
  public ReaderInputStream(Reader reader) {
    this.reader = reader;
    byteArrayOut = new ByteArrayOutputStream();
    writer = new OutputStreamWriter(byteArrayOut);
    chars = new char[1024];
  }
  /**
   * Constructor to supply a Reader and an encoding
   *
   * @param reader   - the Reader used by the InputStream
   * @param encoding - the encoding to use for the InputStream
   * @throws UnsupportedEncodingException if the encoding is not supported
   */
  public ReaderInputStream(Reader reader, String encoding) throws UnsupportedEncodingException {
    this.reader = reader;
    byteArrayOut = new ByteArrayOutputStream();
    writer = new OutputStreamWriter(byteArrayOut, encoding);
    chars = new char[1024];
  }
  /**
   * @see java.io.InputStream#read()
   */
  public int read() throws IOException {
    if (index >= length)
      fillBuffer();
    if (index >= length)
      return -1;
    return 0xff & buffer[index++];
  }
  protected void fillBuffer() throws IOException {
    if (length < 0)
      return;
    int numChars = reader.read(chars);
    if (numChars < 0) {
      length = -1;
    } else {
      byteArrayOut.reset();
      writer.write(chars, 0, numChars);
      writer.flush();
      buffer = byteArrayOut.toByteArray();
      length = buffer.length;
      index = 0;
    }
  }
  /**
   * @see java.io.InputStream#read(byte[], int, int)
   */
  public int read(byte[] data, int off, int len) throws IOException {
    if (index >= length)
      fillBuffer();
    if (index >= length)
      return -1;
    int amount = Math.min(len, length - index);
    System.arraycopy(buffer, index, data, off, amount);
    index += amount;
    return amount;
  }
  /**
   * @see java.io.InputStream#available()
   */
  public int available() throws IOException {
    return (index < length) ? length - index :
        ((length >= 0) && reader.ready()) ? 1 : 0;
  }
  /**
   * @see java.io.InputStream#close()
   */
  public void close() throws IOException {
    reader.close();
  }
}





Compare the contents of two Readers to determine if they are equal or not.

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
/** * Origin of code: Apache Avalon (Excalibur)
 * 
 * @author Peter Donald
 * @author Jeff Turner
 * @author Matthew Hawthorne
 * @author Stephen Colebourne
 * @author Gareth Davis
 * @version CVS $Revision$ $Date$*/
public class Main {
  /**
   * Compare the contents of two Readers to determine if they are equal or not.
   * 
   * This method buffers the input internally using <code>BufferedReader</code> if they are not
   * already buffered.
   * 
   * @param input1
   *            the first reader
   * @param input2
   *            the second reader
   * @return true if the content of the readers are equal or they both don"t exist, false
   *         otherwise
   * @throws NullPointerException
   *             if either input is null
   * @throws IOException
   *             if an I/O error occurs
   * @since 1.1
   */
  public static boolean contentEquals(Reader input1, Reader input2) throws IOException
  {
    if (!(input1 instanceof BufferedReader))
    {
      input1 = new BufferedReader(input1);
    }
    if (!(input2 instanceof BufferedReader))
    {
      input2 = new BufferedReader(input2);
    }
    int ch = input1.read();
    while (-1 != ch)
    {
      int ch2 = input2.read();
      if (ch != ch2)
      {
        return false;
      }
      ch = input1.read();
    }
    int ch2 = input2.read();
    return (ch2 == -1);
  }
}





convert Reader to InputStream

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
 *
 * The contents of this file are subject to the terms of either the GNU
 * General Public License Version 2 only ("GPL") or the Common
 * Development and Distribution License("CDDL") (collectively, the
 * "License"). You may not use this file except in compliance with the
 * License. You can obtain a copy of the License at
 * http://www.netbeans.org/cddl-gplv2.html
 * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
 * specific language governing permissions and limitations under the
 * License.  When distributing the software, include this License Header
 * Notice in each file and include the License file at
 * nbbuild/licenses/CDDL-GPL-2-CP.  Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the GPL Version 2 section of the License file that
 * accompanied this code. If applicable, add the following below the
 * License Header, with the fields enclosed by brackets [] replaced by
 * your own identifying information:
 * "Portions Copyrighted [year] [name of copyright owner]"
 *
 * Contributor(s):
 *
 * The Original Software is NetBeans. The Initial Developer of the Original
 * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
 * Microsystems, Inc. All Rights Reserved.
 *
 * If you wish your version of this file to be governed by only the CDDL
 * or only the GPL Version 2, indicate your decision by adding
 * "[Contributor] elects to include this software in this distribution
 * under the [CDDL or GPL Version 2] license." If you do not indicate a
 * single choice of license, a recipient has the option to distribute
 * your version of this file under either the CDDL, the GPL Version 2 or
 * to extend the choice of license to its licensees as provided above.
 * However, if you add GPL Version 2 code and therefore, elected the GPL
 * Version 2 license, then the option applies only if the new code is
 * made subject to such option by the copyright holder.
 */
import java.io.*;

/**
* This class convert Reader to InputStream. It works by converting
* the characters to the encoding specified in constructor parameter.
*
* @author   Petr Hamernik, David Strupl
*/
public class ReaderInputStream extends InputStream {
    /** Input Reader class. */
    private Reader reader;
    private PipedOutputStream pos;
    private PipedInputStream pis;
    private OutputStreamWriter osw;
    /** Creates new input stream from the given reader.
     * Uses the platform default encoding.
    * @param reader Input reader
    */
    public ReaderInputStream(Reader reader) throws IOException {
        this.reader = reader;
        pos = new PipedOutputStream();
        pis = new PipedInputStream(pos);
        osw = new OutputStreamWriter(pos);
    }
    /** Creates new input stream from the given reader and encoding.
     * @param reader Input reader
     * @param encoding
     */
    public ReaderInputStream(Reader reader, String encoding)
    throws IOException {
        this.reader = reader;
        pos = new PipedOutputStream();
        pis = new PipedInputStream(pos);
        osw = new OutputStreamWriter(pos, encoding);
    }
    public int read() throws IOException {
        if (pis.available() > 0) {
            return pis.read();
        }
        int c = reader.read();
        if (c == -1) {
            return c;
        }
        osw.write(c);
        osw.flush();
        pos.flush();
        return pis.read();
    }
    public int read(byte[] b, int off, int len) throws IOException {
        if (len == 0) {
            return 0;
        }
        int c = read();
        if (c == -1) {
            return -1;
        }
        b[off] = (byte) c;
        int i = 1;
        // Don"t try to fill up the buffer if the reader is waiting.
        for (; (i < len) && reader.ready(); i++) {
            c = read();
            if (c == -1) {
                return i;
            }
            b[off + i] = (byte) c;
        }
        return i;
    }
    public int available() throws IOException {
        int i = pis.available();
        if (i > 0) {
            return i;
        }
        if (reader.ready()) {
            // Char must produce at least one byte.
            return 1;
        } else {
            return 0;
        }
    }
    public void close() throws IOException {
        reader.close();
        osw.close();
        pis.close();
    }
}





CRLF Terminated Reader

/****************************************************************
 * Licensed to the Apache Software Foundation (ASF) under one   *
 * or more contributor license agreements.  See the NOTICE file *
 * distributed with this work for additional information        *
 * regarding copyright ownership.  The ASF licenses this file   *
 * to you under the Apache License, Version 2.0 (the            *
 * "License"); you may not use this file except in compliance   *
 * with the License.  You may obtain a copy of the License at   *
 *                                                              *
 *   http://www.apache.org/licenses/LICENSE-2.0                 *
 *                                                              *
 * Unless required by applicable law or agreed to in writing,   *
 * software distributed under the License is distributed on an  *
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
 * KIND, either express or implied.  See the License for the    *
 * specific language governing permissions and limitations      *
 * under the License.                                           *
 ****************************************************************/

import java.io.InputStream;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.io.IOException;
/**
 * A Reader for use with SMTP or other protocols in which lines
 * must end with CRLF.  Extends Reader and overrides its 
 * readLine() method.  The Reader readLine() method cannot
 * serve for SMTP because it ends lines with either CR or LF alone. 
 */
public class CRLFTerminatedReader extends Reader {
    public class TerminationException extends IOException {
        private int where;
        public TerminationException(int where) {
            super();
            this.where = where;
        }
        public TerminationException(String s, int where) {
            super(s);
            this.where = where;
        }
        public int position() {
            return where;
        }
    }
    public class LineLengthExceededException extends IOException {
        public LineLengthExceededException(String s) {
            super(s);
        }
    }
    /**
     * Constructs this CRLFTerminatedReader.
     * @param in an InputStream
     * @param charsetName the String name of a supported charset.  
     * "ASCII" is common here.
     * @throws UnsupportedEncodingException if the named charset
     * is not supported
     */
    InputStream in;
    public CRLFTerminatedReader(InputStream in) {
    this.in = in;
    }
    public CRLFTerminatedReader(InputStream in, String enc) throws UnsupportedEncodingException {
        this(in);
    }
    private StringBuffer lineBuffer = new StringBuffer();
    private final int
            EOF = -1,
            CR  = 13,
            LF  = 10;
    private int tainted = -1;
    /**
     * Read a line of text which is terminated by CRLF.  The concluding
     * CRLF characters are not returned with the String, but if either CR
     * or LF appears in the text in any other sequence it is returned
     * in the String like any other character.  Some characters at the 
     * end of the stream may be lost if they are in a "line" not
     * terminated by CRLF.
     * 
     * @return either a String containing the contents of a 
     * line which must end with CRLF, or null if the end of the 
     * stream has been reached, possibly discarding some characters 
     * in a line not terminated with CRLF. 
     * @throws IOException if an I/O error occurs.
     */
    public String readLine() throws IOException{
        //start with the StringBuffer empty
        lineBuffer.delete(0, lineBuffer.length());
        /* This boolean tells which state we are in,
         * depending upon whether or not we got a CR
         * in the preceding read().
         */ 
        boolean cr_just_received = false;
        // Until we add support for specifying a maximum line lenth as
        // a Service Extension, limit lines to 2K, which is twice what
        // RFC 2821 4.5.3.1 requires.
        while (lineBuffer.length() <= 2048) {
            int inChar = read();
            if (!cr_just_received){
                //the most common case, somewhere before the end of a line
                switch (inChar){
                    case CR  :  cr_just_received = true;
                                break;
                    case EOF :  return null;   // premature EOF -- discards data(?)
                    case LF  :  //the normal ending of a line
                        if (tainted == -1) tainted = lineBuffer.length();
                        // intentional fall-through
                    default  :  lineBuffer.append((char)inChar);
                }
            }else{
                // CR has been received, we may be at end of line
                switch (inChar){
                    case LF  :  // LF without a preceding CR
                        if (tainted != -1) {
                            int pos = tainted;
                            tainted = -1;
                            throw new TerminationException("\"bare\" CR or LF in data stream", pos);
                        }
                        return lineBuffer.toString();
                    case EOF :  return null;   // premature EOF -- discards data(?)
                    case CR  :  //we got two (or more) CRs in a row
                        if (tainted == -1) tainted = lineBuffer.length();
                        lineBuffer.append((char)CR);
                        break;
                    default  :  //we got some other character following a CR
                        if (tainted == -1) tainted = lineBuffer.length();
                        lineBuffer.append((char)CR);
                        lineBuffer.append((char)inChar);
                        cr_just_received = false;
                }
            }
        }//while
        throw new LineLengthExceededException("Exceeded maximum line length");
    }//method readLine()
    public int read() throws IOException {
    return in.read();
    }
    public boolean ready() throws IOException {
    return in.available() > 0;
    }
    public int read(char cbuf[], int  off, int  len) throws IOException {
    byte [] temp = new byte[len];
    int result = in.read(temp, 0, len);
    for (int i=0;i<result;i++) cbuf[i] = (char) temp[i];
    return result;
    }
    public void close() throws IOException {
    in.close();
    }
}





Read and return the entire contents of the supplied Reader. This method always closes the reader when finished reading.

/*
 * JBoss DNA (http://www.jboss.org/dna)
 * See the COPYRIGHT.txt file distributed with this work for information
 * regarding copyright ownership.  Some portions may be licensed
 * to Red Hat, Inc. under one or more contributor license agreements.
 * See the AUTHORS.txt file in the distribution for a full listing of 
 * individual contributors. 
 *
 * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA
 * is licensed to you under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * JBoss DNA is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this software; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
 */
import java.io.IOException;
import java.io.Reader;
/**
 * @author Randall Hauch
 */
public class Main {
  /**
   * Read and return the entire contents of the supplied {@link Reader}. This method always closes the reader when finished
   * reading.
   * 
   * @param reader the reader of the contents; may be null
   * @return the contents, or an empty string if the supplied reader is null
   * @throws IOException if there is an error reading the content
   */
  public static String read( Reader reader ) throws IOException {
      if (reader == null) return "";
      StringBuilder sb = new StringBuilder();
      boolean error = false;
      try {
          int numRead = 0;
          char[] buffer = new char[1024];
          while ((numRead = reader.read(buffer)) > -1) {
              sb.append(buffer, 0, numRead);
          }
      } catch (IOException e) {
          error = true; // this error should be thrown, even if there is an error closing reader
          throw e;
      } catch (RuntimeException e) {
          error = true; // this error should be thrown, even if there is an error closing reader
          throw e;
      } finally {
          try {
              reader.close();
          } catch (IOException e) {
              if (!error) throw e;
          }
      }
      return sb.toString();
  }
}





Reader: Reading Text (Characters)

  1. Reader is an abstract class that represents an input stream for reading characters.
  2. You use the Reader class to read text (characters, i.e. human readable data).
  3. The two implementation classes of Reader are InputStreamReader and BufferedReader.

The Reader class has three read method overloads that are similar to the read methods in InputStream:



public int read()
public int read (char[] data)
public int read (char[] data, int offset, int length)
public int read(java.nio.CharBuffer target)





Read from Reader and write to Writer until there is no more input from reader.

import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
public class Main {
  /**
   * Read input from reader and write it to writer until there is no more
   * input from reader.
   *
   * @param reader the reader to read from.
   * @param writer the writer to write to.
   * @param buf the char array to use as a bufferx
   */
  public static void flow( Reader reader, Writer writer, char[] buf ) 
      throws IOException {
      int numRead;
      while ( (numRead = reader.read(buf) ) >= 0) {
          writer.write(buf, 0, numRead);
      }
  }
}





Reads characters available from the Reader and returns these characters as a String object.

/*
 * Copyright Aduna (http://www.aduna-software.ru/) (c) 1997-2006.
 *
 * Licensed under the Aduna BSD-style license.
 */
import java.io.CharArrayWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
public class Main {
  /**
   * Fully reads the characters available from the supplied Reader
   * and returns these characters as a String object.
   *
   * @param reader The Reader to read the characters from.
   * @return A String existing of the characters that were read.
   * @throws IOException If I/O error occurred.
   */
  public static final String readFully(Reader reader)
    throws IOException
  {
    CharArrayWriter out = new CharArrayWriter(4096);
    transfer(reader, out);
    out.close();
    return out.toString();
  }
  /**
   * Transfers all characters that can be read from <tt>in</tt> to
   * <tt>out</tt>.
   *
   * @param in The Reader to read characters from.
   * @param out The Writer to write characters to.
   * @return The total number of characters transfered.
   */
  public static final long transfer(Reader in, Writer out)
    throws IOException
  {
    long totalChars = 0;
    int charsInBuf = 0;
    char[] buf = new char[4096];
    while ((charsInBuf = in.read(buf)) != -1) {
      out.write(buf, 0, charsInBuf);
      totalChars += charsInBuf;
    }
    return totalChars;
  }
}





Transfers all characters that can be read from one Reader to another Reader

/*
 * Copyright Aduna (http://www.aduna-software.ru/) (c) 1997-2006.
 *
 * Licensed under the Aduna BSD-style license.
 */
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
public class Main {
  /**
   * Transfers all characters that can be read from <tt>in</tt> to
   * <tt>out</tt>.
   *
   * @param in The Reader to read characters from.
   * @param out The Writer to write characters to.
   * @return The total number of characters transfered.
   */
  public static final long transfer(Reader in, Writer out)
    throws IOException
  {
    long totalChars = 0;
    int charsInBuf = 0;
    char[] buf = new char[4096];
    while ((charsInBuf = in.read(buf)) != -1) {
      out.write(buf, 0, charsInBuf);
      totalChars += charsInBuf;
    }
    return totalChars;
  }
}





UTF8 Reader

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.InputStream;
import java.io.IOException;
import java.io.Reader;
import java.io.UTFDataFormatException;

/**
 * @author Andy Clark, IBM
 *
 * @version $Id: UTF8Reader.java 515 2008-03-17 21:02:23Z jfrederic.clere@jboss.ru $
 */
public class UTF8Reader
    extends Reader {
    
    //
    // Constants
    //
    /** Default byte buffer size (2048). */
    public static final int DEFAULT_BUFFER_SIZE = 2048;
    // debugging
    /** Debug read. */
    private static final boolean DEBUG_READ = false;
    //
    // Data
    //
    /** Input stream. */
    protected InputStream fInputStream;
    /** Byte buffer. */
    protected byte[] fBuffer;
    /** Offset into buffer. */
    protected int fOffset;
    /** Surrogate character. */
    private int fSurrogate = -1;
    //
    // Constructors
    //
    /** 
     * Constructs a UTF-8 reader from the specified input stream, 
     * buffer size and MessageFormatter.
     *
     * @param inputStream The input stream.
     * @param size        The initial buffer size.
     */
    public UTF8Reader(InputStream inputStream, int size) {
        fInputStream = inputStream;
        fBuffer = new byte[size];
    }
    //
    // Reader methods
    //
    /**
     * Read a single character.  This method will block until a character is
     * available, an I/O error occurs, or the end of the stream is reached.
     *
     * Subclasses that intend to support efficient single-character input
     * should override this method.
     *
     * @return     The character read, as an integer in the range 0 to 16383
     *             (<tt>0x00-0xffff</tt>), or -1 if the end of the stream has
     *             been reached
     *
     * @exception  IOException  If an I/O error occurs
     */
    public int read() throws IOException {
        // decode character
        int c = fSurrogate;
        if (fSurrogate == -1) {
            // NOTE: We use the index into the buffer if there are remaining
            //       bytes from the last block read. -Ac
            int index = 0;
            // get first byte
            int b0 = index == fOffset 
                   ? fInputStream.read() : fBuffer[index++] & 0x00FF;
            if (b0 == -1) {
                return -1;
            }
            // UTF-8:   [0xxx xxxx]
            // Unicode: [0000 0000] [0xxx xxxx]
            if (b0 < 0x80) {
                c = (char)b0;
            }
            // UTF-8:   [110y yyyy] [10xx xxxx]
            // Unicode: [0000 0yyy] [yyxx xxxx]
            else if ((b0 & 0xE0) == 0xC0) {
                int b1 = index == fOffset 
                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                if (b1 == -1) {
                    expectedByte(2, 2);
                }
                if ((b1 & 0xC0) != 0x80) {
                    invalidByte(2, 2, b1);
                }
                c = ((b0 << 6) & 0x07C0) | (b1 & 0x003F);
            }
            // UTF-8:   [1110 zzzz] [10yy yyyy] [10xx xxxx]
            // Unicode: [zzzz yyyy] [yyxx xxxx]
            else if ((b0 & 0xF0) == 0xE0) {
                int b1 = index == fOffset
                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                if (b1 == -1) {
                    expectedByte(2, 3);
                }
                if ((b1 & 0xC0) != 0x80) {
                    invalidByte(2, 3, b1);
                }
                int b2 = index == fOffset 
                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                if (b2 == -1) {
                    expectedByte(3, 3);
                }
                if ((b2 & 0xC0) != 0x80) {
                    invalidByte(3, 3, b2);
                }
                c = ((b0 << 12) & 0xF000) | ((b1 << 6) & 0x0FC0) |
                    (b2 & 0x003F);
            }
            // UTF-8:   [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
            // Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
            //          [1101 11yy] [yyxx xxxx] (low surrogate)
            //          * uuuuu = wwww + 1
            else if ((b0 & 0xF8) == 0xF0) {
                int b1 = index == fOffset 
                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                if (b1 == -1) {
                    expectedByte(2, 4);
                }
                if ((b1 & 0xC0) != 0x80) {
                    invalidByte(2, 3, b1);
                }
                int b2 = index == fOffset 
                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                if (b2 == -1) {
                    expectedByte(3, 4);
                }
                if ((b2 & 0xC0) != 0x80) {
                    invalidByte(3, 3, b2);
                }
                int b3 = index == fOffset 
                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                if (b3 == -1) {
                    expectedByte(4, 4);
                }
                if ((b3 & 0xC0) != 0x80) {
                    invalidByte(4, 4, b3);
                }
                int uuuuu = ((b0 << 2) & 0x001C) | ((b1 >> 4) & 0x0003);
                if (uuuuu > 0x10) {
                    invalidSurrogate(uuuuu);
                }
                int wwww = uuuuu - 1;
                int hs = 0xD800 | 
                         ((wwww << 6) & 0x03C0) | ((b1 << 2) & 0x003C) | 
                         ((b2 >> 4) & 0x0003);
                int ls = 0xDC00 | ((b2 << 6) & 0x03C0) | (b3 & 0x003F);
                c = hs;
                fSurrogate = ls;
            }
            // error
            else {
                invalidByte(1, 1, b0);
            }
        }
        // use surrogate
        else {
            fSurrogate = -1;
        }
        return c;
    } // read():int
    /**
     * Read characters into a portion of an array.  This method will block
     * until some input is available, an I/O error occurs, or the end of the
     * stream is reached.
     *
     * @param      ch     Destination buffer
     * @param      offset Offset at which to start storing characters
     * @param      length Maximum number of characters to read
     *
     * @return     The number of characters read, or -1 if the end of the
     *             stream has been reached
     *
     * @exception  IOException  If an I/O error occurs
     */
    public int read(char ch[], int offset, int length) throws IOException {
        // handle surrogate
        int out = offset;
        if (fSurrogate != -1) {
            ch[offset + 1] = (char)fSurrogate;
            fSurrogate = -1;
            length--;
            out++;
        }
        // read bytes
        int count = 0;
        if (fOffset == 0) {
            // adjust length to read
            if (length > fBuffer.length) {
                length = fBuffer.length;
            }
            // perform read operation
            count = fInputStream.read(fBuffer, 0, length);
            if (count == -1) {
                return -1;
            }
            count += out - offset;
        }
        // skip read; last character was in error
        // NOTE: Having an offset value other than zero means that there was
        //       an error in the last character read. In this case, we have
        //       skipped the read so we don"t consume any bytes past the 
        //       error. By signalling the error on the next block read we
        //       allow the method to return the most valid characters that
        //       it can on the previous block read. -Ac
        else {
            count = fOffset;
            fOffset = 0;
        }
        // convert bytes to characters
        final int total = count;
        for (int in = 0; in < total; in++) {
            int b0 = fBuffer[in] & 0x00FF;
            // UTF-8:   [0xxx xxxx]
            // Unicode: [0000 0000] [0xxx xxxx]
            if (b0 < 0x80) {
                ch[out++] = (char)b0;
                continue;
            }
            // UTF-8:   [110y yyyy] [10xx xxxx]
            // Unicode: [0000 0yyy] [yyxx xxxx]
            if ((b0 & 0xE0) == 0xC0) {
                int b1 = -1;
                if (++in < total) { 
                    b1 = fBuffer[in] & 0x00FF; 
                }
                else {
                    b1 = fInputStream.read();
                    if (b1 == -1) {
                        if (out > offset) {
                            fBuffer[0] = (byte)b0;
                            fOffset = 1;
                            return out - offset;
                        }
                        expectedByte(2, 2);
                    }
                    count++;
                }
                if ((b1 & 0xC0) != 0x80) {
                    if (out > offset) {
                        fBuffer[0] = (byte)b0;
                        fBuffer[1] = (byte)b1;
                        fOffset = 2;
                        return out - offset;
                    }
                    invalidByte(2, 2, b1);
                }
                int c = ((b0 << 6) & 0x07C0) | (b1 & 0x003F);
                ch[out++] = (char)c;
                count -= 1;
                continue;
            }
            // UTF-8:   [1110 zzzz] [10yy yyyy] [10xx xxxx]
            // Unicode: [zzzz yyyy] [yyxx xxxx]
            if ((b0 & 0xF0) == 0xE0) {
                int b1 = -1;
                if (++in < total) { 
                    b1 = fBuffer[in] & 0x00FF; 
                }
                else {
                    b1 = fInputStream.read();
                    if (b1 == -1) {
                        if (out > offset) {
                            fBuffer[0] = (byte)b0;
                            fOffset = 1;
                            return out - offset;
                        }
                        expectedByte(2, 3);
                    }
                    count++;
                }
                if ((b1 & 0xC0) != 0x80) {
                    if (out > offset) {
                        fBuffer[0] = (byte)b0;
                        fBuffer[1] = (byte)b1;
                        fOffset = 2;
                        return out - offset;
                    }
                    invalidByte(2, 3, b1);
                }
                int b2 = -1;
                if (++in < total) { 
                    b2 = fBuffer[in] & 0x00FF; 
                }
                else {
                    b2 = fInputStream.read();
                    if (b2 == -1) {
                        if (out > offset) {
                            fBuffer[0] = (byte)b0;
                            fBuffer[1] = (byte)b1;
                            fOffset = 2;
                            return out - offset;
                        }
                        expectedByte(3, 3);
                    }
                    count++;
                }
                if ((b2 & 0xC0) != 0x80) {
                    if (out > offset) {
                        fBuffer[0] = (byte)b0;
                        fBuffer[1] = (byte)b1;
                        fBuffer[2] = (byte)b2;
                        fOffset = 3;
                        return out - offset;
                    }
                    invalidByte(3, 3, b2);
                }
                int c = ((b0 << 12) & 0xF000) | ((b1 << 6) & 0x0FC0) |
                        (b2 & 0x003F);
                ch[out++] = (char)c;
                count -= 2;
                continue;
            }
            // UTF-8:   [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
            // Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
            //          [1101 11yy] [yyxx xxxx] (low surrogate)
            //          * uuuuu = wwww + 1
            if ((b0 & 0xF8) == 0xF0) {
                int b1 = -1;
                if (++in < total) { 
                    b1 = fBuffer[in] & 0x00FF; 
                }
                else {
                    b1 = fInputStream.read();
                    if (b1 == -1) {
                        if (out > offset) {
                            fBuffer[0] = (byte)b0;
                            fOffset = 1;
                            return out - offset;
                        }
                        expectedByte(2, 4);
                    }
                    count++;
                }
                if ((b1 & 0xC0) != 0x80) {
                    if (out > offset) {
                        fBuffer[0] = (byte)b0;
                        fBuffer[1] = (byte)b1;
                        fOffset = 2;
                        return out - offset;
                    }
                    invalidByte(2, 4, b1);
                }
                int b2 = -1;
                if (++in < total) { 
                    b2 = fBuffer[in] & 0x00FF; 
                }
                else {
                    b2 = fInputStream.read();
                    if (b2 == -1) {
                        if (out > offset) {
                            fBuffer[0] = (byte)b0;
                            fBuffer[1] = (byte)b1;
                            fOffset = 2;
                            return out - offset;
                        }
                        expectedByte(3, 4);
                    }
                    count++;
                }
                if ((b2 & 0xC0) != 0x80) {
                    if (out > offset) {
                        fBuffer[0] = (byte)b0;
                        fBuffer[1] = (byte)b1;
                        fBuffer[2] = (byte)b2;
                        fOffset = 3;
                        return out - offset;
                    }
                    invalidByte(3, 4, b2);
                }
                int b3 = -1;
                if (++in < total) { 
                    b3 = fBuffer[in] & 0x00FF; 
                }
                else {
                    b3 = fInputStream.read();
                    if (b3 == -1) {
                        if (out > offset) {
                            fBuffer[0] = (byte)b0;
                            fBuffer[1] = (byte)b1;
                            fBuffer[2] = (byte)b2;
                            fOffset = 3;
                            return out - offset;
                        }
                        expectedByte(4, 4);
                    }
                    count++;
                }
                if ((b3 & 0xC0) != 0x80) {
                    if (out > offset) {
                        fBuffer[0] = (byte)b0;
                        fBuffer[1] = (byte)b1;
                        fBuffer[2] = (byte)b2;
                        fBuffer[3] = (byte)b3;
                        fOffset = 4;
                        return out - offset;
                    }
                    invalidByte(4, 4, b2);
                }
                // decode bytes into surrogate characters
                int uuuuu = ((b0 << 2) & 0x001C) | ((b1 >> 4) & 0x0003);
                if (uuuuu > 0x10) {
                    invalidSurrogate(uuuuu);
                }
                int wwww = uuuuu - 1;
                int zzzz = b1 & 0x000F;
                int yyyyyy = b2 & 0x003F;
                int xxxxxx = b3 & 0x003F;
                int hs = 0xD800 | ((wwww << 6) & 0x03C0) | (zzzz << 2) | (yyyyyy >> 4);
                int ls = 0xDC00 | ((yyyyyy << 6) & 0x03C0) | xxxxxx;
                // set characters
                ch[out++] = (char)hs;
                ch[out++] = (char)ls;
                count -= 2;
                continue;
            }
            // error
            if (out > offset) {
                fBuffer[0] = (byte)b0;
                fOffset = 1;
                return out - offset;
            }
            invalidByte(1, 1, b0);
        }
        return count;
    } // read(char[],int,int)
    /**
     * Skip characters.  This method will block until some characters are
     * available, an I/O error occurs, or the end of the stream is reached.
     *
     * @param  n  The number of characters to skip
     *
     * @return    The number of characters actually skipped
     *
     * @exception  IOException  If an I/O error occurs
     */
    public long skip(long n) throws IOException {
        long remaining = n;
        final char[] ch = new char[fBuffer.length];
        do {
            int length = ch.length < remaining ? ch.length : (int)remaining;
            int count = read(ch, 0, length);
            if (count > 0) {
                remaining -= count;
            }
            else {
                break;
            }
        } while (remaining > 0);
        long skipped = n - remaining;
        return skipped;
    } // skip(long):long
    /**
     * Tell whether this stream is ready to be read.
     *
     * @return True if the next read() is guaranteed not to block for input,
     * false otherwise.  Note that returning false does not guarantee that the
     * next read will block.
     *
     * @exception  IOException  If an I/O error occurs
     */
    public boolean ready() throws IOException {
      return false;
    } // ready()
    /**
     * Tell whether this stream supports the mark() operation.
     */
    public boolean markSupported() {
      return false;
    } // markSupported()
    /**
     * Mark the present position in the stream.  Subsequent calls to reset()
     * will attempt to reposition the stream to this point.  Not all
     * character-input streams support the mark() operation.
     *
     * @param  readAheadLimit  Limit on the number of characters that may be
     *                         read while still preserving the mark.  After
     *                         reading this many characters, attempting to
     *                         reset the stream may fail.
     *
     * @exception  IOException  If the stream does not support mark(),
     *                          or if some other I/O error occurs
     */
    public void mark(int readAheadLimit) throws IOException {
  throw new IOException("operationNotSupported");
    }
    /**
     * Reset the stream.  If the stream has been marked, then attempt to
     * reposition it at the mark.  If the stream has not been marked, then
     * attempt to reset it in some way appropriate to the particular stream,
     * for example by repositioning it to its starting point.  Not all
     * character-input streams support the reset() operation, and some support
     * reset() without supporting mark().
     *
     * @exception  IOException  If the stream has not been marked,
     *                          or if the mark has been invalidated,
     *                          or if the stream does not support reset(),
     *                          or if some other I/O error occurs
     */
    public void reset() throws IOException {
        fOffset = 0;
        fSurrogate = -1;
    } // reset()
    /**
     * Close the stream.  Once a stream has been closed, further read(),
     * ready(), mark(), or reset() invocations will throw an IOException.
     * Closing a previously-closed stream, however, has no effect.
     *
     * @exception  IOException  If an I/O error occurs
     */
    public void close() throws IOException {
        fInputStream.close();
    } // close()
    //
    // Private methods
    //
    /** Throws an exception for expected byte. */
    private void expectedByte(int position, int count)
        throws UTFDataFormatException {
        throw new UTFDataFormatException("expectedByte");
    } // expectedByte(int,int,int)
    /** Throws an exception for invalid byte. */
    private void invalidByte(int position, int count, int c) 
        throws UTFDataFormatException {
        throw new UTFDataFormatException("invalidByte");
    } // invalidByte(int,int,int,int)
    /** Throws an exception for invalid surrogate bits. */
    private void invalidSurrogate(int uuuuu) throws UTFDataFormatException {
        
        throw new UTFDataFormatException("invalidHighSurrogate");
    } // invalidSurrogate(int)
} // class UTF8Reader





Writes all characters from a Reader to a file using the default character encoding.

/*
 * Copyright Aduna (http://www.aduna-software.ru/) (c) 1997-2006.
 *
 * Licensed under the Aduna BSD-style license.
 */
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
public class Main {
  
  /**
   * Writes all characters from a <tt>Reader</tt> to a file using the default
   * character encoding.
   *
   * @param reader The <tt>Reader</tt> containing the data to write to the
   * file.
   * @param file The file to write the data to.
   * @return The total number of characters written.
   * @throws IOException If an I/O error occured while trying to write the
   * data to the file.
   * @see java.io.FileWriter
   */
  public static final long writeToFile(Reader reader, File file)
    throws IOException
  {
    FileWriter writer = new FileWriter(file);
    try {
      return transfer(reader, writer);
    }
    finally {
      writer.close();
    }
  }
  /**
   * Transfers all characters that can be read from <tt>in</tt> to
   * <tt>out</tt>.
   *
   * @param in The Reader to read characters from.
   * @param out The Writer to write characters to.
   * @return The total number of characters transfered.
   */
  public static final long transfer(Reader in, Writer out)
    throws IOException
  {
    long totalChars = 0;
    int charsInBuf = 0;
    char[] buf = new char[4096];
    while ((charsInBuf = in.read(buf)) != -1) {
      out.write(buf, 0, charsInBuf);
      totalChars += charsInBuf;
    }
    return totalChars;
  }
}