Java Tutorial/File/Reader
Содержание
- 1 An InputStream backed by a Reader
- 2 Compare the contents of two Readers to determine if they are equal or not.
- 3 convert Reader to InputStream
- 4 CRLF Terminated Reader
- 5 Read and return the entire contents of the supplied Reader. This method always closes the reader when finished reading.
- 6 Reader: Reading Text (Characters)
- 7 Read from Reader and write to Writer until there is no more input from reader.
- 8 Reads characters available from the Reader and returns these characters as a String object.
- 9 Transfers all characters that can be read from one Reader to another Reader
- 10 UTF8 Reader
- 11 Writes all characters from a Reader to a file using the default character encoding.
An InputStream backed by a Reader
<source lang="java">
/*
* Java Network Programming, Second Edition * Merlin Hughes, Michael Shoffner, Derek Hamner * Manning Publications Company; ISBN 188477749X * * http://nitric.ru/jnp/ * * Copyright (c) 1997-1999 Merlin Hughes, Michael Shoffner, Derek Hamner; * all rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE ABOVE NAMED AUTHORS "AS IS" AND ANY * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS, THEIR * PUBLISHER OR THEIR EMPLOYERS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. */
import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStreamWriter; import java.io.Reader; import java.io.UnsupportedEncodingException; import java.io.Writer; /**
* An InputStream backed by a Reader */
public class ReaderInputStream extends InputStream {
  // How it works: characters are pulled from the reader one chunk at a time, pushed through an
  // OutputStreamWriter into an in-memory byte array, and served to callers from that array.

  /** Source of the characters to encode. */
  protected Reader reader;

  /** Receives the bytes the writer produces for the current chunk. */
  protected ByteArrayOutputStream byteArrayOut;

  /** Encodes characters into {@link #byteArrayOut}. */
  protected Writer writer;

  /** Scratch space for characters read from {@link #reader}. */
  protected char[] chars;

  /** Encoded bytes of the current chunk. */
  protected byte[] buffer;

  /** Next position to serve from {@link #buffer}, and its number of valid bytes (-1 at end of stream). */
  protected int index, length;

  /** Set once the reader has reported end of input and the encoder has been drained. */
  private boolean readerExhausted;

  /**
   * Creates a stream that encodes the reader's characters with the platform default charset.
   *
   * @param reader the Reader used by the InputStream
   */
  public ReaderInputStream(Reader reader) {
    this.reader = reader;
    byteArrayOut = new ByteArrayOutputStream();
    writer = new OutputStreamWriter(byteArrayOut);
    chars = new char[1024];
  }

  /**
   * Creates a stream that encodes the reader's characters with the named charset.
   *
   * @param reader the Reader used by the InputStream
   * @param encoding the encoding to use for the InputStream
   * @throws UnsupportedEncodingException if the encoding is not supported
   */
  public ReaderInputStream(Reader reader, String encoding) throws UnsupportedEncodingException {
    this.reader = reader;
    byteArrayOut = new ByteArrayOutputStream();
    writer = new OutputStreamWriter(byteArrayOut, encoding);
    chars = new char[1024];
  }

  /**
   * Returns the next encoded byte, or -1 at end of stream.
   *
   * @see java.io.InputStream#read()
   */
  public int read() throws IOException {
    if (index >= length) {
      fillBuffer();
    }
    if (index >= length) {
      return -1;
    }
    return 0xff & buffer[index++];
  }

  /**
   * Refills {@link #buffer} with the encoding of the next chunk of characters, or sets
   * {@link #length} to -1 when nothing is left.
   *
   * @throws IOException if reading or encoding fails
   */
  protected void fillBuffer() throws IOException {
    if (length < 0) {
      return;
    }
    // A chunk can legitimately encode to zero bytes (for example when it ends with a high
    // surrogate that the encoder holds back until its partner arrives), so keep reading until
    // bytes appear instead of mistaking an empty chunk for end of stream.
    while (!readerExhausted) {
      byteArrayOut.reset();
      int numChars = reader.read(chars);
      if (numChars < 0) {
        readerExhausted = true;
        // Closing the writer makes the encoder emit anything it was still holding back.
        writer.close();
      } else {
        writer.write(chars, 0, numChars);
        writer.flush();
      }
      if (byteArrayOut.size() > 0) {
        buffer = byteArrayOut.toByteArray();
        length = buffer.length;
        index = 0;
        return;
      }
    }
    length = -1;
  }

  /**
   * Copies up to {@code len} encoded bytes into {@code data}.
   *
   * @return the number of bytes copied, 0 if {@code len} is 0, or -1 at end of stream
   * @throws IndexOutOfBoundsException if {@code off} and {@code len} do not fit {@code data}
   * @see java.io.InputStream#read(byte[], int, int)
   */
  public int read(byte[] data, int off, int len) throws IOException {
    if (off < 0 || len < 0 || len > data.length - off) {
      throw new IndexOutOfBoundsException();
    }
    if (len == 0) {
      return 0;
    }
    if (index >= length) {
      fillBuffer();
    }
    if (index >= length) {
      return -1;
    }
    int amount = Math.min(len, length - index);
    System.arraycopy(buffer, index, data, off, amount);
    index += amount;
    return amount;
  }

  /**
   * Returns the number of buffered bytes, or 1 when the buffer is empty but the reader reports
   * that it is ready, or 0 otherwise.
   *
   * @see java.io.InputStream#available()
   */
  public int available() throws IOException {
    if (index < length) {
      return length - index;
    }
    return (length >= 0 && !readerExhausted && reader.ready()) ? 1 : 0;
  }

  /**
   * Closes the underlying reader.
   *
   * @see java.io.InputStream#close()
   */
  public void close() throws IOException {
    reader.close();
  }
}</source>
Compare the contents of two Readers to determine if they are equal or not.
<source lang="java">
/*
* Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */
import java.io.BufferedReader; import java.io.IOException; import java.io.Reader; /** * Origin of code: Apache Avalon (Excalibur)
* * @author Peter Donald * @author Jeff Turner * @author Matthew Hawthorne * @author Stephen Colebourne * @author Gareth Davis * @version CVS $Revision$ $Date$*/
public class Main {
  /**
   * Tells whether two character streams deliver exactly the same sequence of characters.
   *
   * Any reader that is not already a BufferedReader is wrapped in one before reading starts.
   *
   * @param input1 the first reader
   * @param input2 the second reader
   * @return true if both readers produce the same characters (two empty readers count as
   *         equal), false otherwise
   * @throws NullPointerException if either input is null
   * @throws IOException if an I/O error occurs
   * @since 1.1
   */
  public static boolean contentEquals(Reader input1, Reader input2) throws IOException {
    Reader left = (input1 instanceof BufferedReader) ? input1 : new BufferedReader(input1);
    Reader right = (input2 instanceof BufferedReader) ? input2 : new BufferedReader(input2);

    // Walk both streams in lock step; the first difference (including one stream ending
    // before the other) decides the answer.
    for (;;) {
      int fromLeft = left.read();
      int fromRight = right.read();
      if (fromLeft != fromRight) {
        return false;
      }
      if (fromLeft == -1) {
        return true;
      }
    }
  }
}</source>
convert Reader to InputStream
<source lang="java">
/*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. * * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved. * * The contents of this file are subject to the terms of either the GNU * General Public License Version 2 only ("GPL") or the Common * Development and Distribution License("CDDL") (collectively, the * "License"). You may not use this file except in compliance with the * License. You can obtain a copy of the License at * http://www.netbeans.org/cddl-gplv2.html * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the * specific language governing permissions and limitations under the * License. When distributing the software, include this License Header * Notice in each file and include the License file at * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this * particular file as subject to the "Classpath" exception as provided * by Sun in the GPL Version 2 section of the License file that * accompanied this code. If applicable, add the following below the * License Header, with the fields enclosed by brackets [] replaced by * your own identifying information: * "Portions Copyrighted [year] [name of copyright owner]" * * Contributor(s): * * The Original Software is NetBeans. The Initial Developer of the Original * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun * Microsystems, Inc. All Rights Reserved. * * If you wish your version of this file to be governed by only the CDDL * or only the GPL Version 2, indicate your decision by adding * "[Contributor] elects to include this software in this distribution * under the [CDDL or GPL Version 2] license." If you do not indicate a * single choice of license, a recipient has the option to distribute * your version of this file under either the CDDL, the GPL Version 2 or * to extend the choice of license to its licensees as provided above. 
* However, if you add GPL Version 2 code and therefore, elected the GPL * Version 2 license, then the option applies only if the new code is * made subject to such option by the copyright holder. */
import java.io.*;
/**
- This class convert Reader to InputStream. It works by converting
- the characters to the encoding specified in constructor parameter.
- @author Petr Hamernik, David Strupl
- /
public class ReaderInputStream extends InputStream {
/** Input Reader class. */ private Reader reader; private PipedOutputStream pos; private PipedInputStream pis; private OutputStreamWriter osw; /** Creates new input stream from the given reader. * Uses the platform default encoding. * @param reader Input reader */ public ReaderInputStream(Reader reader) throws IOException { this.reader = reader; pos = new PipedOutputStream(); pis = new PipedInputStream(pos); osw = new OutputStreamWriter(pos); } /** Creates new input stream from the given reader and encoding. * @param reader Input reader * @param encoding */ public ReaderInputStream(Reader reader, String encoding) throws IOException { this.reader = reader; pos = new PipedOutputStream(); pis = new PipedInputStream(pos); osw = new OutputStreamWriter(pos, encoding); } public int read() throws IOException { if (pis.available() > 0) { return pis.read(); } int c = reader.read(); if (c == -1) { return c; } osw.write(c); osw.flush(); pos.flush(); return pis.read(); } public int read(byte[] b, int off, int len) throws IOException { if (len == 0) { return 0; } int c = read(); if (c == -1) { return -1; } b[off] = (byte) c; int i = 1; // Don"t try to fill up the buffer if the reader is waiting. for (; (i < len) && reader.ready(); i++) { c = read(); if (c == -1) { return i; } b[off + i] = (byte) c; } return i; } public int available() throws IOException { int i = pis.available(); if (i > 0) { return i; } if (reader.ready()) { // Char must produce at least one byte. return 1; } else { return 0; } } public void close() throws IOException { reader.close(); osw.close(); pis.close(); }
}</source>
CRLF Terminated Reader
<source lang="java">
/****************************************************************
* Licensed to the Apache Software Foundation (ASF) under one * * or more contributor license agreements. See the NOTICE file * * distributed with this work for additional information * * regarding copyright ownership. The ASF licenses this file * * to you under the Apache License, Version 2.0 (the * * "License"); you may not use this file except in compliance * * with the License. You may obtain a copy of the License at * * * * http://www.apache.org/licenses/LICENSE-2.0 * * * * Unless required by applicable law or agreed to in writing, * * software distributed under the License is distributed on an * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * * KIND, either express or implied. See the License for the * * specific language governing permissions and limitations * * under the License. * ****************************************************************/
import java.io.InputStream; import java.io.Reader; import java.io.UnsupportedEncodingException; import java.io.IOException; /**
* A Reader for use with SMTP or other protocols in which lines * must end with CRLF. Extends Reader and overrides its * readLine() method. The Reader readLine() method cannot * serve for SMTP because it ends lines with either CR or LF alone. */
public class CRLFTerminatedReader extends Reader {
public class TerminationException extends IOException { private int where; public TerminationException(int where) { super(); this.where = where; } public TerminationException(String s, int where) { super(s); this.where = where; } public int position() { return where; } } public class LineLengthExceededException extends IOException { public LineLengthExceededException(String s) { super(s); } } /** * Constructs this CRLFTerminatedReader. * @param in an InputStream * @param charsetName the String name of a supported charset. * "ASCII" is common here. * @throws UnsupportedEncodingException if the named charset * is not supported */ InputStream in; public CRLFTerminatedReader(InputStream in) { this.in = in; } public CRLFTerminatedReader(InputStream in, String enc) throws UnsupportedEncodingException { this(in); } private StringBuffer lineBuffer = new StringBuffer(); private final int EOF = -1, CR = 13, LF = 10; private int tainted = -1; /** * Read a line of text which is terminated by CRLF. The concluding * CRLF characters are not returned with the String, but if either CR * or LF appears in the text in any other sequence it is returned * in the String like any other character. Some characters at the * end of the stream may be lost if they are in a "line" not * terminated by CRLF. * * @return either a String containing the contents of a * line which must end with CRLF, or null if the end of the * stream has been reached, possibly discarding some characters * in a line not terminated with CRLF. * @throws IOException if an I/O error occurs. */ public String readLine() throws IOException{ //start with the StringBuffer empty lineBuffer.delete(0, lineBuffer.length()); /* This boolean tells which state we are in, * depending upon whether or not we got a CR * in the preceding read(). */ boolean cr_just_received = false; // Until we add support for specifying a maximum line lenth as // a Service Extension, limit lines to 2K, which is twice what // RFC 2821 4.5.3.1 requires. 
while (lineBuffer.length() <= 2048) { int inChar = read(); if (!cr_just_received){ //the most common case, somewhere before the end of a line switch (inChar){ case CR : cr_just_received = true; break; case EOF : return null; // premature EOF -- discards data(?) case LF : //the normal ending of a line if (tainted == -1) tainted = lineBuffer.length(); // intentional fall-through default : lineBuffer.append((char)inChar); } }else{ // CR has been received, we may be at end of line switch (inChar){ case LF : // LF without a preceding CR if (tainted != -1) { int pos = tainted; tainted = -1; throw new TerminationException("\"bare\" CR or LF in data stream", pos); } return lineBuffer.toString(); case EOF : return null; // premature EOF -- discards data(?) case CR : //we got two (or more) CRs in a row if (tainted == -1) tainted = lineBuffer.length(); lineBuffer.append((char)CR); break; default : //we got some other character following a CR if (tainted == -1) tainted = lineBuffer.length(); lineBuffer.append((char)CR); lineBuffer.append((char)inChar); cr_just_received = false; } } }//while throw new LineLengthExceededException("Exceeded maximum line length"); }//method readLine() public int read() throws IOException { return in.read(); } public boolean ready() throws IOException { return in.available() > 0; } public int read(char cbuf[], int off, int len) throws IOException { byte [] temp = new byte[len]; int result = in.read(temp, 0, len); for (int i=0;i<result;i++) cbuf[i] = (char) temp[i]; return result; } public void close() throws IOException { in.close(); }
}</source>
Read and return the entire contents of the supplied Reader. This method always closes the reader when finished reading.
<source lang="java">
/*
* JBoss DNA (http://www.jboss.org/dna) * See the COPYRIGHT.txt file distributed with this work for information * regarding copyright ownership. Some portions may be licensed * to Red Hat, Inc. under one or more contributor license agreements. * See the AUTHORS.txt file in the distribution for a full listing of * individual contributors. * * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA * is licensed to you under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * JBoss DNA is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this software; if not, write to the Free * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA * 02110-1301 USA, or see the FSF site: http://www.fsf.org. */
import java.io.IOException; import java.io.Reader; /**
* @author Randall Hauch */
public class Main {
/** * Read and return the entire contents of the supplied {@link Reader}. This method always closes the reader when finished * reading. * * @param reader the reader of the contents; may be null * @return the contents, or an empty string if the supplied reader is null * @throws IOException if there is an error reading the content */ public static String read( Reader reader ) throws IOException { if (reader == null) return ""; StringBuilder sb = new StringBuilder(); boolean error = false; try { int numRead = 0; char[] buffer = new char[1024]; while ((numRead = reader.read(buffer)) > -1) { sb.append(buffer, 0, numRead); } } catch (IOException e) { error = true; // this error should be thrown, even if there is an error closing reader throw e; } catch (RuntimeException e) { error = true; // this error should be thrown, even if there is an error closing reader throw e; } finally { try { reader.close(); } catch (IOException e) { if (!error) throw e; } } return sb.toString(); }
}</source>
Reader: Reading Text (Characters)
- Reader is an abstract class that represents an input stream for reading characters.
- You use the Reader class to read text (characters, i.e. human readable data).
- The two implementation classes of Reader are InputStreamReader and BufferedReader.
The Reader class has three read method overloads that are similar to the read methods in InputStream:
<source lang="java">
public int read() public int read (char[] data) public int read (char[] data, int offset, int length) public int read(java.nio.CharBuffer target)</source>
Read from Reader and write to Writer until there is no more input from reader.
<source lang="java">
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;

public class Main {
  /**
   * Read input from reader and write it to writer until there is no more
   * input from reader. Neither the reader nor the writer is closed or flushed.
   *
   * @param reader the reader to read from.
   * @param writer the writer to write to.
   * @param buf the char array to use as a buffer; must have room for at least one character
   * @throws IllegalArgumentException if {@code buf} is empty
   * @throws IOException if reading or writing fails
   */
  public static void flow( Reader reader, Writer writer, char[] buf ) throws IOException {
    // Reading into an empty array reports 0 characters without ever reaching end of stream,
    // so the copy loop below would never terminate.
    if (buf.length == 0) {
      throw new IllegalArgumentException("buf must have room for at least one character");
    }
    int numRead;
    while ( (numRead = reader.read(buf) ) >= 0) {
      writer.write(buf, 0, numRead);
    }
  }
}</source>
Reads characters available from the Reader and returns these characters as a String object.
<source lang="java">
/*
* Copyright Aduna (http://www.aduna-software.ru/) (c) 1997-2006. * * Licensed under the Aduna BSD-style license. */
import java.io.CharArrayWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;

public class Main {
  /**
   * Reads the supplied Reader until it reports end of stream and returns everything that was
   * read as a String. The reader itself is not closed.
   *
   * @param reader The Reader to read the characters from.
   * @return A String made up of the characters that were read.
   * @throws IOException If an I/O error occurred.
   */
  public static final String readFully(Reader reader) throws IOException {
    CharArrayWriter collected = new CharArrayWriter(4096);
    transfer(reader, collected);
    collected.close();
    return collected.toString();
  }

  /**
   * Copies characters from <tt>in</tt> to <tt>out</tt> until <tt>in</tt> reports end of
   * stream. Neither stream is closed.
   *
   * @param in The Reader to read characters from.
   * @param out The Writer to write characters to.
   * @return The total number of characters transferred.
   * @throws IOException If reading or writing fails.
   */
  public static final long transfer(Reader in, Writer out) throws IOException {
    char[] chunk = new char[4096];
    long copied = 0;
    for (int n = in.read(chunk); n != -1; n = in.read(chunk)) {
      out.write(chunk, 0, n);
      copied += n;
    }
    return copied;
  }
}</source>
Transfers all characters that can be read from one Reader to another Reader
<source lang="java">
/*
* Copyright Aduna (http://www.aduna-software.ru/) (c) 1997-2006. * * Licensed under the Aduna BSD-style license. */
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;

public class Main {
  /**
   * Pumps characters from <tt>in</tt> into <tt>out</tt> until <tt>in</tt> signals end of
   * stream. Neither stream is closed.
   *
   * @param in The Reader to read characters from.
   * @param out The Writer to write characters to.
   * @return The total number of characters transferred.
   * @throws IOException If reading or writing fails.
   */
  public static final long transfer(Reader in, Writer out) throws IOException {
    final char[] block = new char[4096];
    long moved = 0;
    while (true) {
      int got = in.read(block);
      if (got == -1) {
        break;
      }
      out.write(block, 0, got);
      moved += got;
    }
    return moved;
  }
}</source>
UTF8 Reader
<source lang="java">
/*
* Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */
import java.io.InputStream; import java.io.IOException; import java.io.Reader; import java.io.UTFDataFormatException;
/**
* @author Andy Clark, IBM * * @version $Id: UTF8Reader.java 515 2008-03-17 21:02:23Z jfrederic.clere@jboss.ru $ */
public class UTF8Reader
    extends Reader {

    //
    // Constants
    //

    /** Default byte buffer size (2048). */
    public static final int DEFAULT_BUFFER_SIZE = 2048;

    /** The buffer must be able to hold the (up to four) bytes of a rejected sequence. */
    private static final int MIN_BUFFER_SIZE = 4;

    //
    // Data
    //

    /** Input stream. */
    protected InputStream fInputStream;

    /** Byte buffer. */
    protected byte[] fBuffer;

    /**
     * Number of bytes parked at the start of the buffer by a block read that stopped in front
     * of a bad or incomplete sequence; zero when nothing is parked.
     */
    protected int fOffset;

    /** Low surrogate still owed to the caller, or -1 if there is none. */
    private int fSurrogate = -1;

    //
    // Constructors
    //

    /**
     * Constructs a UTF-8 reader from the specified input stream using the default buffer size.
     *
     * @param inputStream The input stream.
     */
    public UTF8Reader(InputStream inputStream) {
        this(inputStream, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Constructs a UTF-8 reader from the specified input stream and buffer size.
     *
     * @param inputStream The input stream.
     * @param size        The buffer size; values below four are raised to four so that a
     *                    rejected sequence always fits into the buffer.
     */
    public UTF8Reader(InputStream inputStream, int size) {
        fInputStream = inputStream;
        fBuffer = new byte[Math.max(size, MIN_BUFFER_SIZE)];
    }

    //
    // Reader methods
    //

    /**
     * Read a single character. This method will block until a character is
     * available, an I/O error occurs, or the end of the stream is reached.
     * A character outside the Basic Multilingual Plane is delivered as two
     * consecutive results: first the high surrogate, then the low surrogate.
     *
     * @return The character read, as an integer in the range 0 to 65535
     *         (0x0000-0xffff), or -1 if the end of the stream has
     *         been reached
     *
     * @exception IOException If an I/O error occurs
     */
    public int read() throws IOException {

        // decode character
        int c = fSurrogate;
        if (fSurrogate == -1) {
            // NOTE: We use the index into the buffer if there are remaining
            //       bytes from the last block read. -Ac
            int index = 0;

            // get first byte
            int b0 = index == fOffset
                   ? fInputStream.read() : fBuffer[index++] & 0x00FF;
            if (b0 == -1) {
                return -1;
            }

            // UTF-8:   [0xxx xxxx]
            // Unicode: [0000 0000] [0xxx xxxx]
            if (b0 < 0x80) {
                c = (char)b0;
            }

            // UTF-8:   [110y yyyy] [10xx xxxx]
            // Unicode: [0000 0yyy] [yyxx xxxx]
            else if ((b0 & 0xE0) == 0xC0) {
                int b1 = index == fOffset
                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                if (b1 == -1) {
                    expectedByte(2, 2);
                }
                if ((b1 & 0xC0) != 0x80) {
                    invalidByte(2, 2, b1);
                }
                c = ((b0 << 6) & 0x07C0) | (b1 & 0x003F);
            }

            // UTF-8:   [1110 zzzz] [10yy yyyy] [10xx xxxx]
            // Unicode: [zzzz yyyy] [yyxx xxxx]
            else if ((b0 & 0xF0) == 0xE0) {
                int b1 = index == fOffset
                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                if (b1 == -1) {
                    expectedByte(2, 3);
                }
                if ((b1 & 0xC0) != 0x80) {
                    invalidByte(2, 3, b1);
                }
                int b2 = index == fOffset
                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                if (b2 == -1) {
                    expectedByte(3, 3);
                }
                if ((b2 & 0xC0) != 0x80) {
                    invalidByte(3, 3, b2);
                }
                c = ((b0 << 12) & 0xF000) | ((b1 << 6) & 0x0FC0) |
                    (b2 & 0x003F);
            }

            // UTF-8:   [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
            // Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
            //          [1101 11yy] [yyxx xxxx] (low surrogate)
            //          * uuuuu = wwww + 1
            else if ((b0 & 0xF8) == 0xF0) {
                int b1 = index == fOffset
                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                if (b1 == -1) {
                    expectedByte(2, 4);
                }
                if ((b1 & 0xC0) != 0x80) {
                    invalidByte(2, 4, b1);
                }
                int b2 = index == fOffset
                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                if (b2 == -1) {
                    expectedByte(3, 4);
                }
                if ((b2 & 0xC0) != 0x80) {
                    invalidByte(3, 4, b2);
                }
                int b3 = index == fOffset
                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                if (b3 == -1) {
                    expectedByte(4, 4);
                }
                if ((b3 & 0xC0) != 0x80) {
                    invalidByte(4, 4, b3);
                }
                int uuuuu = ((b0 << 2) & 0x001C) | ((b1 >> 4) & 0x0003);
                if (uuuuu > 0x10) {
                    invalidSurrogate(uuuuu);
                }
                int wwww = uuuuu - 1;
                int hs = 0xD800 |
                         ((wwww << 6) & 0x03C0) | ((b1 << 2) & 0x003C) |
                         ((b2 >> 4) & 0x0003);
                int ls = 0xDC00 | ((b2 << 6) & 0x03C0) | (b3 & 0x003F);
                c = hs;
                // the low surrogate is handed out by the next read
                fSurrogate = ls;
            }

            // error
            else {
                invalidByte(1, 1, b0);
            }
        }

        // use surrogate
        else {
            fSurrogate = -1;
        }

        return c;

    } // read():int

    /**
     * Read characters into a portion of an array. This method will block
     * until some input is available, an I/O error occurs, or the end of the
     * stream is reached.
     * <p>
     * When a malformed or truncated sequence is met after some characters
     * have already been decoded, those characters are returned and the
     * error is raised by the following call.
     *
     * @param ch     Destination buffer
     * @param offset Offset at which to start storing characters
     * @param length Maximum number of characters to read
     *
     * @return The number of characters read, or -1 if the end of the
     *         stream has been reached
     *
     * @exception IOException If an I/O error occurs
     * @exception IndexOutOfBoundsException If offset and length do not
     *            describe a region inside the destination buffer
     */
    public int read(char ch[], int offset, int length) throws IOException {

        if (offset < 0 || length < 0 || length > ch.length - offset) {
            throw new IndexOutOfBoundsException();
        }
        if (length == 0) {
            return 0;
        }

        // first index the caller did not offer for writing
        final int limit = offset + length;

        // handle surrogate: a pending low surrogate is the next character of
        // the stream, so it goes into the first free slot
        int out = offset;
        if (fSurrogate != -1) {
            ch[out++] = (char)fSurrogate;
            fSurrogate = -1;
            length--;
            if (length == 0) {
                return out - offset;
            }
        }

        // read bytes; total is the number of bytes in fBuffer to decode
        int total;
        if (fOffset == 0) {
            // adjust length to read
            if (length > fBuffer.length) {
                length = fBuffer.length;
            }

            // perform read operation
            total = fInputStream.read(fBuffer, 0, length);
            if (total == -1) {
                // a surrogate delivered above must not be lost at end of stream
                return out > offset ? out - offset : -1;
            }
        }

        // skip read; last character was in error
        // NOTE: Having an offset value other than zero means that there was
        //       an error in the last character read. In this case, we have
        //       skipped the read so we don't consume any bytes past the
        //       error. By signalling the error on the next block read we
        //       allow the method to return the most valid characters that
        //       it can on the previous block read. -Ac
        else {
            total = fOffset;
            fOffset = 0;
        }

        // convert bytes to characters
        for (int in = 0; in < total; in++) {
            int b0 = fBuffer[in] & 0x00FF;

            // UTF-8:   [0xxx xxxx]
            // Unicode: [0000 0000] [0xxx xxxx]
            if (b0 < 0x80) {
                ch[out++] = (char)b0;
                continue;
            }

            // UTF-8:   [110y yyyy] [10xx xxxx]
            // Unicode: [0000 0yyy] [yyxx xxxx]
            if ((b0 & 0xE0) == 0xC0) {
                int b1;
                if (++in < total) {
                    b1 = fBuffer[in] & 0x00FF;
                }
                else {
                    // the sequence runs past the block; pull the rest from the stream
                    b1 = fInputStream.read();
                    if (b1 == -1) {
                        if (out > offset) {
                            return park(out - offset, b0);
                        }
                        expectedByte(2, 2);
                    }
                }
                if ((b1 & 0xC0) != 0x80) {
                    if (out > offset) {
                        return park(out - offset, b0, b1);
                    }
                    invalidByte(2, 2, b1);
                }
                ch[out++] = (char)(((b0 << 6) & 0x07C0) | (b1 & 0x003F));
                continue;
            }

            // UTF-8:   [1110 zzzz] [10yy yyyy] [10xx xxxx]
            // Unicode: [zzzz yyyy] [yyxx xxxx]
            if ((b0 & 0xF0) == 0xE0) {
                int b1;
                if (++in < total) {
                    b1 = fBuffer[in] & 0x00FF;
                }
                else {
                    b1 = fInputStream.read();
                    if (b1 == -1) {
                        if (out > offset) {
                            return park(out - offset, b0);
                        }
                        expectedByte(2, 3);
                    }
                }
                if ((b1 & 0xC0) != 0x80) {
                    if (out > offset) {
                        return park(out - offset, b0, b1);
                    }
                    invalidByte(2, 3, b1);
                }
                int b2;
                if (++in < total) {
                    b2 = fBuffer[in] & 0x00FF;
                }
                else {
                    b2 = fInputStream.read();
                    if (b2 == -1) {
                        if (out > offset) {
                            return park(out - offset, b0, b1);
                        }
                        expectedByte(3, 3);
                    }
                }
                if ((b2 & 0xC0) != 0x80) {
                    if (out > offset) {
                        return park(out - offset, b0, b1, b2);
                    }
                    invalidByte(3, 3, b2);
                }
                ch[out++] = (char)(((b0 << 12) & 0xF000) | ((b1 << 6) & 0x0FC0) |
                                   (b2 & 0x003F));
                continue;
            }

            // UTF-8:   [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
            // Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
            //          [1101 11yy] [yyxx xxxx] (low surrogate)
            //          * uuuuu = wwww + 1
            if ((b0 & 0xF8) == 0xF0) {
                int b1;
                if (++in < total) {
                    b1 = fBuffer[in] & 0x00FF;
                }
                else {
                    b1 = fInputStream.read();
                    if (b1 == -1) {
                        if (out > offset) {
                            return park(out - offset, b0);
                        }
                        expectedByte(2, 4);
                    }
                }
                if ((b1 & 0xC0) != 0x80) {
                    if (out > offset) {
                        return park(out - offset, b0, b1);
                    }
                    invalidByte(2, 4, b1);
                }
                int b2;
                if (++in < total) {
                    b2 = fBuffer[in] & 0x00FF;
                }
                else {
                    b2 = fInputStream.read();
                    if (b2 == -1) {
                        if (out > offset) {
                            return park(out - offset, b0, b1);
                        }
                        expectedByte(3, 4);
                    }
                }
                if ((b2 & 0xC0) != 0x80) {
                    if (out > offset) {
                        return park(out - offset, b0, b1, b2);
                    }
                    invalidByte(3, 4, b2);
                }
                int b3;
                if (++in < total) {
                    b3 = fBuffer[in] & 0x00FF;
                }
                else {
                    b3 = fInputStream.read();
                    if (b3 == -1) {
                        if (out > offset) {
                            return park(out - offset, b0, b1, b2);
                        }
                        expectedByte(4, 4);
                    }
                }
                if ((b3 & 0xC0) != 0x80) {
                    if (out > offset) {
                        return park(out - offset, b0, b1, b2, b3);
                    }
                    invalidByte(4, 4, b3);
                }

                // decode bytes into surrogate characters
                int uuuuu = ((b0 << 2) & 0x001C) | ((b1 >> 4) & 0x0003);
                if (uuuuu > 0x10) {
                    invalidSurrogate(uuuuu);
                }
                int wwww = uuuuu - 1;
                int zzzz = b1 & 0x000F;
                int yyyyyy = b2 & 0x003F;
                int xxxxxx = b3 & 0x003F;
                int hs = 0xD800 | ((wwww << 6) & 0x03C0) | (zzzz << 2) | (yyyyyy >> 4);
                int ls = 0xDC00 | ((yyyyyy << 6) & 0x03C0) | xxxxxx;

                // set characters; if the pair straddled the end of the block there may
                // be room for only one of them, so the low surrogate waits for the next read
                ch[out++] = (char)hs;
                if (out < limit) {
                    ch[out++] = (char)ls;
                }
                else {
                    fSurrogate = ls;
                }
                continue;
            }

            // error
            if (out > offset) {
                return park(out - offset, b0);
            }
            invalidByte(1, 1, b0);
        }

        return out - offset;

    } // read(char[],int,int)

    /**
     * Skip characters. This method will block until some characters are
     * available, an I/O error occurs, or the end of the stream is reached.
     *
     * @param n The number of characters to skip
     *
     * @return The number of characters actually skipped
     *
     * @exception IllegalArgumentException If n is negative
     * @exception IOException If an I/O error occurs
     */
    public long skip(long n) throws IOException {

        if (n < 0) {
            throw new IllegalArgumentException("skip value is negative");
        }

        long remaining = n;
        final char[] ch = new char[fBuffer.length];
        do {
            int length = ch.length < remaining ? ch.length : (int)remaining;
            int count = read(ch, 0, length);
            if (count > 0) {
                remaining -= count;
            }
            else {
                break;
            }
        } while (remaining > 0);

        long skipped = n - remaining;
        return skipped;

    } // skip(long):long

    /**
     * Tell whether this stream is ready to be read.
     *
     * @return This implementation always returns false. Note that returning
     * false does not guarantee that the next read will block.
     *
     * @exception IOException If an I/O error occurs
     */
    public boolean ready() throws IOException {
        return false;
    } // ready()

    /**
     * Tell whether this stream supports the mark() operation.
     *
     * @return This implementation always returns false.
     */
    public boolean markSupported() {
        return false;
    } // markSupported()

    /**
     * Marking is not supported by this reader.
     *
     * @param readAheadLimit Ignored.
     *
     * @exception IOException Always, because the operation is not supported
     */
    public void mark(int readAheadLimit) throws IOException {
        throw new IOException("operationNotSupported");
    }

    /**
     * Reset the decoder state: any parked bytes and any pending low
     * surrogate are forgotten. The underlying stream is not repositioned.
     *
     * @exception IOException Declared by the Reader contract
     */
    public void reset() throws IOException {
        fOffset = 0;
        fSurrogate = -1;
    } // reset()

    /**
     * Close the underlying input stream.
     *
     * @exception IOException If an I/O error occurs
     */
    public void close() throws IOException {
        fInputStream.close();
    } // close()

    //
    // Private methods
    //

    /**
     * Parks the bytes of a sequence that could not be decoded at the start of
     * the buffer, so that the next block read meets them first and reports
     * the error, and returns the number of characters decoded so far.
     */
    private int park(int decoded, int... bytes) {
        for (int i = 0; i < bytes.length; i++) {
            fBuffer[i] = (byte)bytes[i];
        }
        fOffset = bytes.length;
        return decoded;
    } // park(int,int...):int

    /** Throws an exception for expected byte. */
    private void expectedByte(int position, int count)
        throws UTFDataFormatException {

        throw new UTFDataFormatException("expectedByte");

    } // expectedByte(int,int)

    /** Throws an exception for invalid byte. */
    private void invalidByte(int position, int count, int c)
        throws UTFDataFormatException {

        throw new UTFDataFormatException("invalidByte");

    } // invalidByte(int,int,int)

    /** Throws an exception for invalid surrogate bits. */
    private void invalidSurrogate(int uuuuu) throws UTFDataFormatException {

        throw new UTFDataFormatException("invalidHighSurrogate");

    } // invalidSurrogate(int)

} // class UTF8Reader</source>
Writes all characters from a Reader to a file using the default character encoding.
<source lang="java">
/*
* Copyright Aduna (http://www.aduna-software.ru/) (c) 1997-2006. * * Licensed under the Aduna BSD-style license. */
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;

public class Main {
  /**
   * Copies everything the Reader produces into a file, encoding the characters with the
   * default character encoding. The file writer is always closed; the reader is left open.
   *
   * @param reader The Reader containing the data to write to the file.
   * @param file The file to write the data to.
   * @return The total number of characters written.
   * @throws IOException If an I/O error occurred while trying to write the
   * data to the file.
   * @see java.io.FileWriter
   */
  public static final long writeToFile(Reader reader, File file) throws IOException {
    FileWriter target = new FileWriter(file);
    long written;
    try {
      written = transfer(reader, target);
    } finally {
      target.close();
    }
    return written;
  }

  /**
   * Moves characters from <tt>in</tt> to <tt>out</tt> until <tt>in</tt> reports end of
   * stream. Neither stream is closed.
   *
   * @param in The Reader to read characters from.
   * @param out The Writer to write characters to.
   * @return The total number of characters transferred.
   * @throws IOException If reading or writing fails.
   */
  public static final long transfer(Reader in, Writer out) throws IOException {
    char[] window = new char[4096];
    long count = 0;
    int filled = in.read(window);
    while (filled != -1) {
      out.write(window, 0, filled);
      count += filled;
      filled = in.read(window);
    }
    return count;
  }
}</source>