Java Tutorial/File/Reader — различия между версиями
Admin (обсуждение | вклад) м (1 версия) |
|
(нет различий)
|
Текущая версия на 05:20, 1 июня 2010
Содержание
- 1 An InputStream backed by a Reader
- 2 Compare the contents of two Readers to determine if they are equal or not.
- 3 convert Reader to InputStream
- 4 CRLF Terminated Reader
- 5 Read and return the entire contents of the supplied Reader. This method always closes the reader when finished reading.
- 6 Reader: Reading Text (Characters)
- 7 Read from Reader and write to Writer until there is no more input from reader.
- 8 Reads characters available from the Reader and returns these characters as a String object.
- 9 Transfers all characters that can be read from one Reader to another Reader
- 10 UTF8 Reader
- 11 Writes all characters from a Reader to a file using the default character encoding.
An InputStream backed by a Reader
/*
* Java Network Programming, Second Edition
* Merlin Hughes, Michael Shoffner, Derek Hamner
* Manning Publications Company; ISBN 188477749X
*
* http://nitric.ru/jnp/
*
* Copyright (c) 1997-1999 Merlin Hughes, Michael Shoffner, Derek Hamner;
* all rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE ABOVE NAMED AUTHORS "AS IS" AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS, THEIR
* PUBLISHER OR THEIR EMPLOYERS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
/**
* An InputStream backed by a Reader
*/
public class ReaderInputStream extends InputStream {
protected Reader reader;
protected ByteArrayOutputStream byteArrayOut;
protected Writer writer;
protected char[] chars;
protected byte[] buffer;
protected int index, length;
/**
* Constructor to supply a Reader
*
* @param reader - the Reader used by the InputStream
*/
public ReaderInputStream(Reader reader) {
this.reader = reader;
byteArrayOut = new ByteArrayOutputStream();
writer = new OutputStreamWriter(byteArrayOut);
chars = new char[1024];
}
/**
* Constructor to supply a Reader and an encoding
*
* @param reader - the Reader used by the InputStream
* @param encoding - the encoding to use for the InputStream
* @throws UnsupportedEncodingException if the encoding is not supported
*/
public ReaderInputStream(Reader reader, String encoding) throws UnsupportedEncodingException {
this.reader = reader;
byteArrayOut = new ByteArrayOutputStream();
writer = new OutputStreamWriter(byteArrayOut, encoding);
chars = new char[1024];
}
/**
* @see java.io.InputStream#read()
*/
public int read() throws IOException {
if (index >= length)
fillBuffer();
if (index >= length)
return -1;
return 0xff & buffer[index++];
}
protected void fillBuffer() throws IOException {
if (length < 0)
return;
int numChars = reader.read(chars);
if (numChars < 0) {
length = -1;
} else {
byteArrayOut.reset();
writer.write(chars, 0, numChars);
writer.flush();
buffer = byteArrayOut.toByteArray();
length = buffer.length;
index = 0;
}
}
/**
* @see java.io.InputStream#read(byte[], int, int)
*/
public int read(byte[] data, int off, int len) throws IOException {
if (index >= length)
fillBuffer();
if (index >= length)
return -1;
int amount = Math.min(len, length - index);
System.arraycopy(buffer, index, data, off, amount);
index += amount;
return amount;
}
/**
* @see java.io.InputStream#available()
*/
public int available() throws IOException {
return (index < length) ? length - index :
((length >= 0) && reader.ready()) ? 1 : 0;
}
/**
* @see java.io.InputStream#close()
*/
public void close() throws IOException {
reader.close();
}
}
Compare the contents of two Readers to determine if they are equal or not.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
/** * Origin of code: Apache Avalon (Excalibur)
*
* @author Peter Donald
* @author Jeff Turner
* @author Matthew Hawthorne
* @author Stephen Colebourne
* @author Gareth Davis
* @version CVS $Revision$ $Date$*/
public class Main {
/**
* Compare the contents of two Readers to determine if they are equal or not.
*
* This method buffers the input internally using <code>BufferedReader</code> if they are not
* already buffered.
*
* @param input1
* the first reader
* @param input2
* the second reader
* @return true if the content of the readers are equal or they both don"t exist, false
* otherwise
* @throws NullPointerException
* if either input is null
* @throws IOException
* if an I/O error occurs
* @since 1.1
*/
public static boolean contentEquals(Reader input1, Reader input2) throws IOException
{
if (!(input1 instanceof BufferedReader))
{
input1 = new BufferedReader(input1);
}
if (!(input2 instanceof BufferedReader))
{
input2 = new BufferedReader(input2);
}
int ch = input1.read();
while (-1 != ch)
{
int ch2 = input2.read();
if (ch != ch2)
{
return false;
}
ch = input1.read();
}
int ch2 = input2.read();
return (ch2 == -1);
}
}
convert Reader to InputStream
/*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
*
* Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
*
* The contents of this file are subject to the terms of either the GNU
* General Public License Version 2 only ("GPL") or the Common
* Development and Distribution License("CDDL") (collectively, the
* "License"). You may not use this file except in compliance with the
* License. You can obtain a copy of the License at
* http://www.netbeans.org/cddl-gplv2.html
* or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
* specific language governing permissions and limitations under the
* License. When distributing the software, include this License Header
* Notice in each file and include the License file at
* nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
* particular file as subject to the "Classpath" exception as provided
* by Sun in the GPL Version 2 section of the License file that
* accompanied this code. If applicable, add the following below the
* License Header, with the fields enclosed by brackets [] replaced by
* your own identifying information:
* "Portions Copyrighted [year] [name of copyright owner]"
*
* Contributor(s):
*
* The Original Software is NetBeans. The Initial Developer of the Original
* Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
* Microsystems, Inc. All Rights Reserved.
*
* If you wish your version of this file to be governed by only the CDDL
* or only the GPL Version 2, indicate your decision by adding
* "[Contributor] elects to include this software in this distribution
* under the [CDDL or GPL Version 2] license." If you do not indicate a
* single choice of license, a recipient has the option to distribute
* your version of this file under either the CDDL, the GPL Version 2 or
* to extend the choice of license to its licensees as provided above.
* However, if you add GPL Version 2 code and therefore, elected the GPL
* Version 2 license, then the option applies only if the new code is
* made subject to such option by the copyright holder.
*/
import java.io.*;
/**
* This class convert Reader to InputStream. It works by converting
* the characters to the encoding specified in constructor parameter.
*
* @author Petr Hamernik, David Strupl
*/
public class ReaderInputStream extends InputStream {
/** Input Reader class. */
private Reader reader;
private PipedOutputStream pos;
private PipedInputStream pis;
private OutputStreamWriter osw;
/** Creates new input stream from the given reader.
* Uses the platform default encoding.
* @param reader Input reader
*/
public ReaderInputStream(Reader reader) throws IOException {
this.reader = reader;
pos = new PipedOutputStream();
pis = new PipedInputStream(pos);
osw = new OutputStreamWriter(pos);
}
/** Creates new input stream from the given reader and encoding.
* @param reader Input reader
* @param encoding
*/
public ReaderInputStream(Reader reader, String encoding)
throws IOException {
this.reader = reader;
pos = new PipedOutputStream();
pis = new PipedInputStream(pos);
osw = new OutputStreamWriter(pos, encoding);
}
public int read() throws IOException {
if (pis.available() > 0) {
return pis.read();
}
int c = reader.read();
if (c == -1) {
return c;
}
osw.write(c);
osw.flush();
pos.flush();
return pis.read();
}
public int read(byte[] b, int off, int len) throws IOException {
if (len == 0) {
return 0;
}
int c = read();
if (c == -1) {
return -1;
}
b[off] = (byte) c;
int i = 1;
// Don"t try to fill up the buffer if the reader is waiting.
for (; (i < len) && reader.ready(); i++) {
c = read();
if (c == -1) {
return i;
}
b[off + i] = (byte) c;
}
return i;
}
public int available() throws IOException {
int i = pis.available();
if (i > 0) {
return i;
}
if (reader.ready()) {
// Char must produce at least one byte.
return 1;
} else {
return 0;
}
}
public void close() throws IOException {
reader.close();
osw.close();
pis.close();
}
}
CRLF Terminated Reader
/****************************************************************
* Licensed to the Apache Software Foundation (ASF) under one *
* or more contributor license agreements. See the NOTICE file *
* distributed with this work for additional information *
* regarding copyright ownership. The ASF licenses this file *
* to you under the Apache License, Version 2.0 (the *
* "License"); you may not use this file except in compliance *
* with the License. You may obtain a copy of the License at *
* *
* http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, *
* software distributed under the License is distributed on an *
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
* KIND, either express or implied. See the License for the *
* specific language governing permissions and limitations *
* under the License. *
****************************************************************/
import java.io.InputStream;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.io.IOException;
/**
* A Reader for use with SMTP or other protocols in which lines
* must end with CRLF. Extends Reader and overrides its
* readLine() method. The Reader readLine() method cannot
* serve for SMTP because it ends lines with either CR or LF alone.
*/
public class CRLFTerminatedReader extends Reader {
public class TerminationException extends IOException {
private int where;
public TerminationException(int where) {
super();
this.where = where;
}
public TerminationException(String s, int where) {
super(s);
this.where = where;
}
public int position() {
return where;
}
}
public class LineLengthExceededException extends IOException {
public LineLengthExceededException(String s) {
super(s);
}
}
/**
* Constructs this CRLFTerminatedReader.
* @param in an InputStream
* @param charsetName the String name of a supported charset.
* "ASCII" is common here.
* @throws UnsupportedEncodingException if the named charset
* is not supported
*/
InputStream in;
public CRLFTerminatedReader(InputStream in) {
this.in = in;
}
public CRLFTerminatedReader(InputStream in, String enc) throws UnsupportedEncodingException {
this(in);
}
private StringBuffer lineBuffer = new StringBuffer();
private final int
EOF = -1,
CR = 13,
LF = 10;
private int tainted = -1;
/**
* Read a line of text which is terminated by CRLF. The concluding
* CRLF characters are not returned with the String, but if either CR
* or LF appears in the text in any other sequence it is returned
* in the String like any other character. Some characters at the
* end of the stream may be lost if they are in a "line" not
* terminated by CRLF.
*
* @return either a String containing the contents of a
* line which must end with CRLF, or null if the end of the
* stream has been reached, possibly discarding some characters
* in a line not terminated with CRLF.
* @throws IOException if an I/O error occurs.
*/
public String readLine() throws IOException{
//start with the StringBuffer empty
lineBuffer.delete(0, lineBuffer.length());
/* This boolean tells which state we are in,
* depending upon whether or not we got a CR
* in the preceding read().
*/
boolean cr_just_received = false;
// Until we add support for specifying a maximum line lenth as
// a Service Extension, limit lines to 2K, which is twice what
// RFC 2821 4.5.3.1 requires.
while (lineBuffer.length() <= 2048) {
int inChar = read();
if (!cr_just_received){
//the most common case, somewhere before the end of a line
switch (inChar){
case CR : cr_just_received = true;
break;
case EOF : return null; // premature EOF -- discards data(?)
case LF : //the normal ending of a line
if (tainted == -1) tainted = lineBuffer.length();
// intentional fall-through
default : lineBuffer.append((char)inChar);
}
}else{
// CR has been received, we may be at end of line
switch (inChar){
case LF : // LF without a preceding CR
if (tainted != -1) {
int pos = tainted;
tainted = -1;
throw new TerminationException("\"bare\" CR or LF in data stream", pos);
}
return lineBuffer.toString();
case EOF : return null; // premature EOF -- discards data(?)
case CR : //we got two (or more) CRs in a row
if (tainted == -1) tainted = lineBuffer.length();
lineBuffer.append((char)CR);
break;
default : //we got some other character following a CR
if (tainted == -1) tainted = lineBuffer.length();
lineBuffer.append((char)CR);
lineBuffer.append((char)inChar);
cr_just_received = false;
}
}
}//while
throw new LineLengthExceededException("Exceeded maximum line length");
}//method readLine()
public int read() throws IOException {
return in.read();
}
public boolean ready() throws IOException {
return in.available() > 0;
}
public int read(char cbuf[], int off, int len) throws IOException {
byte [] temp = new byte[len];
int result = in.read(temp, 0, len);
for (int i=0;i<result;i++) cbuf[i] = (char) temp[i];
return result;
}
public void close() throws IOException {
in.close();
}
}
Read and return the entire contents of the supplied Reader. This method always closes the reader when finished reading.
/*
* JBoss DNA (http://www.jboss.org/dna)
* See the COPYRIGHT.txt file distributed with this work for information
* regarding copyright ownership. Some portions may be licensed
* to Red Hat, Inc. under one or more contributor license agreements.
* See the AUTHORS.txt file in the distribution for a full listing of
* individual contributors.
*
* JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA
* is licensed to you under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* JBoss DNA is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
import java.io.IOException;
import java.io.Reader;
/**
* @author Randall Hauch
*/
public class Main {
/**
* Read and return the entire contents of the supplied {@link Reader}. This method always closes the reader when finished
* reading.
*
* @param reader the reader of the contents; may be null
* @return the contents, or an empty string if the supplied reader is null
* @throws IOException if there is an error reading the content
*/
public static String read( Reader reader ) throws IOException {
if (reader == null) return "";
StringBuilder sb = new StringBuilder();
boolean error = false;
try {
int numRead = 0;
char[] buffer = new char[1024];
while ((numRead = reader.read(buffer)) > -1) {
sb.append(buffer, 0, numRead);
}
} catch (IOException e) {
error = true; // this error should be thrown, even if there is an error closing reader
throw e;
} catch (RuntimeException e) {
error = true; // this error should be thrown, even if there is an error closing reader
throw e;
} finally {
try {
reader.close();
} catch (IOException e) {
if (!error) throw e;
}
}
return sb.toString();
}
}
Reader: Reading Text (Characters)
- Reader is an abstract class that represents an input stream for reading characters.
- You use the Reader class to read text (characters, i.e. human readable data).
- The two implementation classes of Reader are InputStreamReader and BufferedReader.
The Reader class has three read method overloads that are similar to the read methods in InputStream:
public int read()
public int read (char[] data)
public int read (char[] data, int offset, int length)
public int read(java.nio.CharBuffer target)
Read from Reader and write to Writer until there is no more input from reader.
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
public class Main {
/**
* Read input from reader and write it to writer until there is no more
* input from reader.
*
* @param reader the reader to read from.
* @param writer the writer to write to.
* @param buf the char array to use as a bufferx
*/
public static void flow( Reader reader, Writer writer, char[] buf )
throws IOException {
int numRead;
while ( (numRead = reader.read(buf) ) >= 0) {
writer.write(buf, 0, numRead);
}
}
}
Reads characters available from the Reader and returns these characters as a String object.
/*
* Copyright Aduna (http://www.aduna-software.ru/) (c) 1997-2006.
*
* Licensed under the Aduna BSD-style license.
*/
import java.io.CharArrayWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
public class Main {
/**
* Fully reads the characters available from the supplied Reader
* and returns these characters as a String object.
*
* @param reader The Reader to read the characters from.
* @return A String existing of the characters that were read.
* @throws IOException If I/O error occurred.
*/
public static final String readFully(Reader reader)
throws IOException
{
CharArrayWriter out = new CharArrayWriter(4096);
transfer(reader, out);
out.close();
return out.toString();
}
/**
* Transfers all characters that can be read from <tt>in</tt> to
* <tt>out</tt>.
*
* @param in The Reader to read characters from.
* @param out The Writer to write characters to.
* @return The total number of characters transfered.
*/
public static final long transfer(Reader in, Writer out)
throws IOException
{
long totalChars = 0;
int charsInBuf = 0;
char[] buf = new char[4096];
while ((charsInBuf = in.read(buf)) != -1) {
out.write(buf, 0, charsInBuf);
totalChars += charsInBuf;
}
return totalChars;
}
}
Transfers all characters that can be read from one Reader to another Reader
/*
* Copyright Aduna (http://www.aduna-software.ru/) (c) 1997-2006.
*
* Licensed under the Aduna BSD-style license.
*/
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
public class Main {
/**
* Transfers all characters that can be read from <tt>in</tt> to
* <tt>out</tt>.
*
* @param in The Reader to read characters from.
* @param out The Writer to write characters to.
* @return The total number of characters transfered.
*/
public static final long transfer(Reader in, Writer out)
throws IOException
{
long totalChars = 0;
int charsInBuf = 0;
char[] buf = new char[4096];
while ((charsInBuf = in.read(buf)) != -1) {
out.write(buf, 0, charsInBuf);
totalChars += charsInBuf;
}
return totalChars;
}
}
UTF8 Reader
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.InputStream;
import java.io.IOException;
import java.io.Reader;
import java.io.UTFDataFormatException;
/**
* @author Andy Clark, IBM
*
* @version $Id: UTF8Reader.java 515 2008-03-17 21:02:23Z jfrederic.clere@jboss.ru $
*/
public class UTF8Reader
extends Reader {
//
// Constants
//
/** Default byte buffer size (2048). */
public static final int DEFAULT_BUFFER_SIZE = 2048;
// debugging
/** Debug read. */
private static final boolean DEBUG_READ = false;
//
// Data
//
/** Input stream. */
protected InputStream fInputStream;
/** Byte buffer. */
protected byte[] fBuffer;
/** Offset into buffer. */
protected int fOffset;
/** Surrogate character. */
private int fSurrogate = -1;
//
// Constructors
//
/**
* Constructs a UTF-8 reader from the specified input stream,
* buffer size and MessageFormatter.
*
* @param inputStream The input stream.
* @param size The initial buffer size.
*/
public UTF8Reader(InputStream inputStream, int size) {
fInputStream = inputStream;
fBuffer = new byte[size];
}
//
// Reader methods
//
/**
* Read a single character. This method will block until a character is
* available, an I/O error occurs, or the end of the stream is reached.
*
* Subclasses that intend to support efficient single-character input
* should override this method.
*
* @return The character read, as an integer in the range 0 to 16383
* (<tt>0x00-0xffff</tt>), or -1 if the end of the stream has
* been reached
*
* @exception IOException If an I/O error occurs
*/
public int read() throws IOException {
// decode character
int c = fSurrogate;
if (fSurrogate == -1) {
// NOTE: We use the index into the buffer if there are remaining
// bytes from the last block read. -Ac
int index = 0;
// get first byte
int b0 = index == fOffset
? fInputStream.read() : fBuffer[index++] & 0x00FF;
if (b0 == -1) {
return -1;
}
// UTF-8: [0xxx xxxx]
// Unicode: [0000 0000] [0xxx xxxx]
if (b0 < 0x80) {
c = (char)b0;
}
// UTF-8: [110y yyyy] [10xx xxxx]
// Unicode: [0000 0yyy] [yyxx xxxx]
else if ((b0 & 0xE0) == 0xC0) {
int b1 = index == fOffset
? fInputStream.read() : fBuffer[index++] & 0x00FF;
if (b1 == -1) {
expectedByte(2, 2);
}
if ((b1 & 0xC0) != 0x80) {
invalidByte(2, 2, b1);
}
c = ((b0 << 6) & 0x07C0) | (b1 & 0x003F);
}
// UTF-8: [1110 zzzz] [10yy yyyy] [10xx xxxx]
// Unicode: [zzzz yyyy] [yyxx xxxx]
else if ((b0 & 0xF0) == 0xE0) {
int b1 = index == fOffset
? fInputStream.read() : fBuffer[index++] & 0x00FF;
if (b1 == -1) {
expectedByte(2, 3);
}
if ((b1 & 0xC0) != 0x80) {
invalidByte(2, 3, b1);
}
int b2 = index == fOffset
? fInputStream.read() : fBuffer[index++] & 0x00FF;
if (b2 == -1) {
expectedByte(3, 3);
}
if ((b2 & 0xC0) != 0x80) {
invalidByte(3, 3, b2);
}
c = ((b0 << 12) & 0xF000) | ((b1 << 6) & 0x0FC0) |
(b2 & 0x003F);
}
// UTF-8: [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
// Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
// [1101 11yy] [yyxx xxxx] (low surrogate)
// * uuuuu = wwww + 1
else if ((b0 & 0xF8) == 0xF0) {
int b1 = index == fOffset
? fInputStream.read() : fBuffer[index++] & 0x00FF;
if (b1 == -1) {
expectedByte(2, 4);
}
if ((b1 & 0xC0) != 0x80) {
invalidByte(2, 3, b1);
}
int b2 = index == fOffset
? fInputStream.read() : fBuffer[index++] & 0x00FF;
if (b2 == -1) {
expectedByte(3, 4);
}
if ((b2 & 0xC0) != 0x80) {
invalidByte(3, 3, b2);
}
int b3 = index == fOffset
? fInputStream.read() : fBuffer[index++] & 0x00FF;
if (b3 == -1) {
expectedByte(4, 4);
}
if ((b3 & 0xC0) != 0x80) {
invalidByte(4, 4, b3);
}
int uuuuu = ((b0 << 2) & 0x001C) | ((b1 >> 4) & 0x0003);
if (uuuuu > 0x10) {
invalidSurrogate(uuuuu);
}
int wwww = uuuuu - 1;
int hs = 0xD800 |
((wwww << 6) & 0x03C0) | ((b1 << 2) & 0x003C) |
((b2 >> 4) & 0x0003);
int ls = 0xDC00 | ((b2 << 6) & 0x03C0) | (b3 & 0x003F);
c = hs;
fSurrogate = ls;
}
// error
else {
invalidByte(1, 1, b0);
}
}
// use surrogate
else {
fSurrogate = -1;
}
return c;
} // read():int
/**
* Read characters into a portion of an array. This method will block
* until some input is available, an I/O error occurs, or the end of the
* stream is reached.
*
* @param ch Destination buffer
* @param offset Offset at which to start storing characters
* @param length Maximum number of characters to read
*
* @return The number of characters read, or -1 if the end of the
* stream has been reached
*
* @exception IOException If an I/O error occurs
*/
public int read(char ch[], int offset, int length) throws IOException {
// handle surrogate
int out = offset;
if (fSurrogate != -1) {
ch[offset + 1] = (char)fSurrogate;
fSurrogate = -1;
length--;
out++;
}
// read bytes
int count = 0;
if (fOffset == 0) {
// adjust length to read
if (length > fBuffer.length) {
length = fBuffer.length;
}
// perform read operation
count = fInputStream.read(fBuffer, 0, length);
if (count == -1) {
return -1;
}
count += out - offset;
}
// skip read; last character was in error
// NOTE: Having an offset value other than zero means that there was
// an error in the last character read. In this case, we have
// skipped the read so we don"t consume any bytes past the
// error. By signalling the error on the next block read we
// allow the method to return the most valid characters that
// it can on the previous block read. -Ac
else {
count = fOffset;
fOffset = 0;
}
// convert bytes to characters
final int total = count;
for (int in = 0; in < total; in++) {
int b0 = fBuffer[in] & 0x00FF;
// UTF-8: [0xxx xxxx]
// Unicode: [0000 0000] [0xxx xxxx]
if (b0 < 0x80) {
ch[out++] = (char)b0;
continue;
}
// UTF-8: [110y yyyy] [10xx xxxx]
// Unicode: [0000 0yyy] [yyxx xxxx]
if ((b0 & 0xE0) == 0xC0) {
int b1 = -1;
if (++in < total) {
b1 = fBuffer[in] & 0x00FF;
}
else {
b1 = fInputStream.read();
if (b1 == -1) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fOffset = 1;
return out - offset;
}
expectedByte(2, 2);
}
count++;
}
if ((b1 & 0xC0) != 0x80) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fBuffer[1] = (byte)b1;
fOffset = 2;
return out - offset;
}
invalidByte(2, 2, b1);
}
int c = ((b0 << 6) & 0x07C0) | (b1 & 0x003F);
ch[out++] = (char)c;
count -= 1;
continue;
}
// UTF-8: [1110 zzzz] [10yy yyyy] [10xx xxxx]
// Unicode: [zzzz yyyy] [yyxx xxxx]
if ((b0 & 0xF0) == 0xE0) {
int b1 = -1;
if (++in < total) {
b1 = fBuffer[in] & 0x00FF;
}
else {
b1 = fInputStream.read();
if (b1 == -1) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fOffset = 1;
return out - offset;
}
expectedByte(2, 3);
}
count++;
}
if ((b1 & 0xC0) != 0x80) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fBuffer[1] = (byte)b1;
fOffset = 2;
return out - offset;
}
invalidByte(2, 3, b1);
}
int b2 = -1;
if (++in < total) {
b2 = fBuffer[in] & 0x00FF;
}
else {
b2 = fInputStream.read();
if (b2 == -1) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fBuffer[1] = (byte)b1;
fOffset = 2;
return out - offset;
}
expectedByte(3, 3);
}
count++;
}
if ((b2 & 0xC0) != 0x80) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fBuffer[1] = (byte)b1;
fBuffer[2] = (byte)b2;
fOffset = 3;
return out - offset;
}
invalidByte(3, 3, b2);
}
int c = ((b0 << 12) & 0xF000) | ((b1 << 6) & 0x0FC0) |
(b2 & 0x003F);
ch[out++] = (char)c;
count -= 2;
continue;
}
// UTF-8: [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
// Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
// [1101 11yy] [yyxx xxxx] (low surrogate)
// * uuuuu = wwww + 1
if ((b0 & 0xF8) == 0xF0) {
int b1 = -1;
if (++in < total) {
b1 = fBuffer[in] & 0x00FF;
}
else {
b1 = fInputStream.read();
if (b1 == -1) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fOffset = 1;
return out - offset;
}
expectedByte(2, 4);
}
count++;
}
if ((b1 & 0xC0) != 0x80) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fBuffer[1] = (byte)b1;
fOffset = 2;
return out - offset;
}
invalidByte(2, 4, b1);
}
int b2 = -1;
if (++in < total) {
b2 = fBuffer[in] & 0x00FF;
}
else {
b2 = fInputStream.read();
if (b2 == -1) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fBuffer[1] = (byte)b1;
fOffset = 2;
return out - offset;
}
expectedByte(3, 4);
}
count++;
}
if ((b2 & 0xC0) != 0x80) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fBuffer[1] = (byte)b1;
fBuffer[2] = (byte)b2;
fOffset = 3;
return out - offset;
}
invalidByte(3, 4, b2);
}
int b3 = -1;
if (++in < total) {
b3 = fBuffer[in] & 0x00FF;
}
else {
b3 = fInputStream.read();
if (b3 == -1) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fBuffer[1] = (byte)b1;
fBuffer[2] = (byte)b2;
fOffset = 3;
return out - offset;
}
expectedByte(4, 4);
}
count++;
}
if ((b3 & 0xC0) != 0x80) {
if (out > offset) {
fBuffer[0] = (byte)b0;
fBuffer[1] = (byte)b1;
fBuffer[2] = (byte)b2;
fBuffer[3] = (byte)b3;
fOffset = 4;
return out - offset;
}
invalidByte(4, 4, b2);
}
// decode bytes into surrogate characters
int uuuuu = ((b0 << 2) & 0x001C) | ((b1 >> 4) & 0x0003);
if (uuuuu > 0x10) {
invalidSurrogate(uuuuu);
}
int wwww = uuuuu - 1;
int zzzz = b1 & 0x000F;
int yyyyyy = b2 & 0x003F;
int xxxxxx = b3 & 0x003F;
int hs = 0xD800 | ((wwww << 6) & 0x03C0) | (zzzz << 2) | (yyyyyy >> 4);
int ls = 0xDC00 | ((yyyyyy << 6) & 0x03C0) | xxxxxx;
// set characters
ch[out++] = (char)hs;
ch[out++] = (char)ls;
count -= 2;
continue;
}
// error
if (out > offset) {
fBuffer[0] = (byte)b0;
fOffset = 1;
return out - offset;
}
invalidByte(1, 1, b0);
}
return count;
} // read(char[],int,int)
/**
* Skip characters. This method will block until some characters are
* available, an I/O error occurs, or the end of the stream is reached.
*
* @param n The number of characters to skip
*
* @return The number of characters actually skipped
*
* @exception IOException If an I/O error occurs
*/
public long skip(long n) throws IOException {
long remaining = n;
final char[] ch = new char[fBuffer.length];
do {
int length = ch.length < remaining ? ch.length : (int)remaining;
int count = read(ch, 0, length);
if (count > 0) {
remaining -= count;
}
else {
break;
}
} while (remaining > 0);
long skipped = n - remaining;
return skipped;
} // skip(long):long
/**
* Tell whether this stream is ready to be read.
*
* @return True if the next read() is guaranteed not to block for input,
* false otherwise. Note that returning false does not guarantee that the
* next read will block.
*
* @exception IOException If an I/O error occurs
*/
public boolean ready() throws IOException {
return false;
} // ready()
/**
* Tell whether this stream supports the mark() operation.
*/
public boolean markSupported() {
return false;
} // markSupported()
/**
* Mark the present position in the stream. Subsequent calls to reset()
* will attempt to reposition the stream to this point. Not all
* character-input streams support the mark() operation.
*
* @param readAheadLimit Limit on the number of characters that may be
* read while still preserving the mark. After
* reading this many characters, attempting to
* reset the stream may fail.
*
* @exception IOException If the stream does not support mark(),
* or if some other I/O error occurs
*/
public void mark(int readAheadLimit) throws IOException {
throw new IOException("operationNotSupported");
}
/**
* Reset the stream. If the stream has been marked, then attempt to
* reposition it at the mark. If the stream has not been marked, then
* attempt to reset it in some way appropriate to the particular stream,
* for example by repositioning it to its starting point. Not all
* character-input streams support the reset() operation, and some support
* reset() without supporting mark().
*
* @exception IOException If the stream has not been marked,
* or if the mark has been invalidated,
* or if the stream does not support reset(),
* or if some other I/O error occurs
*/
public void reset() throws IOException {
fOffset = 0;
fSurrogate = -1;
} // reset()
/**
* Close the stream. Once a stream has been closed, further read(),
* ready(), mark(), or reset() invocations will throw an IOException.
* Closing a previously-closed stream, however, has no effect.
*
* @exception IOException If an I/O error occurs
*/
public void close() throws IOException {
fInputStream.close();
} // close()
//
// Private methods
//
/** Throws an exception for expected byte. */
private void expectedByte(int position, int count)
throws UTFDataFormatException {
throw new UTFDataFormatException("expectedByte");
} // expectedByte(int,int,int)
/** Throws an exception for invalid byte. */
private void invalidByte(int position, int count, int c)
throws UTFDataFormatException {
throw new UTFDataFormatException("invalidByte");
} // invalidByte(int,int,int,int)
/** Throws an exception for invalid surrogate bits. */
private void invalidSurrogate(int uuuuu) throws UTFDataFormatException {
throw new UTFDataFormatException("invalidHighSurrogate");
} // invalidSurrogate(int)
} // class UTF8Reader
Writes all characters from a Reader to a file using the default character encoding.
/*
* Copyright Aduna (http://www.aduna-software.ru/) (c) 1997-2006.
*
* Licensed under the Aduna BSD-style license.
*/
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
public class Main {
/**
* Writes all characters from a <tt>Reader</tt> to a file using the default
* character encoding.
*
* @param reader The <tt>Reader</tt> containing the data to write to the
* file.
* @param file The file to write the data to.
* @return The total number of characters written.
* @throws IOException If an I/O error occured while trying to write the
* data to the file.
* @see java.io.FileWriter
*/
public static final long writeToFile(Reader reader, File file)
throws IOException
{
FileWriter writer = new FileWriter(file);
try {
return transfer(reader, writer);
}
finally {
writer.close();
}
}
/**
* Transfers all characters that can be read from <tt>in</tt> to
* <tt>out</tt>.
*
* @param in The Reader to read characters from.
* @param out The Writer to write characters to.
* @return The total number of characters transfered.
*/
public static final long transfer(Reader in, Writer out)
throws IOException
{
long totalChars = 0;
int charsInBuf = 0;
char[] buf = new char[4096];
while ((charsInBuf = in.read(buf)) != -1) {
out.write(buf, 0, charsInBuf);
totalChars += charsInBuf;
}
return totalChars;
}
}