Java/File Input Output/Char Reader Writer — различия между версиями

Материал из Java эксперт
Перейти к: навигация, поиск
 
м (1 версия)
 
(нет различий)

Текущая версия на 06:02, 1 июня 2010

Char Array IO

  
import java.io.CharArrayReader;
import java.io.CharArrayWriter;
import java.io.IOException;
public class CharArrayIOApp {
  public static void main(String args[]) throws IOException {
    CharArrayWriter outStream = new CharArrayWriter();
    String s = "This is a test.";
    for (int i = 0; i < s.length(); ++i)
      outStream.write(s.charAt(i));
    System.out.println("outstream: " + outStream);
    System.out.println("size: " + outStream.size());
    CharArrayReader inStream;
    inStream = new CharArrayReader(outStream.toCharArray());
    int ch = 0;
    StringBuffer sb = new StringBuffer("");
    while ((ch = inStream.read()) != -1)
      sb.append((char) ch);
    s = sb.toString();
    System.out.println(s.length() + " characters were read");
    System.out.println("They are: " + s);
  }
}





Demonstrate CharArrayWriter.

  
import java.io.CharArrayWriter;
import java.io.FileWriter;
import java.io.IOException;
class CharArrayWriterDemo {
  public static void main(String args[]) throws IOException {
    CharArrayWriter f = new CharArrayWriter();
    String s = "This should end up in the array";
    char buf[] = new char[s.length()];
    s.getChars(0, s.length(), buf, 0);
    f.write(buf);
    System.out.println(f.toString());
    char c[] = f.toCharArray();
    for (int i = 0; i < c.length; i++) {
      System.out.print(c[i]);
    }
    FileWriter f2 = new FileWriter("test.txt");
    f.writeTo(f2);
    f2.close();
    f.reset();
    for (int i = 0; i < 3; i++)
      f.write("X");
  }
}





Safe UTF: 64K serialized size

 
/*
  * JBoss, Home of Professional Open Source
  * Copyright 2005, JBoss Inc., and individual contributors as indicated
  * by the @authors tag. See the copyright.txt in the distribution for a
  * full listing of individual contributors.
  *
  * This is free software; you can redistribute it and/or modify it
  * under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
  * This software is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this software; if not, write to the Free
  * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
  * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
  */
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
/**
 * 
 * A SafeUTF
 * 
 * @author 
 * @version $Revision: 1174 $
 *
 * $Id: SafeUTF.java 1174 2006-08-02 14:14:32Z timfox $
 * 
 * There is a "bug" in JDK1.4 / 1.5 DataOutputStream.writeUTF()
 * which means it does not work with Strings >= 64K serialized size.
 * See http://bugs.sun.ru/bugdatabase/view_bug.do?bug_id=4806007
 * 
 * We work around this by chunking larger strings into smaller pieces.
 * 
 * Note we only support TextMessage and ObjectMessage bodies with serialized length >= 64K
 * We DO NOT support Strings written to BytesMessages or StreamMessages or written as keys or values
 * in MapMessages, or as String properties or other String fields having serialized length >= 64K
 * This is for performance reasons since there is an overhead in coping with large
 * Strings
 * 
 */
public class SafeUTF
{      
   //Default is 16K chunks
   private static final int CHUNK_SIZE = 16 * 1024;
   
   private static final byte NULL = 0;
   
   private static final byte NOT_NULL = 1;
   
   public static SafeUTF instance = new SafeUTF(CHUNK_SIZE);
   
   private int chunkSize;
   
   private int lastReadBufferSize;
   
   public int getLastReadBufferSize()
   {
      return lastReadBufferSize;
   }
   
   public SafeUTF(int chunkSize)
   {
      this.chunkSize = chunkSize;
   }
      
   public void safeWriteUTF(DataOutputStream out, String str) throws IOException
   {        
      if (str == null)
      {
         out.writeByte(NULL);
      }
      else
      {         
         int len = str.length();
          
         short numChunks;
         
         if (len == 0)
         {
            numChunks = 0;
         }
         else
         {
            numChunks = (short)(((len - 1) / chunkSize) + 1);
         }         
         
         out.writeByte(NOT_NULL);
         
         out.writeShort(numChunks);
              
         int i = 0;
         while (len > 0)
         {
            int beginCopy = i * chunkSize;
            
            int endCopy = len <= chunkSize ? beginCopy + len : beginCopy + chunkSize;
     
            String theChunk = str.substring(beginCopy, endCopy);
               
            out.writeUTF(theChunk);
            
            len -= chunkSize;
            
            i++;
         }
      }
   }
   
   public String safeReadUTF(DataInputStream in) throws IOException
   {   
      boolean isNull = in.readByte() == NULL;
      
      if (isNull)
      {
         return null;
      }
      
      short numChunks = in.readShort();
      
      int bufferSize = chunkSize * numChunks;
      
      // special handling for single chunk
      if (numChunks == 1)
      {
         // The text size is likely to be much smaller than the chunkSize
         // so set bufferSize to the min of the input stream available
         // and the maximum buffer size. Since the input stream
         // available() can be <= 0 we check for that and default to
         // a small msg size of 256 bytes.
         
         int inSize = in.available();
               
         if (inSize <= 0)
         {
            inSize = 256;
         }
         bufferSize = Math.min(inSize, bufferSize);
         
         lastReadBufferSize = bufferSize;
      }
        
      StringBuffer buff = new StringBuffer(bufferSize);
            
      for (int i = 0; i < numChunks; i++)
      {
         String s = in.readUTF();
         buff.append(s);
      }
      
      return buff.toString();
   }
      
}





uses a pair of CharArrayReaders

  
import java.io.CharArrayReader;
import java.io.IOException;
public class CharArrayReaderDemo {
  public static void main(String args[]) throws IOException {
    String tmp = "abcdefghijklmnopqrstuvwxyz";
    int length = tmp.length();
    char c[] = new char[length];
    tmp.getChars(0, length, c, 0);
    CharArrayReader input1 = new CharArrayReader(c);
    CharArrayReader input2 = new CharArrayReader(c, 0, 5);
    int i;
    while ((i = input1.read()) != -1) {
      System.out.print((char) i);
    }
    while ((i = input2.read()) != -1) {
      System.out.print((char) i);
    }
  }
}





UTF8 Reader

 
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.InputStream;
import java.io.IOException;
import java.io.Reader;
import java.io.UTFDataFormatException;

/**
 * @author Andy Clark, IBM
 *
 * @version $Id: UTF8Reader.java 515 2008-03-17 21:02:23Z jfrederic.clere@jboss.ru $
 */
public class UTF8Reader
    extends Reader {
    
    //
    // Constants
    //
    /** Default byte buffer size (2048). */
    public static final int DEFAULT_BUFFER_SIZE = 2048;
    // debugging
    /** Debug read. */
    private static final boolean DEBUG_READ = false;
    //
    // Data
    //
    /** Input stream. */
    protected InputStream fInputStream;
    /** Byte buffer. */
    protected byte[] fBuffer;
    /** Offset into buffer. */
    protected int fOffset;
    /** Surrogate character. */
    private int fSurrogate = -1;
    //
    // Constructors
    //
    /** 
     * Constructs a UTF-8 reader from the specified input stream, 
     * buffer size and MessageFormatter.
     *
     * @param inputStream The input stream.
     * @param size        The initial buffer size.
     */
    public UTF8Reader(InputStream inputStream, int size) {
        fInputStream = inputStream;
        fBuffer = new byte[size];
    }
    //
    // Reader methods
    //
    /**
     * Read a single character.  This method will block until a character is
     * available, an I/O error occurs, or the end of the stream is reached.
     *
     * <p> Subclasses that intend to support efficient single-character input
     * should override this method.
     *
     * @return     The character read, as an integer in the range 0 to 16383
     *             (<tt>0x00-0xffff</tt>), or -1 if the end of the stream has
     *             been reached
     *
     * @exception  IOException  If an I/O error occurs
     */
    public int read() throws IOException {
        // decode character
        int c = fSurrogate;
        if (fSurrogate == -1) {
            // NOTE: We use the index into the buffer if there are remaining
            //       bytes from the last block read. -Ac
            int index = 0;
            // get first byte
            int b0 = index == fOffset 
                   ? fInputStream.read() : fBuffer[index++] & 0x00FF;
            if (b0 == -1) {
                return -1;
            }
            // UTF-8:   [0xxx xxxx]
            // Unicode: [0000 0000] [0xxx xxxx]
            if (b0 < 0x80) {
                c = (char)b0;
            }
            // UTF-8:   [110y yyyy] [10xx xxxx]
            // Unicode: [0000 0yyy] [yyxx xxxx]
            else if ((b0 & 0xE0) == 0xC0) {
                int b1 = index == fOffset 
                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                if (b1 == -1) {
                    expectedByte(2, 2);
                }
                if ((b1 & 0xC0) != 0x80) {
                    invalidByte(2, 2, b1);
                }
                c = ((b0 << 6) & 0x07C0) | (b1 & 0x003F);
            }
            // UTF-8:   [1110 zzzz] [10yy yyyy] [10xx xxxx]
            // Unicode: [zzzz yyyy] [yyxx xxxx]
            else if ((b0 & 0xF0) == 0xE0) {
                int b1 = index == fOffset
                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                if (b1 == -1) {
                    expectedByte(2, 3);
                }
                if ((b1 & 0xC0) != 0x80) {
                    invalidByte(2, 3, b1);
                }
                int b2 = index == fOffset 
                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                if (b2 == -1) {
                    expectedByte(3, 3);
                }
                if ((b2 & 0xC0) != 0x80) {
                    invalidByte(3, 3, b2);
                }
                c = ((b0 << 12) & 0xF000) | ((b1 << 6) & 0x0FC0) |
                    (b2 & 0x003F);
            }
            // UTF-8:   [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
            // Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
            //          [1101 11yy] [yyxx xxxx] (low surrogate)
            //          * uuuuu = wwww + 1
            else if ((b0 & 0xF8) == 0xF0) {
                int b1 = index == fOffset 
                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                if (b1 == -1) {
                    expectedByte(2, 4);
                }
                if ((b1 & 0xC0) != 0x80) {
                    invalidByte(2, 3, b1);
                }
                int b2 = index == fOffset 
                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                if (b2 == -1) {
                    expectedByte(3, 4);
                }
                if ((b2 & 0xC0) != 0x80) {
                    invalidByte(3, 3, b2);
                }
                int b3 = index == fOffset 
                       ? fInputStream.read() : fBuffer[index++] & 0x00FF;
                if (b3 == -1) {
                    expectedByte(4, 4);
                }
                if ((b3 & 0xC0) != 0x80) {
                    invalidByte(4, 4, b3);
                }
                int uuuuu = ((b0 << 2) & 0x001C) | ((b1 >> 4) & 0x0003);
                if (uuuuu > 0x10) {
                    invalidSurrogate(uuuuu);
                }
                int wwww = uuuuu - 1;
                int hs = 0xD800 | 
                         ((wwww << 6) & 0x03C0) | ((b1 << 2) & 0x003C) | 
                         ((b2 >> 4) & 0x0003);
                int ls = 0xDC00 | ((b2 << 6) & 0x03C0) | (b3 & 0x003F);
                c = hs;
                fSurrogate = ls;
            }
            // error
            else {
                invalidByte(1, 1, b0);
            }
        }
        // use surrogate
        else {
            fSurrogate = -1;
        }
        return c;
    } // read():int
    /**
     * Read characters into a portion of an array.  This method will block
     * until some input is available, an I/O error occurs, or the end of the
     * stream is reached.
     *
     * @param      ch     Destination buffer
     * @param      offset Offset at which to start storing characters
     * @param      length Maximum number of characters to read
     *
     * @return     The number of characters read, or -1 if the end of the
     *             stream has been reached
     *
     * @exception  IOException  If an I/O error occurs
     */
    public int read(char ch[], int offset, int length) throws IOException {
        // handle surrogate
        int out = offset;
        if (fSurrogate != -1) {
            ch[offset + 1] = (char)fSurrogate;
            fSurrogate = -1;
            length--;
            out++;
        }
        // read bytes
        int count = 0;
        if (fOffset == 0) {
            // adjust length to read
            if (length > fBuffer.length) {
                length = fBuffer.length;
            }
            // perform read operation
            count = fInputStream.read(fBuffer, 0, length);
            if (count == -1) {
                return -1;
            }
            count += out - offset;
        }
        // skip read; last character was in error
        // NOTE: Having an offset value other than zero means that there was
        //       an error in the last character read. In this case, we have
        //       skipped the read so we don"t consume any bytes past the 
        //       error. By signalling the error on the next block read we
        //       allow the method to return the most valid characters that
        //       it can on the previous block read. -Ac
        else {
            count = fOffset;
            fOffset = 0;
        }
        // convert bytes to characters
        final int total = count;
        for (int in = 0; in < total; in++) {
            int b0 = fBuffer[in] & 0x00FF;
            // UTF-8:   [0xxx xxxx]
            // Unicode: [0000 0000] [0xxx xxxx]
            if (b0 < 0x80) {
                ch[out++] = (char)b0;
                continue;
            }
            // UTF-8:   [110y yyyy] [10xx xxxx]
            // Unicode: [0000 0yyy] [yyxx xxxx]
            if ((b0 & 0xE0) == 0xC0) {
                int b1 = -1;
                if (++in < total) { 
                    b1 = fBuffer[in] & 0x00FF; 
                }
                else {
                    b1 = fInputStream.read();
                    if (b1 == -1) {
                        if (out > offset) {
                            fBuffer[0] = (byte)b0;
                            fOffset = 1;
                            return out - offset;
                        }
                        expectedByte(2, 2);
                    }
                    count++;
                }
                if ((b1 & 0xC0) != 0x80) {
                    if (out > offset) {
                        fBuffer[0] = (byte)b0;
                        fBuffer[1] = (byte)b1;
                        fOffset = 2;
                        return out - offset;
                    }
                    invalidByte(2, 2, b1);
                }
                int c = ((b0 << 6) & 0x07C0) | (b1 & 0x003F);
                ch[out++] = (char)c;
                count -= 1;
                continue;
            }
            // UTF-8:   [1110 zzzz] [10yy yyyy] [10xx xxxx]
            // Unicode: [zzzz yyyy] [yyxx xxxx]
            if ((b0 & 0xF0) == 0xE0) {
                int b1 = -1;
                if (++in < total) { 
                    b1 = fBuffer[in] & 0x00FF; 
                }
                else {
                    b1 = fInputStream.read();
                    if (b1 == -1) {
                        if (out > offset) {
                            fBuffer[0] = (byte)b0;
                            fOffset = 1;
                            return out - offset;
                        }
                        expectedByte(2, 3);
                    }
                    count++;
                }
                if ((b1 & 0xC0) != 0x80) {
                    if (out > offset) {
                        fBuffer[0] = (byte)b0;
                        fBuffer[1] = (byte)b1;
                        fOffset = 2;
                        return out - offset;
                    }
                    invalidByte(2, 3, b1);
                }
                int b2 = -1;
                if (++in < total) { 
                    b2 = fBuffer[in] & 0x00FF; 
                }
                else {
                    b2 = fInputStream.read();
                    if (b2 == -1) {
                        if (out > offset) {
                            fBuffer[0] = (byte)b0;
                            fBuffer[1] = (byte)b1;
                            fOffset = 2;
                            return out - offset;
                        }
                        expectedByte(3, 3);
                    }
                    count++;
                }
                if ((b2 & 0xC0) != 0x80) {
                    if (out > offset) {
                        fBuffer[0] = (byte)b0;
                        fBuffer[1] = (byte)b1;
                        fBuffer[2] = (byte)b2;
                        fOffset = 3;
                        return out - offset;
                    }
                    invalidByte(3, 3, b2);
                }
                int c = ((b0 << 12) & 0xF000) | ((b1 << 6) & 0x0FC0) |
                        (b2 & 0x003F);
                ch[out++] = (char)c;
                count -= 2;
                continue;
            }
            // UTF-8:   [1111 0uuu] [10uu zzzz] [10yy yyyy] [10xx xxxx]*
            // Unicode: [1101 10ww] [wwzz zzyy] (high surrogate)
            //          [1101 11yy] [yyxx xxxx] (low surrogate)
            //          * uuuuu = wwww + 1
            if ((b0 & 0xF8) == 0xF0) {
                int b1 = -1;
                if (++in < total) { 
                    b1 = fBuffer[in] & 0x00FF; 
                }
                else {
                    b1 = fInputStream.read();
                    if (b1 == -1) {
                        if (out > offset) {
                            fBuffer[0] = (byte)b0;
                            fOffset = 1;
                            return out - offset;
                        }
                        expectedByte(2, 4);
                    }
                    count++;
                }
                if ((b1 & 0xC0) != 0x80) {
                    if (out > offset) {
                        fBuffer[0] = (byte)b0;
                        fBuffer[1] = (byte)b1;
                        fOffset = 2;
                        return out - offset;
                    }
                    invalidByte(2, 4, b1);
                }
                int b2 = -1;
                if (++in < total) { 
                    b2 = fBuffer[in] & 0x00FF; 
                }
                else {
                    b2 = fInputStream.read();
                    if (b2 == -1) {
                        if (out > offset) {
                            fBuffer[0] = (byte)b0;
                            fBuffer[1] = (byte)b1;
                            fOffset = 2;
                            return out - offset;
                        }
                        expectedByte(3, 4);
                    }
                    count++;
                }
                if ((b2 & 0xC0) != 0x80) {
                    if (out > offset) {
                        fBuffer[0] = (byte)b0;
                        fBuffer[1] = (byte)b1;
                        fBuffer[2] = (byte)b2;
                        fOffset = 3;
                        return out - offset;
                    }
                    invalidByte(3, 4, b2);
                }
                int b3 = -1;
                if (++in < total) { 
                    b3 = fBuffer[in] & 0x00FF; 
                }
                else {
                    b3 = fInputStream.read();
                    if (b3 == -1) {
                        if (out > offset) {
                            fBuffer[0] = (byte)b0;
                            fBuffer[1] = (byte)b1;
                            fBuffer[2] = (byte)b2;
                            fOffset = 3;
                            return out - offset;
                        }
                        expectedByte(4, 4);
                    }
                    count++;
                }
                if ((b3 & 0xC0) != 0x80) {
                    if (out > offset) {
                        fBuffer[0] = (byte)b0;
                        fBuffer[1] = (byte)b1;
                        fBuffer[2] = (byte)b2;
                        fBuffer[3] = (byte)b3;
                        fOffset = 4;
                        return out - offset;
                    }
                    invalidByte(4, 4, b2);
                }
                // decode bytes into surrogate characters
                int uuuuu = ((b0 << 2) & 0x001C) | ((b1 >> 4) & 0x0003);
                if (uuuuu > 0x10) {
                    invalidSurrogate(uuuuu);
                }
                int wwww = uuuuu - 1;
                int zzzz = b1 & 0x000F;
                int yyyyyy = b2 & 0x003F;
                int xxxxxx = b3 & 0x003F;
                int hs = 0xD800 | ((wwww << 6) & 0x03C0) | (zzzz << 2) | (yyyyyy >> 4);
                int ls = 0xDC00 | ((yyyyyy << 6) & 0x03C0) | xxxxxx;
                // set characters
                ch[out++] = (char)hs;
                ch[out++] = (char)ls;
                count -= 2;
                continue;
            }
            // error
            if (out > offset) {
                fBuffer[0] = (byte)b0;
                fOffset = 1;
                return out - offset;
            }
            invalidByte(1, 1, b0);
        }
        return count;
    } // read(char[],int,int)
    /**
     * Skip characters.  This method will block until some characters are
     * available, an I/O error occurs, or the end of the stream is reached.
     *
     * @param  n  The number of characters to skip
     *
     * @return    The number of characters actually skipped
     *
     * @exception  IOException  If an I/O error occurs
     */
    public long skip(long n) throws IOException {
        long remaining = n;
        final char[] ch = new char[fBuffer.length];
        do {
            int length = ch.length < remaining ? ch.length : (int)remaining;
            int count = read(ch, 0, length);
            if (count > 0) {
                remaining -= count;
            }
            else {
                break;
            }
        } while (remaining > 0);
        long skipped = n - remaining;
        return skipped;
    } // skip(long):long
    /**
     * Tell whether this stream is ready to be read.
     *
     * @return True if the next read() is guaranteed not to block for input,
     * false otherwise.  Note that returning false does not guarantee that the
     * next read will block.
     *
     * @exception  IOException  If an I/O error occurs
     */
    public boolean ready() throws IOException {
      return false;
    } // ready()
    /**
     * Tell whether this stream supports the mark() operation.
     */
    public boolean markSupported() {
      return false;
    } // markSupported()
    /**
     * Mark the present position in the stream.  Subsequent calls to reset()
     * will attempt to reposition the stream to this point.  Not all
     * character-input streams support the mark() operation.
     *
     * @param  readAheadLimit  Limit on the number of characters that may be
     *                         read while still preserving the mark.  After
     *                         reading this many characters, attempting to
     *                         reset the stream may fail.
     *
     * @exception  IOException  If the stream does not support mark(),
     *                          or if some other I/O error occurs
     */
    public void mark(int readAheadLimit) throws IOException {
  throw new IOException("operationNotSupported");
    }
    /**
     * Reset the stream.  If the stream has been marked, then attempt to
     * reposition it at the mark.  If the stream has not been marked, then
     * attempt to reset it in some way appropriate to the particular stream,
     * for example by repositioning it to its starting point.  Not all
     * character-input streams support the reset() operation, and some support
     * reset() without supporting mark().
     *
     * @exception  IOException  If the stream has not been marked,
     *                          or if the mark has been invalidated,
     *                          or if the stream does not support reset(),
     *                          or if some other I/O error occurs
     */
    public void reset() throws IOException {
        fOffset = 0;
        fSurrogate = -1;
    } // reset()
    /**
     * Close the stream.  Once a stream has been closed, further read(),
     * ready(), mark(), or reset() invocations will throw an IOException.
     * Closing a previously-closed stream, however, has no effect.
     *
     * @exception  IOException  If an I/O error occurs
     */
    public void close() throws IOException {
        fInputStream.close();
    } // close()
    //
    // Private methods
    //
    /** Throws an exception for expected byte. */
    private void expectedByte(int position, int count)
        throws UTFDataFormatException {
        throw new UTFDataFormatException("expectedByte");
    } // expectedByte(int,int,int)
    /** Throws an exception for invalid byte. */
    private void invalidByte(int position, int count, int c) 
        throws UTFDataFormatException {
        throw new UTFDataFormatException("invalidByte");
    } // invalidByte(int,int,int,int)
    /** Throws an exception for invalid surrogate bits. */
    private void invalidSurrogate(int uuuuu) throws UTFDataFormatException {
        
        throw new UTFDataFormatException("invalidHighSurrogate");
    } // invalidSurrogate(int)
} // class UTF8Reader