Java/Development Class/UTF8 Byte Hex

Содержание

1 Convert bytes To Hex
2 Convert file in SJIS to UTF8
3 Convert from Unicode to UTF-8
4 Convert from UTF-8 to Unicode
5 Convert hex To Bytes
6 converting between byte arrays and hex encoded strings
7 Decodes values of attributes in the DN encoded in hex into a UTF-8 String.
8 Encodes octets (using utf-8) into Hex data
9 Make bytes
10 Normalizer
11 Read Windows Notepad Unicode files
12 Return an UTF-8 encoded String
13 Return an UTF-8 encoded String by length
14 Return UTF-8 encoded byte[] representation of a String
15 Show unicode string
16 String converter
17 Unicode 2 ASCII
18 UTF8 String utilities
19 Utility methods for handling UTF-8 encoded byte streams.

Convert bytes To Hex

     
/**
 * Converts raw bytes into their lowercase hexadecimal text form.
 */
public class HexCodec {
  /** Lowercase hex digits, indexed by nibble value (0-15). */
  private static final char[] kDigits = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a',
      'b', 'c', 'd', 'e', 'f' };

  /**
   * Encodes each byte as two lowercase hex characters, high nibble first.
   *
   * @param raw the bytes to encode; must not be null
   * @return a char array of length {@code raw.length * 2}
   */
  public static char[] bytesToHex(byte[] raw) {
    int length = raw.length;
    char[] hex = new char[length * 2];
    for (int i = 0; i < length; i++) {
      // Mask to obtain the unsigned value (0-255) of the signed byte.
      int value = raw[i] & 0xff;
      hex[i * 2] = kDigits[value >> 4];
      hex[i * 2 + 1] = kDigits[value & 0x0f];
    }
    return hex;
  }

}

Convert file in SJIS to UTF8

     
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
/**
 * Re-encodes the file {@code input.txt} (read as Shift_JIS) into
 * {@code output.txt} (written as UTF-8).
 */
public class Converter {
  /**
   * Copies the text of {@code input.txt} to {@code output.txt}, converting the
   * character encoding from SJIS to UTF8.
   *
   * @param args ignored
   * @throws Exception if a file cannot be opened, read or written, or a
   *           charset name is not supported
   */
  public static void main(String args[]) throws Exception {
    FileInputStream fis = new FileInputStream(new File("input.txt"));
    try {
      BufferedReader in = new BufferedReader(new InputStreamReader(fis, "SJIS"));
      FileOutputStream fos = new FileOutputStream(new File("output.txt"));
      try {
        BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fos, "UTF8"));
        int len = 80;
        char buf[] = new char[len];
        int numRead;
        while ((numRead = in.read(buf, 0, len)) != -1) {
          out.write(buf, 0, numRead);
        }
        // Closing the writer flushes the buffered characters and the encoder.
        out.close();
      } finally {
        // Releases the file handle even when the copy fails part-way.
        fos.close();
      }
    } finally {
      fis.close();
    }
  }
}

Convert from Unicode to UTF-8

    
/**
 * Demonstrates encoding a Java string into its UTF-8 byte representation.
 */
public class Main {
  /**
   * Encodes a fixed sample string as UTF-8. The resulting bytes are not used
   * further; the snippet only shows the conversion call.
   *
   * @param argv ignored
   * @throws Exception if the UTF-8 charset name is not supported
   */
  public static void main(String[] argv) throws Exception {
    final String text = "abc\u5639\u563b";
    final byte[] encoded = text.getBytes("UTF-8");
  }
}

Convert from UTF-8 to Unicode

    

/**
 * Demonstrates a UTF-8 round trip: string to bytes and back to a string.
 */
public class Main {
  /**
   * Encodes a sample string as UTF-8, decodes the bytes again and prints the
   * decoded text to standard output.
   *
   * @param argv ignored
   * @throws Exception if the UTF-8 charset name is not supported
   */
  public static void main(String[] argv) throws Exception {
    byte[] encoded = "abc\u5639".getBytes("UTF-8");

    String decoded = new String(encoded, "UTF-8");
    System.out.println(decoded);
  }
}

Convert hex To Bytes

     
/**
 * Decodes hexadecimal text into the bytes it represents.
 */
public class HexCodec {
  /** Lowercase hex digits, indexed by nibble value; not used by the decoder in this class. */
  private static final char[] kDigits = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a',
      'b', 'c', 'd', 'e', 'f' };

  /**
   * Decodes pairs of hex characters (high nibble first) into bytes. Upper and
   * lower case digits are both accepted. A trailing unpaired character is
   * ignored.
   *
   * @param hex the hex characters to decode; must not be null
   * @return a byte array of length {@code hex.length / 2}
   * @throws IllegalArgumentException if a decoded character is not a hex digit
   */
  public static byte[] hexToBytes(char[] hex) {
    int length = hex.length / 2;
    byte[] raw = new byte[length];
    for (int i = 0; i < length; i++) {
      int high = digitValue(hex, i * 2);
      int low = digitValue(hex, i * 2 + 1);
      // The narrowing cast maps 128-255 onto the negative byte range.
      raw[i] = (byte) ((high << 4) | low);
    }
    return raw;
  }

  /**
   * Decodes a hex string; see {@link #hexToBytes(char[])}.
   *
   * @param hex the hex string to decode; must not be null
   * @return the decoded bytes
   * @throws IllegalArgumentException if a decoded character is not a hex digit
   */
  public static byte[] hexToBytes(String hex) {
    return hexToBytes(hex.toCharArray());
  }

  /** Returns the value (0-15) of the hex digit at {@code index}, rejecting anything else. */
  private static int digitValue(char[] hex, int index) {
    int value = Character.digit(hex[index], 16);
    if (value < 0) {
      throw new IllegalArgumentException(
          "Invalid hex digit '" + hex[index] + "' at index " + index);
    }
    return value;
  }
}

converting between byte arrays and hex encoded strings

     
/**
 * Converts between byte arrays and lowercase hex-encoded text.
 */
public class HexCodec {
  /** Lowercase hex digits, indexed by nibble value (0-15). */
  private static final char[] kDigits = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a',
      'b', 'c', 'd', 'e', 'f' };

  /**
   * Encodes each byte as two lowercase hex characters, high nibble first.
   *
   * @param raw the bytes to encode; must not be null
   * @return a char array of length {@code raw.length * 2}
   */
  public static char[] bytesToHex(byte[] raw) {
    int length = raw.length;
    char[] hex = new char[length * 2];
    for (int i = 0; i < length; i++) {
      // Mask to obtain the unsigned value (0-255) of the signed byte.
      int value = raw[i] & 0xff;
      hex[i * 2] = kDigits[value >> 4];
      hex[i * 2 + 1] = kDigits[value & 0x0f];
    }
    return hex;
  }

  /**
   * Decodes pairs of hex characters (high nibble first) into bytes. Upper and
   * lower case digits are both accepted. A trailing unpaired character is
   * ignored.
   *
   * @param hex the hex characters to decode; must not be null
   * @return a byte array of length {@code hex.length / 2}
   * @throws IllegalArgumentException if a decoded character is not a hex digit
   */
  public static byte[] hexToBytes(char[] hex) {
    int length = hex.length / 2;
    byte[] raw = new byte[length];
    for (int i = 0; i < length; i++) {
      int high = digitValue(hex, i * 2);
      int low = digitValue(hex, i * 2 + 1);
      // The narrowing cast maps 128-255 onto the negative byte range.
      raw[i] = (byte) ((high << 4) | low);
    }
    return raw;
  }

  /**
   * Decodes a hex string; see {@link #hexToBytes(char[])}.
   *
   * @param hex the hex string to decode; must not be null
   * @return the decoded bytes
   * @throws IllegalArgumentException if a decoded character is not a hex digit
   */
  public static byte[] hexToBytes(String hex) {
    return hexToBytes(hex.toCharArray());
  }

  /** Returns the value (0-15) of the hex digit at {@code index}, rejecting anything else. */
  private static int digitValue(char[] hex, int index) {
    int value = Character.digit(hex[index], 16);
    if (value < 0) {
      throw new IllegalArgumentException(
          "Invalid hex digit '" + hex[index] + "' at index " + index);
    }
    return value;
  }
}

Decodes values of attributes in the DN encoded in hex into a UTF-8 String.

   
import java.io.UnsupportedEncodingException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import javax.naming.InvalidNameException;
/*
 *  Licensed to the Apache Software Foundation (ASF) under one
 *  or more contributor license agreements.  See the NOTICE file
 *  distributed with this work for additional information
 *  regarding copyright ownership.  The ASF licenses this file
 *  to you under the Apache License, Version 2.0 (the
 *  "License"); you may not use this file except in compliance
 *  with the License.  You may obtain a copy of the License at
 *  
 *    http://www.apache.org/licenses/LICENSE-2.0
 *  
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an
 *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *  KIND, either express or implied.  See the License for the
 *  specific language governing permissions and limitations
 *  under the License. 
 *  
 */

/**
 * Various string manipulation methods that are more efficient then chaining
 * string operations: all is done in the same buffer without creating a bunch of
 * string objects.
 * 
 * @author 
 */
public class Main {
  /**
   * Lookup table from character code (0x00-0x6F) to hex digit value, or -1
   * when the character is not a hex digit.
   * &lt;hex&gt; ::= [0x30-0x39] | [0x41-0x46] | [0x61-0x66]
   */
  private static final byte[] HEX_VALUE =
      { 
          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00 -> 0F
          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10 -> 1F
          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20 -> 2F
           0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30 -> 3F ( 0, 1,2, 3, 4,5, 6, 7, 8, 9 )
          -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 40 -> 4F ( A, B, C, D, E, F )
          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 50 -> 5F
          -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1  // 60 -> 6F ( a, b, c, d, e, f )
      };
  /**
   * Decodes values of attributes in the DN encoded in hex into a UTF-8 
   * String.  RFC2253 allows a DN's attribute to be encoded in hex.
   * The encoded value starts with a # then is followed by an even 
   * number of hex characters.  
   * 
   * @param str
   *            The hex encoded value, including the leading '#'
   * @return The decoded bytes interpreted as a UTF-8 String
   * @throws InvalidNameException
   *             if the string is null, empty, does not start with '#',
   *             has an odd number of hex characters, or contains a
   *             character that is not a hex digit
   */
  public static final String decodeHexString( String str ) throws InvalidNameException
  {
      if ( str == null || str.length() == 0 )
      {
          throw new InvalidNameException( "Expected string to start with a '#' character.  " +
              "Invalid hex encoded string for empty or null string."  );
      }
      
      char[] chars = str.toCharArray();
      if ( chars[0] != '#' )
      {
          throw new InvalidNameException( "Expected string to start with a '#' character.  " +
                  "Invalid hex encoded string: " + str  );
      }
      
      // every byte needs exactly two hex characters after the '#'
      if ( ( ( chars.length - 1 ) & 1 ) != 0 )
      {
          throw new InvalidNameException( "Expected an even number of hex characters.  " +
                  "Invalid hex encoded string: " + str  );
      }
      
      // the bytes representing the encoded string of hex
      // this should be ( length - 1 )/2 in size
      byte[] decoded = new byte[ ( chars.length - 1 ) >> 1 ];
      for ( int ii = 1, jj = 0 ; ii < chars.length; ii+=2, jj++ )
      {
          int high = hexValue( chars[ii] );
          int low = hexValue( chars[ii+1] );
          if ( high < 0 || low < 0 )
          {
              throw new InvalidNameException( "Expected only hex characters.  " +
                      "Invalid hex encoded string: " + str  );
          }
          decoded[jj] = ( byte ) ( ( high << 4 ) + low );
      }
      
      return utf8ToString( decoded );
  }
  /**
   * Looks up the value of a hex digit without indexing past the table.
   * 
   * @param c
   *            The character to look up
   * @return The digit value (0-15), or -1 if c is not a hex digit
   */
  private static int hexValue( char c )
  {
      return c < HEX_VALUE.length ? HEX_VALUE[c] : -1;
  }
  /**
   * Return an UTF-8 encoded String
   * 
   * @param bytes
   *            The byte array to be transformed to a String
   * @return A String; empty if bytes is null or UTF-8 is reported as
   *         unsupported.
   */
  public static final String utf8ToString( byte[] bytes )
  {
      if ( bytes == null )
      {
          return "";
      }
      try
      {
          return new String( bytes, "UTF-8" );
      }
      catch ( UnsupportedEncodingException uee )
      {
          return "";
      }
  }
}

Encodes octets (using utf-8) into Hex data

   
/*   Copyright 2004 The Apache Software Foundation
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *  limitations under the License.
 */
//xmlbeans
import java.io.UnsupportedEncodingException;
/**
 * format validation
 *
 * This class encodes/decodes hexadecimal data
 * @author Jeffrey Rodriguez
 * @version $Id: HexBin.java 125124 2005-01-14 00:23:54Z kkrouse $
 */
public class Main {
  static private final int  BASELENGTH   = 255;
  static private final int  LOOKUPLENGTH = 16;
  /** Maps a character code to its hex digit value, or -1 for non-hex characters. */
  static private byte [] hexNumberTable    = new byte[BASELENGTH];
  /** Maps a nibble value (0-15) to its uppercase ASCII hex digit. */
  static private byte [] lookUpHexAlphabet = new byte[LOOKUPLENGTH];

  static {
      for (int i = 0; i<BASELENGTH; i++ ) {
          hexNumberTable[i] = -1;
      }
      for ( int i = '9'; i >= '0'; i--) {
          hexNumberTable[i] = (byte) (i-'0');
      }
      for ( int i = 'F'; i>= 'A'; i--) {
          hexNumberTable[i] = (byte) ( i-'A' + 10 );
      }
      for ( int i = 'f'; i>= 'a'; i--) {
         hexNumberTable[i] = (byte) ( i-'a' + 10 );
      }
      for(int i = 0; i<10; i++ )
          lookUpHexAlphabet[i] = (byte) ('0'+i );
      for(int i = 10; i<=15; i++ )
          lookUpHexAlphabet[i] = (byte) ('A'+i -10);
  }
  /**
   * Encodes the octets of a string (using utf-8) into uppercase Hex data
   *
   * @param binaryData String whose utf-8 bytes are to be encoded
   * @return string containing the hex digits, or null if binaryData is null
   */
  public static String encode(String binaryData) {
      if (binaryData == null)
          return null;
      try {
        // The encoded bytes are ASCII hex digits, so decode them with a fixed
        // charset rather than the platform default.
        return new String(encode(binaryData.getBytes("utf-8")), "US-ASCII");
      }
      catch(UnsupportedEncodingException e) {
        // Not expected for these charset names; keep the original contract
        // of returning null when the conversion cannot be performed.
        return null;
      }
  }
  /**
   * Encodes an array of bytes as uppercase hex digits, two per input byte
   * (high nibble first).
   *
   * @param binaryData array of byte to encode
   * @return the ASCII hex digits as bytes, or null if binaryData is null
   */
  static public byte[] encode(byte[] binaryData) {
      if (binaryData == null)
          return null;
      int lengthData   = binaryData.length;
      int lengthEncode = lengthData * 2;
      byte[] encodedData = new byte[lengthEncode];
      for( int i = 0; i<lengthData; i++ ){
          encodedData[i*2] = lookUpHexAlphabet[(binaryData[i] >> 4) & 0xf];
          encodedData[i*2+1] = lookUpHexAlphabet[ binaryData[i] & 0xf];
      }
      return encodedData;
  }
}

Make bytes

     
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
/**
 * Helper that packs primitive values into a byte array.
 */
public class MainClass {
  /**
   * Writes {@code t} and then {@code q} through a {@link DataOutputStream}
   * backed by an in-memory buffer and returns the bytes written.
   *
   * @param t the long value written first
   * @param q the double value written second
   * @return the bytes written, or an empty array if writing raises an
   *         {@link IOException}
   */
  public static byte[] makeBytes(long t, double q) {
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    DataOutputStream writer = new DataOutputStream(buffer);
    byte[] result;
    try {
      writer.writeLong(t);
      writer.writeDouble(q);
      result = buffer.toByteArray();
    } catch (IOException failure) {
      result = new byte[0];
    }
    return result;
  }
}

Normalizer

    
/*
 * Copyright (c) 1995 - 2008 Sun Microsystems, Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   - Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *   - Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *
 *   - Neither the name of Sun Microsystems nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
import java.applet.Applet;
import java.awt.Color;
import java.awt.Dimension;
import java.awt.Font;
import java.awt.Graphics;
import java.awt.Graphics2D;
import java.awt.Label;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.text.Normalizer;
import java.util.HashMap;
import java.util.Iterator;
import javax.swing.BoxLayout;
import javax.swing.JComboBox;
import javax.swing.JComponent;
import javax.swing.JFrame;
import javax.swing.JPanel;
/**
 * Demo that draws a sample string before and after Unicode normalization.
 * Two combo boxes select the sample text and the {@link Normalizer.Form};
 * changing either one repaints the result.
 */
public class NormSample extends Applet {
  static final long serialVersionUID = 6607883013849198961L;
  /** Selects which sample text (key of {@link #templateValues}) is shown. */
  JComboBox normalizationTemplate;
  /** Selects which normalization form (key of {@link #formValues}) is applied. */
  JComboBox formComboBox;
  /** Component that paints the original and the normalized string. */
  JComponent paintingComponent;
  HashMap<String, Normalizer.Form> formValues = new HashMap<String, Normalizer.Form>();
  HashMap<String, String> templateValues = new HashMap<String, String>();

  /**
   * Runs the demo inside a standalone frame.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    // creating an applet for normalization
    JFrame f = new JFrame("Normalizer's API");
    f.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
    NormSample applet = new NormSample();
    applet.init();
    f.add("Center", applet);
    f.pack();
    f.setVisible(true);
  }

  /** Fills the combo boxes, builds the painting component and wires the listeners. */
  @Override
  public void init() {
    // preparing values for the normalization forms ComboBox
    formValues.put("NFC", Normalizer.Form.NFC);
    formValues.put("NFD", Normalizer.Form.NFD);
    formValues.put("NFKC", Normalizer.Form.NFKC);
    formValues.put("NFKD", Normalizer.Form.NFKD);
    formComboBox = new JComboBox();
    for (String formName : formValues.keySet()) {
      formComboBox.addItem(formName);
    }
    // preparing samples for normalization
    // text with the acute accent symbol
    templateValues.put("acute accent", "touch" + "\u00e9");
    // text with ligature
    templateValues.put("ligature", "a" + "\ufb03" + "ance");
    // text with the cedilla
    templateValues.put("cedilla", "fa" + "\u00e7" + "ade");
    // text with half-width katakana
    templateValues.put("half-width katakana",
        "\uff81\uff6e\uff7a\uff9a\uff70\uff84");
    normalizationTemplate = new JComboBox();
    for (String templateName : templateValues.keySet()) {
      normalizationTemplate.addItem(templateName);
    }
    // defining a component to output normalization results
    paintingComponent = new JComponent() {
      static final long serialVersionUID = -3725620407788489160L;
      @Override
      public Dimension getSize() {
        return new Dimension(550, 200);
      }
      @Override
      public Dimension getPreferredSize() {
        return new Dimension(550, 200);
      }
      @Override
      public Dimension getMinimumSize() {
        return new Dimension(550, 200);
      }
      @Override
      public void paint(Graphics g) {
        Graphics2D g2 = (Graphics2D) g;
        g2.setFont(new Font("Serif", Font.PLAIN, 20));
        g2.setColor(Color.BLACK);
        g2.drawString("Original string:", 100, 80);
        g2.drawString("Normalized string:", 100, 120);
        g2.setFont(new Font("Serif", Font.BOLD, 24));
        // output of the original sample selected from the ComboBox
        String original_string = templateValues.get(normalizationTemplate
            .getSelectedItem());
        g2.drawString(original_string, 320, 80);
        // normalization and output of the normalized string
        Normalizer.Form currentForm = formValues.get(formComboBox
            .getSelectedItem());
        String normalized_string = Normalizer.normalize(original_string, currentForm);
        g2.drawString(normalized_string, 320, 120);
      }
    };
    setLayout(new BoxLayout(this, BoxLayout.Y_AXIS));
    add(paintingComponent);
    JPanel controls = new JPanel();
    controls.setLayout(new BoxLayout(controls, BoxLayout.X_AXIS));
    controls.add(new Label("Normalization Form: "));
    controls.add(formComboBox);
    controls.add(new Label("Normalization Template:"));
    controls.add(normalizationTemplate);
    add(controls);
    // any selection change redraws both strings
    ActionListener repaintOnChange = new ActionListener() {
      @Override
      public void actionPerformed(ActionEvent e) {
        paintingComponent.repaint();
      }
    };
    formComboBox.addActionListener(repaintOnChange);
    normalizationTemplate.addActionListener(repaintOnChange);
  }
}

Read Windows Notepad Unicode files

 
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
/**
 * This is an input stream that is unicode BOM aware. This allows you to e.g.
 * read Windows Notepad Unicode files as Velocity templates.
 * 
 * It allows you to check the actual encoding of a file by calling
 * {@link #getEncodingFromStream()} on the input stream reader.
 * 
 * This class is not thread safe! When more than one thread wants to use an
 * instance of UnicodeInputStream, the caller must provide synchronization.
 * 
 * @author 
   * @version $Id: UnicodeInputStream.java 685685 2008-08-13 21:43:27Z nbubna $
   */
  /**
   * Immutable pairing of an encoding name with a byte sequence.
   * NOTE(review): presumably the bytes are the byte-order-mark signature for
   * that encoding - confirm against the enclosing class, which is not shown.
   */
  static final class UnicodeBOM {
    private final String encoding;
    private final byte[] bytes;

    /**
     * @param encodingName value later returned by {@link #getEncoding()}
     * @param bomBytes array later returned (not copied) by {@link #getBytes()}
     */
    private UnicodeBOM(final String encodingName, final byte[] bomBytes) {
      encoding = encodingName;
      bytes = bomBytes;
    }

    /** @return the byte array passed to the constructor (the same instance) */
    byte[] getBytes() {
      return this.bytes;
    }

    /** @return the encoding name passed to the constructor */
    String getEncoding() {
      return this.encoding;
    }
  }
}

Return an UTF-8 encoded String

   
import java.io.UnsupportedEncodingException;
/*
 *  Licensed to the Apache Software Foundation (ASF) under one
 *  or more contributor license agreements.  See the NOTICE file
 *  distributed with this work for additional information
 *  regarding copyright ownership.  The ASF licenses this file
 *  to you under the Apache License, Version 2.0 (the
 *  "License"); you may not use this file except in compliance
 *  with the License.  You may obtain a copy of the License at
 *  
 *    http://www.apache.org/licenses/LICENSE-2.0
 *  
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an
 *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *  KIND, either express or implied.  See the License for the
 *  specific language governing permissions and limitations
 *  under the License. 
 *  
 */

/**
 * Various string manipulation methods that are more efficient then chaining
 * string operations: all is done in the same buffer without creating a bunch of
 * string objects.
 * 
 * @author 
 */
public class Main {
  /**
   * Decodes a byte array as UTF-8 text.
   * 
   * @param bytes
   *            The UTF-8 bytes to decode, may be null
   * @return The decoded String; the empty String when bytes is null or the
   *         UTF-8 charset is reported as unsupported.
   */
  public static final String utf8ToString( byte[] bytes )
  {
      String decoded = "";

      if ( bytes != null )
      {
          try
          {
              decoded = new String( bytes, "UTF-8" );
          }
          catch ( UnsupportedEncodingException unsupported )
          {
              // keep the empty default
          }
      }

      return decoded;
  }
}

Return an UTF-8 encoded String by length

   
import java.io.UnsupportedEncodingException;
/*
 *  Licensed to the Apache Software Foundation (ASF) under one
 *  or more contributor license agreements.  See the NOTICE file
 *  distributed with this work for additional information
 *  regarding copyright ownership.  The ASF licenses this file
 *  to you under the Apache License, Version 2.0 (the
 *  "License"); you may not use this file except in compliance
 *  with the License.  You may obtain a copy of the License at
 *  
 *    http://www.apache.org/licenses/LICENSE-2.0
 *  
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an
 *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *  KIND, either express or implied.  See the License for the
 *  specific language governing permissions and limitations
 *  under the License. 
 *  
 */

/**
 * Various string manipulation methods that are more efficient then chaining
 * string operations: all is done in the same buffer without creating a bunch of
 * string objects.
 * 
 * @author 
 */
public class Main {
  /**
   * Decodes the leading part of a byte array as UTF-8 text.
   * 
   * @param bytes
   *            The UTF-8 bytes to decode, may be null
   * @param length
   *            The number of bytes, counted from index 0, to decode
   * @return The decoded String; the empty String when bytes is null or the
   *         UTF-8 charset is reported as unsupported.
   */
  public static final String utf8ToString( byte[] bytes, int length )
  {
      String decoded = "";

      if ( bytes != null )
      {
          try
          {
              decoded = new String( bytes, 0, length, "UTF-8" );
          }
          catch ( UnsupportedEncodingException unsupported )
          {
              // keep the empty default
          }
      }

      return decoded;
  }
}

Return UTF-8 encoded byte[] representation of a String

   
import java.io.UnsupportedEncodingException;
/*
 *  Licensed to the Apache Software Foundation (ASF) under one
 *  or more contributor license agreements.  See the NOTICE file
 *  distributed with this work for additional information
 *  regarding copyright ownership.  The ASF licenses this file
 *  to you under the Apache License, Version 2.0 (the
 *  "License"); you may not use this file except in compliance
 *  with the License.  You may obtain a copy of the License at
 *  
 *    http://www.apache.org/licenses/LICENSE-2.0
 *  
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an
 *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *  KIND, either express or implied.  See the License for the
 *  specific language governing permissions and limitations
 *  under the License. 
 *  
 */

/**
 * Various string manipulation methods that are more efficient then chaining
 * string operations: all is done in the same buffer without creating a bunch of
 * string objects.
 * 
 * @author 
 */
public class Main {
  /**
   * Encodes a String as UTF-8 bytes.
   * 
   * @param string
   *            The text to encode, may be null
   * @return The UTF-8 bytes; an empty array when string is null or the
   *         UTF-8 charset is reported as unsupported.
   */
  public static final byte[] getBytesUtf8( String string )
  {
      byte[] encoded = new byte[0];

      if ( string != null )
      {
          try
          {
              encoded = string.getBytes( "UTF-8" );
          }
          catch ( UnsupportedEncodingException unsupported )
          {
              // keep the empty default
          }
      }

      return encoded;
  }
}

Show unicode string

    
/*
 * Copyright (c) 1995 - 2008 Sun Microsystems, Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   - Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *   - Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *
 *   - Neither the name of Sun Microsystems nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
import java.awt.Font;
import java.awt.FontMetrics;
import java.awt.Frame;
import java.awt.Graphics;
import java.awt.Insets;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
/**
 * Frame that displays a single string in a 36pt monospaced font, sized to the
 * measured width of that string.
 */
class ShowString extends Frame {
  FontMetrics fontM;
  String outString;

  /**
   * Builds the frame, sizes it from the measured text width and shows it.
   *
   * @param target the text to display
   * @param title the frame title
   */
  ShowString(String target, String title) {
    setTitle(title);
    outString = target;
    Font displayFont = new Font("Monospaced", Font.PLAIN, 36);
    fontM = getFontMetrics(displayFont);
    setFont(displayFont);
    // start from the fixed 24px padding and add each character's width
    int frameWidth = 24;
    for (char glyph : outString.toCharArray()) {
      frameWidth += fontM.charWidth(glyph);
    }
    int frameHeight = fontM.getHeight() + 60;
    setSize(frameWidth, frameHeight);
    setLocation(getSize().width / 2, getSize().height / 2);
    setVisible(true);
  }

  /** Draws the text just inside the frame's insets. */
  @Override
  public void paint(Graphics g) {
    Insets border = getInsets();
    int baseline = border.top + fontM.getAscent() + 14;
    g.drawString(outString, border.left + 6, baseline);
  }
}
/**
 * Writes a Japanese sample string to {@code test.txt} as UTF-8, reads it back
 * and displays both the original and the re-read text.
 */
public class StreamConverter {
  /**
   * Writes {@code str} to {@code test.txt} using the UTF8 encoding. I/O
   * failures are reported with a stack trace and otherwise ignored.
   *
   * @param str the text to write
   */
  static void writeOutput(String str) {
    try {
      FileOutputStream fos = new FileOutputStream("test.txt");
      try {
        Writer out = new OutputStreamWriter(fos, "UTF8");
        out.write(str);
        // Closing the writer flushes the encoder into the file stream.
        out.close();
      } finally {
        // Releases the file handle even when writing fails.
        fos.close();
      }
    } catch (IOException e) {
      e.printStackTrace();
    }
  }

  /**
   * Reads the whole of {@code test.txt} as UTF8 text.
   *
   * @return the file content, or {@code null} if an I/O error occurred (the
   *         error is reported with a stack trace)
   */
  static String readInput() {
    StringBuilder buffer = new StringBuilder();
    try {
      FileInputStream fis = new FileInputStream("test.txt");
      try {
        Reader in = new BufferedReader(new InputStreamReader(fis, "UTF8"));
        int ch;
        while ((ch = in.read()) > -1) {
          buffer.append((char) ch);
        }
      } finally {
        // Releases the file handle even when reading fails.
        fis.close();
      }
      return buffer.toString();
    } catch (IOException e) {
      e.printStackTrace();
      return null;
    }
  }

  /**
   * Runs the write/read round trip and shows the result in a window.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    String jaString = "\u65e5\u672c\u8a9e\u6587\u5b57\u5217";
    writeOutput(jaString);
    String inputString = readInput();
    String displayString = jaString + " " + inputString;
    new ShowString(displayString, "Conversion Demo");
  }
}

String converter

    
/*
 * Copyright (c) 1995 - 2008 Sun Microsystems, Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   - Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *   - Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *
 *   - Neither the name of Sun Microsystems nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
import java.io.UnsupportedEncodingException;
/**
 * Prints the bytes of a sample string in UTF8 and in the platform default
 * encoding.
 */
public class StringConverter {
  /**
   * Prints one line per byte in the form {@code name[index] = 0xhh}.
   *
   * @param array the bytes to print
   * @param name the label printed in front of each index
   */
  public static void printBytes(byte[] array, String name) {
    int index = 0;
    for (byte value : array) {
      String hex = UnicodeFormatter.byteToHex(value);
      System.out.println(name + "[" + index + "] = " + "0x" + hex);
      index++;
    }
  }

  /**
   * Prints the default encoding name, the sample string, its UTF8 round trip
   * and the byte dumps for both encodings.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    System.out.println(System.getProperty("file.encoding"));
    String original = "A" + "\u00ea" + "\u00f1" + "\u00fc" + "C";
    System.out.println("original = " + original);
    System.out.println();
    byte[] utf8Bytes;
    byte[] defaultBytes;
    String roundTrip;
    try {
      utf8Bytes = original.getBytes("UTF8");
      defaultBytes = original.getBytes();
      roundTrip = new String(utf8Bytes, "UTF8");
    } catch (UnsupportedEncodingException e) {
      e.printStackTrace();
      return;
    }
    System.out.println("roundTrip = " + roundTrip);
    System.out.println();
    printBytes(utf8Bytes, "utf8Bytes");
    System.out.println();
    printBytes(defaultBytes, "defaultBytes");
  }
}
/**
 * Formats bytes and chars as fixed-width lowercase hexadecimal strings.
 */
class UnicodeFormatter {
  /** Lowercase hex digits, indexed by nibble value (0-15). */
  private static final char[] HEX_DIGITS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
      'a', 'b', 'c', 'd', 'e', 'f' };

  /**
   * Returns the two-digit hex String representation of byte b.
   *
   * @param b the byte to format
   * @return two lowercase hex digits, high nibble first
   */
  static public String byteToHex(byte b) {
    // Masking after the shift discards the sign extension of negative bytes.
    char[] array = { HEX_DIGITS[(b >> 4) & 0x0f], HEX_DIGITS[b & 0x0f] };
    return new String(array);
  }

  /**
   * Returns the four-digit hex String representation of char c.
   *
   * @param c the char to format
   * @return four lowercase hex digits, high byte first
   */
  static public String charToHex(char c) {
    byte hi = (byte) (c >>> 8);
    byte lo = (byte) (c & 0xff);
    return byteToHex(hi) + byteToHex(lo);
  }
} // class

Unicode 2 ASCII

     
/**
 * Rewrites characters above U+00FF as escapes so the text can be stored in a
 * Latin-1/ASCII-oriented file.
 */
public class Unicode2ASCII {
  /**
   * Replaces every code point above 255 with a decimal HTML numeric character
   * reference ({@code &#NNNN;}). Characters up to 255 are copied unchanged;
   * no HTML markup escaping is performed.
   *
   * @param unicode the text to convert; must not be null
   * @return the converted text
   */
  public static String toHTML(String unicode) {
    StringBuilder output = new StringBuilder(unicode.length());
    int i = 0;
    while (i < unicode.length()) {
      // Work on code points so a surrogate pair becomes one reference
      // instead of two references to surrogate values.
      int codePoint = unicode.codePointAt(i);
      if (codePoint > 255) {
        output.append("&#").append(codePoint).append(';');
      } else {
        output.append((char) codePoint);
      }
      i += Character.charCount(codePoint);
    }
    return output.toString();
  }

  /**
   * Replaces every char above 255 with a Java unicode escape. Supplementary
   * characters are written as two escapes, one per surrogate.
   *
   * @param unicode the text to convert; must not be null
   * @return the converted text
   */
  public static String toJAVA(String unicode) {
    StringBuilder output = new StringBuilder(unicode.length());
    for (int i = 0; i < unicode.length(); ++i) {
      char a = unicode.charAt(i);
      if (a > 255) {
        String hex = Integer.toHexString(a);
        output.append("\\u");
        // A Java unicode escape needs exactly four hex digits.
        for (int pad = hex.length(); pad < 4; pad++) {
          output.append('0');
        }
        output.append(hex);
      } else {
        output.append(a);
      }
    }
    return output.toString();
  }
}

UTF8 String utilities

   
// 
// Copyright 2004-2005 Mort Bay Consulting Pty. Ltd.
// ------------------------------------------------------------------------
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at 
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// 
import java.io.UnsupportedEncodingException;
// 
/**
 * Fast String Utilities.
 * 
 * These string utilities provide both conveniance methods and performance
 * improvements over most standard library versions. The main aim of the
 * optimizations is to avoid object creation unless absolutely required.
 * 
 * @author Greg Wilkins (gregw)
 */
public class StringUtil {
  public static final String CRLF = "\015\012";
  public static final String __LINE_SEPARATOR = System.getProperty("line.separator", "\n");
  public static String __ISO_8859_1;
  static {
    String iso = System.getProperty("ISO_8859_1");
    if (iso != null)
      __ISO_8859_1 = iso;
    else {
      try {
        new String(new byte[] { (byte) 20 }, "ISO-8859-1");
        __ISO_8859_1 = "ISO-8859-1";
      } catch (java.io.UnsupportedEncodingException e) {
        __ISO_8859_1 = "ISO8859_1";
      }
    }
  }
  public final static String __UTF8 = "UTF-8";
  private static char[] lowercases = { "\000", "\001", "\002", "\003", "\004", "\005", "\006",
      "\007", "\010", "\011", "\012", "\013", "\014", "\015", "\016", "\017", "\020", "\021",
      "\022", "\023", "\024", "\025", "\026", "\027", "\030", "\031", "\032", "\033", "\034",
      "\035", "\036", "\037", "\040", "\041", "\042", "\043", "\044", "\045", "\046", "\047",
      "\050", "\051", "\052", "\053", "\054", "\055", "\056", "\057", "\060", "\061", "\062",
      "\063", "\064", "\065", "\066", "\067", "\070", "\071", "\072", "\073", "\074", "\075",
      "\076", "\077", "\100", "\141", "\142", "\143", "\144", "\145", "\146", "\147", "\150",
      "\151", "\152", "\153", "\154", "\155", "\156", "\157", "\160", "\161", "\162", "\163",
      "\164", "\165", "\166", "\167", "\170", "\171", "\172", "\133", "\134", "\135", "\136",
      "\137", "\140", "\141", "\142", "\143", "\144", "\145", "\146", "\147", "\150", "\151",
      "\152", "\153", "\154", "\155", "\156", "\157", "\160", "\161", "\162", "\163", "\164",
      "\165", "\166", "\167", "\170", "\171", "\172", "\173", "\174", "\175", "\176", "\177" };
  /* ------------------------------------------------------------ */
  /**
   * fast lower case conversion. Only works on ascii (not unicode)
   * 
   * @param s
   *          the string to convert
   * @return a lower case version of s
   */
  public static String asciiToLowerCase(String s) {
    char[] c = null;
    int i = s.length();
    // look for first conversion
    while (i-- > 0) {
      char c1 = s.charAt(i);
      if (c1 <= 127) {
        char c2 = lowercases[c1];
        if (c1 != c2) {
          c = s.toCharArray();
          c[i] = c2;
          break;
        }
      }
    }
    while (i-- > 0) {
      if (c[i] <= 127)
        c[i] = lowercases[c[i]];
    }
    return c == null ? s : new String(c);
  }
  /* ------------------------------------------------------------ */
  public static boolean startsWithIgnoreCase(String s, String w) {
    if (w == null)
      return true;
    if (s == null || s.length() < w.length())
      return false;
    for (int i = 0; i < w.length(); i++) {
      char c1 = s.charAt(i);
      char c2 = w.charAt(i);
      if (c1 != c2) {
        if (c1 <= 127)
          c1 = lowercases[c1];
        if (c2 <= 127)
          c2 = lowercases[c2];
        if (c1 != c2)
          return false;
      }
    }
    return true;
  }
  /* ------------------------------------------------------------ */
  public static boolean endsWithIgnoreCase(String s, String w) {
    if (w == null)
      return true;
    if (s == null)
      return false;
    int sl = s.length();
    int wl = w.length();
    if (sl < wl)
      return false;
    for (int i = wl; i-- > 0;) {
      char c1 = s.charAt(--sl);
      char c2 = w.charAt(i);
      if (c1 != c2) {
        if (c1 <= 127)
          c1 = lowercases[c1];
        if (c2 <= 127)
          c2 = lowercases[c2];
        if (c1 != c2)
          return false;
      }
    }
    return true;
  }

  /* ------------------------------------------------------------ */
  public static boolean equals(String s, char[] buf, int offset, int length) {
    if (s.length() != length)
      return false;
    for (int i = 0; i < length; i++)
      if (buf[offset + i] != s.charAt(i))
        return false;
    return true;
  }
  /* ------------------------------------------------------------ */
  public static String toUTF8String(byte[] b, int offset, int length) {
    try {
      if (length < 32) {
        Utf8StringBuffer buffer = new Utf8StringBuffer(length);
        buffer.append(b, offset, length);
        return buffer.toString();
      }
      return new String(b, offset, length, __UTF8);
    } catch (UnsupportedEncodingException e) {
      e.printStackTrace();
      return null;
    }
  }
  /* ------------------------------------------------------------ */
  public static String toString(byte[] b, int offset, int length, String charset) {
    if (charset == null || StringUtil.isUTF8(charset))
      return toUTF8String(b, offset, length);
    try {
      return new String(b, offset, length, charset);
    } catch (UnsupportedEncodingException e) {
      e.printStackTrace();
      return null;
    }
  }
  /* ------------------------------------------------------------ */
  public static boolean isUTF8(String charset) {
    return charset == __UTF8 || __UTF8.equalsIgnoreCase(charset);
  }
  /* ------------------------------------------------------------ */
  public static String printable(String name) {
    if (name == null)
      return null;
    StringBuffer buf = new StringBuffer(name.length());
    for (int i = 0; i < name.length(); i++) {
      char c = name.charAt(i);
      if (!Character.isISOControl(c))
        buf.append(c);
    }
    return buf.toString();
  }
}
// 
// Copyright 2006 Mort Bay Consulting Pty. Ltd.
// ------------------------------------------------------------------------
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// 
/* ------------------------------------------------------------ */
/**
 * UTF-8 StringBuffer.
 * 
 * This class wraps a standard {@link java.lang.StringBuffer} and provides
 * methods to append UTF-8 encoded bytes, that are converted into characters.
 * 
 * This class is stateful and up to 6 calls to {@link #append(byte)} may be
 * needed before state a character is appended to the string buffer.
 * 
 * The UTF-8 decoding is done by this class and no additional buffers or Readers
 * are used. The UTF-8 code was inspired by http://javolution.org
 * 
 */
class Utf8StringBuffer {
  /** Characters decoded so far. */
  StringBuffer _buffer;
  /** Number of continuation bytes still expected for the current sequence. */
  int _more;
  /** Payload bits accumulated for the current multi-byte sequence. */
  int _bits;
  /** Set once any malformed input has been replaced by '?'. */
  boolean _errors;

  /** Creates an empty buffer with the default capacity. */
  Utf8StringBuffer() {
    _buffer = new StringBuffer();
  }

  /**
   * Creates an empty buffer.
   *
   * @param capacity initial capacity of the underlying character buffer
   */
  Utf8StringBuffer(int capacity) {
    _buffer = new StringBuffer(capacity);
  }

  /**
   * Decodes and appends a region of a byte array.
   *
   * @param b the UTF-8 encoded bytes
   * @param offset index of the first byte to decode
   * @param length number of bytes to decode
   */
  public void append(byte[] b, int offset, int length) {
    int end = offset + length;
    for (int i = offset; i < end; i++)
      append(b[i]);
  }

  /**
   * Feeds one UTF-8 encoded byte to the decoder.
   * <p>
   * A character is appended once its final byte has arrived. Malformed input is
   * replaced by a single '?' and recorded so that {@link #isError()} reports it.
   *
   * @param b the next byte of the encoded stream
   */
  public void append(byte b) {
    if (b >= 0) {
      // 0xxxxxxx: a single-byte character, including NUL.
      if (_more > 0) {
        // The pending multi-byte sequence was cut short. As before, the
        // interrupting byte itself is consumed by the replacement.
        malformed();
      } else {
        _buffer.append((char) b);
      }
    } else if (_more == 0) {
      if ((b & 0xc0) != 0xc0) {
        // 10xxxxxx: continuation byte without a lead byte
        malformed();
      } else if ((b & 0xe0) == 0xc0) {
        // 110xxxxx
        _more = 1;
        _bits = b & 0x1f;
      } else if ((b & 0xf0) == 0xe0) {
        // 1110xxxx
        _more = 2;
        _bits = b & 0x0f;
      } else if ((b & 0xf8) == 0xf0) {
        // 11110xxx
        _more = 3;
        _bits = b & 0x07;
      } else if ((b & 0xfc) == 0xf8) {
        // 111110xx
        _more = 4;
        _bits = b & 0x03;
      } else if ((b & 0xfe) == 0xfc) {
        // 1111110x
        _more = 5;
        _bits = b & 0x01;
      } else {
        // 0xfe and 0xff match no lead-byte pattern
        malformed();
      }
    } else if ((b & 0xc0) == 0xc0) {
      // 11??????: a new lead byte arrived while continuation bytes were expected
      malformed();
    } else {
      // 10xxxxxx
      _bits = (_bits << 6) | (b & 0x3f);
      if (--_more == 0) {
        if (Character.isValidCodePoint(_bits)) {
          // Appends a surrogate pair for values above 0xffff instead of
          // truncating them to one char.
          _buffer.appendCodePoint(_bits);
        } else {
          // 5- and 6-byte forms can yield values beyond the Unicode range.
          malformed();
        }
      }
    }
  }

  /** Appends the replacement '?', drops any partial sequence and records the error. */
  private void malformed() {
    _buffer.append('?');
    _more = 0;
    _bits = 0;
    _errors = true;
  }

  /**
   * @return the number of chars decoded so far
   */
  public int length() {
    return _buffer.length();
  }

  /** Discards all decoded characters, any partial sequence and the error flag. */
  public void reset() {
    _buffer.setLength(0);
    _more = 0;
    _bits = 0;
    _errors = false;
  }

  /**
   * @return the underlying buffer itself, not a copy
   */
  public StringBuffer getStringBuffer() {
    return _buffer;
  }

  /**
   * @return the characters decoded so far
   */
  public String toString() {
    return _buffer.toString();
  }

  /* ------------------------------------------------------------ */
  /**
   * @return True if there are non UTF-8 characters or incomplete UTF-8
   *         characters in the buffer.
   */
  public boolean isError() {
    return _errors || _more > 0;
  }
}

Utility methods for handling UTF-8 encoded byte streams.

  
/*
   Derby - Class org.apache.derby.iapi.util.UTF8Util
   Licensed to the Apache Software Foundation (ASF) under one
   or more contributor license agreements.  See the NOTICE file
   distributed with this work for additional information
   regarding copyright ownership.  The ASF licenses this file
   to you under the Apache License, Version 2.0 (the
   "License"); you may not use this file except in compliance
   with the License.  You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
   Unless required by applicable law or agreed to in writing,
   software distributed under the License is distributed on an
   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
   KIND, either express or implied.  See the License for the
   specific language governing permissions and limitations
   under the License.
 */
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.UTFDataFormatException;

/**
 * Utility methods for handling UTF-8 encoded byte streams.
 * <p>
 * Note that when the {@code skip} methods mention detection of invalid
 * UTF-8 encodings, it only checks the first byte of a character. For multibyte
 * encodings, the second and third byte are not checked for correctness, just
 * skipped and ignored.
 *
 * @see java.io.DataInput
 */
//@ThreadSafe
public final class UTF8Util {
    /** This class cannot be instantiated. */
    private UTF8Util() {}
    /**
     * Skip until the end-of-stream is reached.
     *
     * @param in byte stream with UTF-8 encoded characters
     * @return The number of characters skipped.
     * @throws IOException if reading from the stream fails
     * @throws UTFDataFormatException if an invalid UTF-8 encoding is detected
     */
    public static final long skipUntilEOF(InputStream in)
            throws IOException {
        // A single call is enough: with Long.MAX_VALUE as the limit,
        // internalSkip keeps reading until the stream reports end-of-stream
        // (or the Derby EOF marker is found).
        return internalSkip(in, Long.MAX_VALUE).charsSkipped();
    }
    /**
     * Skip the requested number of characters from the stream.
     * <p>
     * @param in byte stream with UTF-8 encoded characters
     * @param charsToSkip number of characters to skip
     * @return The number of bytes skipped.
     * @throws EOFException if end-of-stream is reached before the requested
     *      number of characters are skipped
     * @throws IOException if reading from the stream fails
     * @throws UTFDataFormatException if an invalid UTF-8 encoding is detected
     */
    public static final long skipFully(InputStream in, long charsToSkip)
            throws EOFException, IOException {
        SkipCount skipped = internalSkip(in, charsToSkip);
        if (skipped.charsSkipped() != charsToSkip) {
            throw new EOFException("Reached end-of-stream prematurely at " +
                "character/byte position " + skipped.charsSkipped() + "/" +
                skipped.bytesSkipped() + ", trying to skip " + charsToSkip);
        }
        return skipped.bytesSkipped();
    }
    /**
     * Skip characters in the stream.
     * <p>
     * Note that a smaller number than requested might be skipped if the
     * end-of-stream is reached before the specified number of characters has
     * been decoded. It is up to the caller to decide if this is an error
     * or not. For instance, when determining the character length of a stream,
     * <code>Long.MAX_VALUE</code> could be passed as the requested number of
     * characters to skip.
     * <p>
     * Only one-, two- and three-byte encodings are accepted; any other first
     * byte is reported as invalid.
     *
     * @param in byte stream with UTF-8 encoded characters
     * @param charsToSkip the number of characters to skip
     * @return A {@code SkipCount} holding the number of characters and the
     *      number of bytes skipped. Note that the number of characters skipped
     *      may be smaller than the requested number.
     * @throws IOException if reading from the stream fails
     * @throws UTFDataFormatException if an invalid UTF-8 encoding is detected
     */
    private static final SkipCount internalSkip(final InputStream in,
                                                final long charsToSkip)
            throws IOException {
        long charsSkipped = 0;
        long bytesSkipped = 0;
        // Decoding routine for modified UTF-8.
        // See java.io.DataInput
        while (charsSkipped < charsToSkip) {
            int c = in.read();
            if (c == -1) {
                break;
            }
            charsSkipped++;
            if ((c & 0x80) == 0x00) { // 0xxxxxxx: top bit clear
                // Found char of one byte width.
                bytesSkipped++;
            } else if ((c & 0x60) == 0x40) { // 110xxxxx (top bit is set here)
                // Found char of two byte width.
                if (InputStreamUtil.skipPersistent(in, 1L) != 1L) {
                    // No second byte present.
                    throw new UTFDataFormatException(
                        "Second byte in two byte character missing; byte pos " +
                        bytesSkipped + " ; char pos " + charsSkipped);
                }
                bytesSkipped += 2;
            } else if ((c & 0x70) == 0x60) { // 1110xxxx (top bit is set here)
                // Found char of three byte width.
                int skipped = 0;
                if (c == 0xe0) {
                    // Check for Derby EOF marker.
                    int c1 = in.read();
                    int c2 = in.read();
                    if (c1 == 0x00 && c2 == 0x00) {
                        // Found Derby EOF marker, exit loop.
                        charsSkipped--; // Compensate by subtracting one.
                        break;
                    }
                    // Do some rudimentary error checking.
                    // Allow everything except EOF, which is the same as done in
                    // normal processing (skipPersistent below).
                    if (c1 != -1 && c2 != -1) {
                        skipped = 2;
                    }
                } else {
                    skipped = (int)InputStreamUtil.skipPersistent(in, 2L);
                }
                if (skipped != 2) {
                    // No second or third byte present
                    throw new UTFDataFormatException(
                        "Second or third byte in three byte character " +
                        "missing; byte pos " + bytesSkipped + " ; char pos " +
                        charsSkipped);
                }
                bytesSkipped += 3;
            } else {
                throw new UTFDataFormatException(
                    "Invalid UTF-8 encoding encountered: (decimal) " + c);
            }
        }
        // We don't close the stream, since it might be reused. One example of
        // this is use of Resetable streams.
        return new SkipCount(charsSkipped, bytesSkipped);
    }
    /**
     * Helper class to hold skip counts; one for chars and one for bytes.
     */
    // @Immutable
    private static final class SkipCount {
        /** Number of bytes skipped. */
        private final long byteCount;
        /** Number of characters skipped. */
        private final long charCount;
        /**
         * Creates a holder for the specified skip counts.
         *
         * @param charCount number of characters
         * @param byteCount number of bytes
         * @throws IllegalArgumentException if either count is negative, or if
         *      {@code byteCount} is smaller than {@code charCount}
         */
        SkipCount(long charCount, long byteCount) {
            if (byteCount < 0 || charCount < 0) {
                // Don't allow negative counts.
                throw new IllegalArgumentException("charCount/byteCount " +
                        "cannot be negative: " + charCount + "/" + byteCount);
            }
            if (byteCount < charCount) {
                // A char must always be represented by at least one byte.
                throw new IllegalArgumentException("Number of bytes cannot be " +
                        "less than number of chars: " + byteCount + " < " +
                        charCount);
            }
            this.byteCount = byteCount;
            this.charCount = charCount;
        }
        /** @return the number of characters skipped */
        long charsSkipped() {
            return this.charCount;
        }
        /** @return the number of bytes skipped */
        long bytesSkipped() {
            return this.byteCount;
        }
    }
} // End class UTF8Util
/*
Derby - Class org.apache.derby.iapi.services.io.InputStreamUtil
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements.  See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to you under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License.  You may obtain a copy of the License at
   http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/**
Utility methods for InputStream that are stand-ins for
a small subset of DataInput methods. This avoids pushing
a DataInputStream just to get this functionality.
*/
final class InputStreamUtil {
  /** Largest number of bytes requested per skip attempt in {@link #skipUntilEOF}. */
  private static final int SKIP_FRAGMENT_SIZE = Integer.MAX_VALUE;

  /**
   * Reads a single byte as an unsigned value, treating end-of-stream as an error.
   *
   * @param in the stream to read from
   * @return the byte read, in the range 0-255
   * @throws EOFException if the stream is already at its end
   * @throws IOException if the underlying read fails
   * @see java.io.DataInput#readUnsignedByte
   */
  public static int readUnsignedByte(InputStream in) throws IOException {
    final int value = in.read();
    if (value >= 0) {
      return value;
    }
    throw new EOFException();
  }

  /**
   * Fills {@code len} bytes of {@code b}, starting at {@code offset}, reading
   * repeatedly until the whole region has been filled.
   *
   * @param in the stream to read from
   * @param b destination array
   * @param offset index in {@code b} of the first byte to store
   * @param len number of bytes to read
   * @throws EOFException if the stream ends before {@code len} bytes were read
   * @throws IOException if the underlying read fails
   * @see java.io.DataInput#readFully
   */
  public static void readFully(InputStream in, byte[] b, int offset, int len)
      throws IOException {
    int position = offset;
    int remaining = len;
    for (;;) {
      final int count = in.read(b, position, remaining);
      if (count < 0) {
        throw new EOFException();
      }
      position += count;
      remaining -= count;
      if (remaining == 0) {
        return;
      }
    }
  }

  /**
   * Reads into {@code b} until {@code len} bytes have been stored or a read
   * returns no data (zero bytes or end-of-stream).
   * <p>
   * A single {@code InputStream.read(byte[],int,int)} call may deliver fewer
   * bytes than requested even when more are available, hence the loop.
   *
   * @param in the stream to read from
   * @param b destination array
   * @param offset index in {@code b} of the first byte to store
   * @param len maximum number of bytes to read
   * @return the number of bytes actually stored
   * @throws IOException if the underlying read fails
   */
  public static int readLoop(InputStream in, byte[] b, int offset, int len)
      throws IOException {
    int position = offset;
    int remaining = len;
    for (;;) {
      final int count = in.read(b, position, remaining);
      if (count <= 0) {
        break;
      }
      position += count;
      remaining -= count;
      if (remaining == 0) {
        break;
      }
    }
    return position - offset;
  }

  /**
   * Skips everything that is left in the stream.
   *
   * @param is the stream to skip
   * @return the number of bytes skipped
   * @throws IOException if skipping fails; reaching end-of-stream is not an error
   * @throws NullPointerException if {@code is} is null
   */
  public static long skipUntilEOF(InputStream is) throws IOException {
    if (is == null) {
      throw new NullPointerException();
    }
    long total = 0;
    long chunk;
    do {
      chunk = skipPersistent(is, SKIP_FRAGMENT_SIZE);
      total += chunk;
      // A short chunk means skipPersistent ran into end-of-stream.
    } while (chunk >= SKIP_FRAGMENT_SIZE);
    return total;
  }

  /**
   * Skips exactly the requested number of bytes.
   *
   * @param is the stream to skip
   * @param skippedBytes number of bytes to skip; nothing happens when this is
   *        zero or negative
   * @throws EOFException if the stream ends before that many bytes were skipped
   * @throws IOException if skipping fails for another reason
   * @throws NullPointerException if {@code is} is null
   */
  public static void skipFully(InputStream is, long skippedBytes) throws IOException {
    if (is == null) {
      throw new NullPointerException();
    }
    if (skippedBytes > 0 && skipPersistent(is, skippedBytes) < skippedBytes) {
      throw new EOFException();
    }
  }

  /**
   * Keeps skipping until the requested number of bytes has been skipped or the
   * stream ends.
   * <p>
   * Falling short is not reported with an exception; a return value smaller
   * than {@code bytesToSkip} tells the caller that end-of-stream was reached.
   *
   * @param in byte stream
   * @param bytesToSkip the number of bytes to skip
   * @return The number of bytes skipped.
   * @throws IOException if reading from the stream fails
   */
  public static final long skipPersistent(InputStream in, long bytesToSkip)
      throws IOException {
    long total = 0;
    while (total < bytesToSkip) {
      long step = in.skip(bytesToSkip - total);
      if (step == 0) {
        // skip() made no progress: read one byte to tell a stall from end-of-stream.
        if (in.read() == -1) {
          break;
        }
        step = 1; // the byte just read counts as skipped
      }
      total += step;
    }
    return total;
  }
}

Java/Development Class/UTF8 Byte Hex

Содержание

Convert bytes To Hex

Convert file in SJIS to UTF8

Convert from Unicode to UTF-8

Convert from UTF-8 to Unicode

Convert hex To Bytes

converting between byte arrays and hex encoded strings

Decodes values of attributes in the DN encoded in hex into a UTF-8 String.

Encodes octects (using utf-8) into Hex data

Make bytes

Normalizer

Read Windows Notepad Unicode files

Return an UTF-8 encoded String

Return an UTF-8 encoded String by length

Return UTF-8 encoded byte[] representation of a String

Show unicode string

String converter

Unicode 2 ASCII

UTF8 String utilities

Utility methods for handling UTF-8 encoded byte streams.

Навигация

Персональные инструменты

Пространства имён

Варианты

Просмотры

Ещё

Поиск

Разделы

Навигация

Инструменты