Java Tutorial/File/CharSet
Версия от 17:44, 31 мая 2010; (обсуждение)
Содержание
CharSet alias list
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.Map;
/**
 * Prints one line per charset known to the running JVM: its display name,
 * whether it is registered, its canonical name, and every alias, all
 * separated by commas.
 */
public class MainClass {
  public static void main(String[] args) {
    Map available = Charset.availableCharsets();
    for (Iterator all = available.values().iterator(); all.hasNext();) {
      Charset charset = (Charset) all.next();

      // Header: "<display name> (registered|unregistered): <canonical name>"
      String label = charset.isRegistered() ? " (registered): " : " (unregistered): ";
      System.out.print(charset.displayName());
      System.out.print(label);
      System.out.print(charset.name());

      // Each alias is appended to the same line, prefixed by a comma.
      for (Iterator alternate = charset.aliases().iterator(); alternate.hasNext();) {
        System.out.print(", ");
        System.out.print(alternate.next());
      }
      System.out.println();
    }
  }
}
Big5 (registered): Big5, csBig5 Big5-HKSCS (registered): Big5-HKSCS, big5-hkscs, big5hk, big5-hkscs:unicode3.0, big5hkscs, Big5_HKSCS EUC-JP (registered): EUC-JP, eucjis, x-eucjp, csEUCPkdFmtjapanese, eucjp, Extended_UNIX_Code_Packed_Format_for_Japanese, x-euc-jp, euc_jp EUC-KR (registered): EUC-KR, ksc5601, 5601, ksc5601_1987, ksc_5601, ksc5601-1987, euc_kr, ks_c_5601-1987, euckr, csEUCKR GB18030 (registered): GB18030, gb18030-2000 GB2312 (registered): GB2312, gb2312-1980, gb2312, EUC_CN, gb2312-80, euc-cn, euccn, x-EUC-CN GBK (registered): GBK, windows-936, CP936 IBM-Thai (registered): IBM-Thai, ibm-838, ibm838, 838, cp838 IBM00858 (registered): IBM00858, cp858, ccsid00858, cp00858, 858 IBM01140 (registered): IBM01140, 1140, ccsid01140, cp01140, cp1140 IBM01141 (registered): IBM01141, cp01141, cp1141, ccsid01141, 1141 IBM01142 (registered): IBM01142, cp01142, cp1142, ccsid01142, 1142 IBM01143 (registered): IBM01143, 1143, cp01143, cp1143, ccsid01143 IBM01144 (registered): IBM01144, cp01144, cp1144, 1144, ccsid01144 IBM01145 (registered): IBM01145, ccsid01145, cp01145, 1145, cp1145 IBM01146 (registered): IBM01146, ccsid01146, cp1146, 1146, cp01146 IBM01147 (registered): IBM01147, cp1147, 1147, ccsid01147, cp01147 IBM01148 (registered): IBM01148, cp01148, cp1148, ccsid01148, 1148 IBM01149 (registered): IBM01149, cp1149, ccsid01149, 1149, cp01149 IBM037 (registered): IBM037, csIBM037, cpibm37, cp037, cs-ebcdic-cp-us, ibm-037, ibm-37, cs-ebcdic-cp-ca, cs-ebcdic-cp-wt, cs-ebcdic-cp-nl, ibm037, 037 IBM1026 (registered): IBM1026, 1026, ibm1026, cp1026, ibm-1026 IBM1047 (registered): IBM1047, 1047, ibm-1047, cp1047 IBM273 (registered): IBM273, ibm273, 273, cp273, ibm-273 IBM277 (registered): IBM277, ibm277, cp277, ibm-277, 277 IBM278 (registered): IBM278, csIBM278, ibm278, cp278, ebcdic-cp-se, 278, ibm-278, ebcdic-sv IBM280 (registered): IBM280, ibm280, cp280, 280, ibm-280 IBM284 (registered): IBM284, cpibm284, csIBM284, ibm-284, ibm284, 284, cp284 IBM285 (registered): 
IBM285, 285, ebcdic-cp-gb, ibm-285, csIBM285, cp285, ibm285, cpibm285, ebcdic-gb IBM297 (registered): IBM297, csIBM297, ebcdic-cp-fr, cp297, ibm297, ibm-297, 297, cpibm297 IBM420 (registered): IBM420, ibm420, 420, ebcdic-cp-ar1, csIBM420, ibm-420, cp420 IBM424 (registered): IBM424, cp424, 424, ebcdic-cp-he, ibm424, csIBM424, ibm-424 IBM437 (registered): IBM437, windows-437, cspc8codepage437, ibm437, cp437, 437, ibm-437 IBM500 (registered): IBM500, 500, ebcdic-cp-ch, ebcdic-cp-bh, ibm-500, csIBM500, cp500, ibm500 IBM775 (registered): IBM775, ibm775, cp775, ibm-775, 775 IBM850 (registered): IBM850, ibm-850, 850, ibm850, cspc850multilingual, cp850 IBM852 (registered): IBM852, 852, ibm-852, csPCp852, cp852, ibm852 IBM855 (registered): IBM855, 855, ibm855, cp855, cspcp855, ibm-855 IBM857 (registered): IBM857, cp857, ibm857, csIBM857, 857, ibm-857 IBM860 (registered): IBM860, ibm860, ibm-860, csIBM860, cp860, 860 IBM861 (registered): IBM861, csIBM861, ibm861, 861, cp861, ibm-861 IBM862 (registered): IBM862, cp862, ibm862, 862, ibm-862, csIBM862 IBM863 (registered): IBM863, cp863, csIBM863, ibm863, 863, ibm-863 IBM864 (registered): IBM864, csIBM864, ibm-864, 864, ibm864, cp864 IBM865 (registered): IBM865, ibm-865, csIBM865, 865, ibm865, cp865 IBM866 (registered): IBM866, 866, ibm-866, csIBM866, ibm866, cp866 IBM868 (registered): IBM868, cp-ar, 868, ibm868, csIBM868, ibm-868, cp868 IBM869 (registered): IBM869, ibm869, ibm-869, 869, cp869, csIBM869, cp-gr IBM870 (registered): IBM870, ebcdic-cp-yu, ibm870, ibm-870, 870, csIBM870, cp870, ebcdic-cp-roece IBM871 (registered): IBM871, csIBM871, ibm-871, cp871, ebcdic-cp-is, 871, ibm871 IBM918 (registered): IBM918, ibm-918, 918, cp918, ebcdic-cp-ar2 ISO-2022-CN (registered): ISO-2022-CN, csISO2022CN, ISO2022CN ISO-2022-JP (registered): ISO-2022-JP, jis, jis_encoding, csjisencoding, csISO2022JP, iso2022jp ISO-2022-KR (registered): ISO-2022-KR, ISO2022KR, csISO2022KR ISO-8859-1 (registered): ISO-8859-1, iso-ir-100, 8859_1, 
ISO_8859-1, ISO8859_1, 819, csISOLatin1, IBM-819, ISO_8859-1:1987, latin1, cp819, ISO8859-1, IBM819, ISO_8859_1, l1 ISO-8859-13 (registered): ISO-8859-13, ISO8859-13, 8859_13, iso8859_13, iso_8859-13 ISO-8859-15 (registered): ISO-8859-15, 8859_15, csISOlatin9, IBM923, cp923, 923, L9, IBM-923, ISO8859-15, LATIN9, ISO_8859-15, LATIN0, csISOlatin0, ISO8859_15_FDIS, ISO-8859-15, ISO8859_15 ISO-8859-2 (registered): ISO-8859-2, ibm912, l2, ibm-912, cp912, ISO_8859-2:1987, ISO_8859-2, latin2, csISOLatin2, iso8859_2, 912, 8859_2, ISO8859-2, iso-ir-101 ISO-8859-3 (registered): ISO-8859-3, iso8859_3, cp913, csISOLatin3, ibm-913, ISO_8859-3, 913, ISO8859-3, 8859_3, ibm913, iso-ir-109, ISO_8859-3:1988, latin3, l3 ISO-8859-4 (registered): ISO-8859-4, iso-ir-110, l4, 8859_4, ibm914, latin4, ibm-914, csISOLatin4, iso8859_4, iso8859-4, cp914, 914, ISO_8859-4:1988, ISO_8859-4 ISO-8859-5 (registered): ISO-8859-5, 915, ISO_8859-5:1988, iso8859_5, cp915, ibm915, ISO_8859-5, ISO8859-5, csISOLatinCyrillic, cyrillic, 8859_5, iso-ir-144, ibm-915 ISO-8859-6 (registered): ISO-8859-6, 8859_6, arabic, ibm-1089, iso8859_6, ISO_8859-6, iso-ir-127, ibm1089, ISO_8859-6:1987, ECMA-114, 1089, csISOLatinArabic, ISO8859-6, ASMO-708, cp1089 ISO-8859-7 (registered): ISO-8859-7, sun_eu_greek, 8859_7, iso-ir-126, ISO_8859-7:1987, ibm-813, iso8859_7, ISO_8859-7, csISOLatinGreek, greek8, ECMA-118, ibm813, ELOT_928, iso8859-7, cp813, greek, 813 ISO-8859-8 (registered): ISO-8859-8, iso-ir-138, ibm-916, iso8859_8, cp916, ISO8859-8, ISO_8859-8:1988, hebrew, 8859_8, csISOLatinHebrew, ibm916, 916, ISO_8859-8 ISO-8859-9 (registered): ISO-8859-9, cp920, l5, ISO_8859-9, ibm-920, csISOLatin5, 8859_9, iso-ir-148, latin5, 920, ISO8859-9, ibm920, ISO_8859-9:1989, iso8859_9 JIS_X0201 (registered): JIS_X0201, JIS_X0201, X0201, JIS0201, csHalfWidthKatakana JIS_X0212-1990 (registered): JIS_X0212-1990, jis_x0212-1990, iso-ir-159, x0212, JIS0212, csISO159JISX02121990 KOI8-R (registered): KOI8-R, koi8, koi8_r, cskoi8r 
Shift_JIS (registered): Shift_JIS, shift-jis, shift_jis, x-sjis, ms_kanji, csShiftJIS, sjis TIS-620 (registered): TIS-620, tis620.2533, tis620 US-ASCII (registered): US-ASCII, ISO646-US, IBM367, ASCII, cp367, default, ascii7, ANSI_X3.4-1986, iso-ir-6, us, 646, iso_646.irv:1983, csASCII, ANSI_X3.4-1968, ISO_646.irv:1991 UTF-16 (registered): UTF-16, utf16, UTF_16 UTF-16BE (registered): UTF-16BE, X-UTF-16BE, UnicodeBigUnmarked, UTF_16BE, ISO-10646-UCS-2 UTF-16LE (registered): UTF-16LE, UnicodeLittleUnmarked, X-UTF-16LE, UTF_16LE UTF-8 (registered): UTF-8, UTF8, unicode-1-1-utf-8 windows-1250 (registered): windows-1250, cp1250, cp5346 windows-1251 (registered): windows-1251, ansi-1251, cp1251, cp5347 windows-1252 (registered): windows-1252, cp1252, cp5348 windows-1253 (registered): windows-1253, cp1253, cp5349 windows-1254 (registered): windows-1254, cp5350, cp1254 windows-1255 (registered): windows-1255, cp1255 windows-1256 (registered): windows-1256, cp1256 windows-1257 (registered): windows-1257, cp1257, cp5353 windows-1258 (registered): windows-1258, cp1258 windows-31j (registered): windows-31j, csWindows31J, windows-932, MS932 x-Big5-Solaris (unregistered): x-Big5-Solaris, Big5_Solaris x-euc-jp-linux (unregistered): x-euc-jp-linux, euc_jp_linux, euc-jp-linux x-EUC-TW (unregistered): x-EUC-TW, cns11643, euc_tw, EUC-TW, euctw x-eucJP-Open (unregistered): x-eucJP-Open, EUC_JP_Solaris, eucJP-open x-IBM1006 (unregistered): x-IBM1006, cp1006, ibm1006, 1006, ibm-1006 x-IBM1025 (unregistered): x-IBM1025, ibm1025, 1025, cp1025, ibm-1025 x-IBM1046 (unregistered): x-IBM1046, ibm1046, 1046, cp1046, ibm-1046 x-IBM1097 (unregistered): x-IBM1097, ibm1097, 1097, cp1097, ibm-1097 x-IBM1098 (unregistered): x-IBM1098, cp1098, ibm-1098, ibm1098, 1098 x-IBM1112 (unregistered): x-IBM1112, cp1112, 1112, ibm1112, ibm-1112 x-IBM1122 (unregistered): x-IBM1122, ibm-1122, 1122, cp1122, ibm1122 x-IBM1123 (unregistered): x-IBM1123, cp1123, ibm1123, ibm-1123, 1123 x-IBM1124 (unregistered): 
x-IBM1124, cp1124, ibm1124, ibm-1124, 1124 x-IBM1381 (unregistered): x-IBM1381, 1381, cp1381, ibm1381, ibm-1381 x-IBM1383 (unregistered): x-IBM1383, ibm1383, ibm-1383, cp1383, 1383 x-IBM33722 (unregistered): x-IBM33722, ibm-33722, cp33722, ibm-33722_vascii_vpua, ibm-5050, ibm33722, 33722 x-IBM737 (unregistered): x-IBM737, ibm-737, ibm737, cp737, 737 x-IBM856 (unregistered): x-IBM856, ibm-856, 856, ibm856, cp856 x-IBM874 (unregistered): x-IBM874, cp874, ibm874, ibm-874, 874 x-IBM875 (unregistered): x-IBM875, ibm875, ibm-875, 875, cp875 x-IBM921 (unregistered): x-IBM921, 921, cp921, ibm921, ibm-921 x-IBM922 (unregistered): x-IBM922, cp922, ibm922, ibm-922, 922 x-IBM930 (unregistered): x-IBM930, cp930, 930, ibm930, ibm-930 x-IBM933 (unregistered): x-IBM933, ibm933, cp933, 933, ibm-933 x-IBM935 (unregistered): x-IBM935, 935, cp935, ibm935, ibm-935 x-IBM937 (unregistered): x-IBM937, cp937, ibm-937, ibm937, 937 x-IBM939 (unregistered): x-IBM939, ibm-939, ibm939, cp939, 939 x-IBM942 (unregistered): x-IBM942, cp942, ibm942, ibm-942, 942 x-IBM942C (unregistered): x-IBM942C, ibm942C, cp942C, ibm-942C, 942C x-IBM943 (unregistered): x-IBM943, ibm943, ibm-943, cp943, 943 x-IBM943C (unregistered): x-IBM943C, ibm-943C, ibm943C, 943C, cp943C x-IBM948 (unregistered): x-IBM948, 948, ibm-948, cp948, ibm948 x-IBM949 (unregistered): x-IBM949, ibm-949, cp949, 949, ibm949 x-IBM949C (unregistered): x-IBM949C, cp949C, 949C, ibm949C, ibm-949C x-IBM950 (unregistered): x-IBM950, 950, cp950, ibm-950, ibm950 x-IBM964 (unregistered): x-IBM964, 964, cp964, ibm-964, ibm964 x-IBM970 (unregistered): x-IBM970, ibm970, 970, cp970, ibm-eucKR, ibm-970 x-ISCII91 (unregistered): x-ISCII91, iscii, ST_SEV_358-88, iso-ir-153, csISO153GOST1976874, ISCII91 x-ISO-2022-CN-CNS (unregistered): x-ISO-2022-CN-CNS, ISO2022CN_CNS, ISO-2022-CN-CNS x-ISO-2022-CN-GB (unregistered): x-ISO-2022-CN-GB, ISO-2022-CN-GB, ISO2022CN_GB x-iso-8859-11 (unregistered): x-iso-8859-11, iso-8859-11, iso8859_11 x-JIS0208 (unregistered): 
x-JIS0208, JIS0208, csISO87JISX0208, x0208, JIS_C6226-1983, JIS_X0208-1983, iso-ir-87 x-JISAutoDetect (unregistered): x-JISAutoDetect, JISAutoDetect x-Johab (unregistered): x-Johab, johab, ms1361, ksc5601-1992, ksc5601_1992 x-MacArabic (unregistered): x-MacArabic, MacArabic x-MacCentralEurope (unregistered): x-MacCentralEurope, MacCentralEurope x-MacCroatian (unregistered): x-MacCroatian, MacCroatian x-MacCyrillic (unregistered): x-MacCyrillic, MacCyrillic x-MacDingbat (unregistered): x-MacDingbat, MacDingbat x-MacGreek (unregistered): x-MacGreek, MacGreek x-MacHebrew (unregistered): x-MacHebrew, MacHebrew x-MacIceland (unregistered): x-MacIceland, MacIceland x-MacRoman (unregistered): x-MacRoman, MacRoman x-MacRomania (unregistered): x-MacRomania, MacRomania x-MacSymbol (unregistered): x-MacSymbol, MacSymbol x-MacThai (unregistered): x-MacThai, MacThai x-MacTurkish (unregistered): x-MacTurkish, MacTurkish x-MacUkraine (unregistered): x-MacUkraine, MacUkraine x-MS950-HKSCS (unregistered): x-MS950-HKSCS, MS950_HKSCS x-mswin-936 (unregistered): x-mswin-936, ms936, ms_936 x-PCK (unregistered): x-PCK, pck x-windows-874 (unregistered): x-windows-874, windows-874, ms874, ms-874 x-windows-949 (unregistered): x-windows-949, windows949, ms_949, ms949 x-windows-950 (unregistered): x-windows-950, windows-950, ms950
CharSet Lister
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.Map;
/**
 * Prints the canonical name of every charset available in the running JVM,
 * one name per line, in the order the charset map supplies its keys.
 */
public class MainClass {
  public static void main(String[] args) {
    // availableCharsets() maps canonical charset names to Charset objects;
    // only the names (the keys) are needed here.
    for (Iterator names = Charset.availableCharsets().keySet().iterator(); names.hasNext();) {
      Object canonicalName = names.next();
      System.out.println(canonicalName);
    }
  }
}
Big5 Big5-HKSCS EUC-JP EUC-KR GB18030 GB2312 GBK IBM-Thai IBM00858 IBM01140 IBM01141 IBM01142 IBM01143 IBM01144 IBM01145 IBM01146 IBM01147 IBM01148 IBM01149 IBM037 IBM1026 IBM1047 IBM273 IBM277 IBM278 IBM280 IBM284 IBM285 IBM297 IBM420 IBM424 IBM437 IBM500 IBM775 IBM850 IBM852 IBM855 IBM857 IBM860 IBM861 IBM862 IBM863 IBM864 IBM865 IBM866 IBM868 IBM869 IBM870 IBM871 IBM918 ISO-2022-CN ISO-2022-JP ISO-2022-KR ISO-8859-1 ISO-8859-13 ISO-8859-15 ISO-8859-2 ISO-8859-3 ISO-8859-4 ISO-8859-5 ISO-8859-6 ISO-8859-7 ISO-8859-8 ISO-8859-9 JIS_X0201 JIS_X0212-1990 KOI8-R Shift_JIS TIS-620 US-ASCII UTF-16 UTF-16BE UTF-16LE UTF-8 windows-1250 windows-1251 windows-1252 windows-1253 windows-1254 windows-1255 windows-1256 windows-1257 windows-1258 windows-31j x-Big5-Solaris x-euc-jp-linux x-EUC-TW x-eucJP-Open x-IBM1006 x-IBM1025 x-IBM1046 x-IBM1097 x-IBM1098 x-IBM1112 x-IBM1122 x-IBM1123 x-IBM1124 x-IBM1381 x-IBM1383 x-IBM33722 x-IBM737 x-IBM856 x-IBM874 x-IBM875 x-IBM921 x-IBM922 x-IBM930 x-IBM933 x-IBM935 x-IBM937 x-IBM939 x-IBM942 x-IBM942C x-IBM943 x-IBM943C x-IBM948 x-IBM949 x-IBM949C x-IBM950 x-IBM964 x-IBM970 x-ISCII91 x-ISO-2022-CN-CNS x-ISO-2022-CN-GB x-iso-8859-11 x-JIS0208 x-JISAutoDetect x-Johab x-MacArabic x-MacCentralEurope x-MacCroatian x-MacCyrillic x-MacDingbat x-MacGreek x-MacHebrew x-MacIceland x-MacRoman x-MacRomania x-MacSymbol x-MacThai x-MacTurkish x-MacUkraine x-MS950-HKSCS x-mswin-936 x-PCK x-windows-874 x-windows-949 x-windows-950
Displays Available Charsets and aliases
import java.nio.charset.*;
import java.util.*;
/**
 * Prints every available charset on its own line. When a charset has aliases
 * the name is followed by a colon and the comma-separated alias list; a
 * charset without aliases is printed as its bare name.
 */
public class MainClass {
  public static void main(String[] args) {
    Iterator entries = Charset.availableCharsets().entrySet().iterator();
    while (entries.hasNext()) {
      Map.Entry entry = (Map.Entry) entries.next();
      System.out.print((String) entry.getKey());

      // The first alias is introduced by ": ", every later one by ", ".
      String separator = ": ";
      Charset charset = (Charset) entry.getValue();
      for (Iterator alternate = charset.aliases().iterator(); alternate.hasNext();) {
        System.out.print(separator);
        System.out.print(alternate.next());
        separator = ", ";
      }
      System.out.println();
    }
  }
}
/*
*/
Big5: csBig5 Big5-HKSCS: big5-hkscs, big5hk, big5-hkscs:unicode3.0, big5hkscs, Big5_HKSCS EUC-JP: eucjis, x-eucjp, csEUCPkdFmtjapanese, eucjp, Extended_UNIX_Code_Packed_Format_for_Japanese, x-euc-jp, euc_jp EUC-KR: ksc5601, 5601, ksc5601_1987, ksc_5601, ksc5601-1987, euc_kr, ks_c_5601-1987, euckr, csEUCKR GB18030: gb18030-2000 GB2312: gb2312-1980, gb2312, EUC_CN, gb2312-80, euc-cn, euccn, x-EUC-CN GBK: windows-936, CP936 IBM-Thai: ibm-838, ibm838, 838, cp838 IBM00858: cp858, ccsid00858, cp00858, 858 IBM01140: 1140, ccsid01140, cp01140, cp1140 IBM01141: cp01141, cp1141, ccsid01141, 1141 IBM01142: cp01142, cp1142, ccsid01142, 1142 IBM01143: 1143, cp01143, cp1143, ccsid01143 IBM01144: cp01144, cp1144, 1144, ccsid01144 IBM01145: ccsid01145, cp01145, 1145, cp1145 IBM01146: ccsid01146, cp1146, 1146, cp01146 IBM01147: cp1147, 1147, ccsid01147, cp01147 IBM01148: cp01148, cp1148, ccsid01148, 1148 IBM01149: cp1149, ccsid01149, 1149, cp01149 IBM037: csIBM037, cpibm37, cp037, cs-ebcdic-cp-us, ibm-037, ibm-37, cs-ebcdic-cp-ca, cs-ebcdic-cp-wt, cs-ebcdic-cp-nl, ibm037, 037 IBM1026: 1026, ibm1026, cp1026, ibm-1026 IBM1047: 1047, ibm-1047, cp1047 IBM273: ibm273, 273, cp273, ibm-273 IBM277: ibm277, cp277, ibm-277, 277 IBM278: csIBM278, ibm278, cp278, ebcdic-cp-se, 278, ibm-278, ebcdic-sv IBM280: ibm280, cp280, 280, ibm-280 IBM284: cpibm284, csIBM284, ibm-284, ibm284, 284, cp284 IBM285: 285, ebcdic-cp-gb, ibm-285, csIBM285, cp285, ibm285, cpibm285, ebcdic-gb IBM297: csIBM297, ebcdic-cp-fr, cp297, ibm297, ibm-297, 297, cpibm297 IBM420: ibm420, 420, ebcdic-cp-ar1, csIBM420, ibm-420, cp420 IBM424: cp424, 424, ebcdic-cp-he, ibm424, csIBM424, ibm-424 IBM437: windows-437, cspc8codepage437, ibm437, cp437, 437, ibm-437 IBM500: 500, ebcdic-cp-ch, ebcdic-cp-bh, ibm-500, csIBM500, cp500, ibm500 IBM775: ibm775, cp775, ibm-775, 775 IBM850: ibm-850, 850, ibm850, cspc850multilingual, cp850 IBM852: 852, ibm-852, csPCp852, cp852, ibm852 IBM855: 855, ibm855, cp855, cspcp855, ibm-855 IBM857: cp857, 
ibm857, csIBM857, 857, ibm-857 IBM860: ibm860, ibm-860, csIBM860, cp860, 860 IBM861: csIBM861, ibm861, 861, cp861, ibm-861 IBM862: cp862, ibm862, 862, ibm-862, csIBM862 IBM863: cp863, csIBM863, ibm863, 863, ibm-863 IBM864: csIBM864, ibm-864, 864, ibm864, cp864 IBM865: ibm-865, csIBM865, 865, ibm865, cp865 IBM866: 866, ibm-866, csIBM866, ibm866, cp866 IBM868: cp-ar, 868, ibm868, csIBM868, ibm-868, cp868 IBM869: ibm869, ibm-869, 869, cp869, csIBM869, cp-gr IBM870: ebcdic-cp-yu, ibm870, ibm-870, 870, csIBM870, cp870, ebcdic-cp-roece IBM871: csIBM871, ibm-871, cp871, ebcdic-cp-is, 871, ibm871 IBM918: ibm-918, 918, cp918, ebcdic-cp-ar2 ISO-2022-CN: csISO2022CN, ISO2022CN ISO-2022-JP: jis, jis_encoding, csjisencoding, csISO2022JP, iso2022jp ISO-2022-KR: ISO2022KR, csISO2022KR ISO-8859-1: iso-ir-100, 8859_1, ISO_8859-1, ISO8859_1, 819, csISOLatin1, IBM-819, ISO_8859-1:1987, latin1, cp819, ISO8859-1, IBM819, ISO_8859_1, l1 ISO-8859-13: ISO8859-13, 8859_13, iso8859_13, iso_8859-13 ISO-8859-15: 8859_15, csISOlatin9, IBM923, cp923, 923, L9, IBM-923, ISO8859-15, LATIN9, ISO_8859-15, LATIN0, csISOlatin0, ISO8859_15_FDIS, ISO-8859-15, ISO8859_15 ISO-8859-2: ibm912, l2, ibm-912, cp912, ISO_8859-2:1987, ISO_8859-2, latin2, csISOLatin2, iso8859_2, 912, 8859_2, ISO8859-2, iso-ir-101 ISO-8859-3: iso8859_3, cp913, csISOLatin3, ibm-913, ISO_8859-3, 913, ISO8859-3, 8859_3, ibm913, iso-ir-109, ISO_8859-3:1988, latin3, l3 ISO-8859-4: iso-ir-110, l4, 8859_4, ibm914, latin4, ibm-914, csISOLatin4, iso8859_4, iso8859-4, cp914, 914, ISO_8859-4:1988, ISO_8859-4 ISO-8859-5: 915, ISO_8859-5:1988, iso8859_5, cp915, ibm915, ISO_8859-5, ISO8859-5, csISOLatinCyrillic, cyrillic, 8859_5, iso-ir-144, ibm-915 ISO-8859-6: 8859_6, arabic, ibm-1089, iso8859_6, ISO_8859-6, iso-ir-127, ibm1089, ISO_8859-6:1987, ECMA-114, 1089, csISOLatinArabic, ISO8859-6, ASMO-708, cp1089 ISO-8859-7: sun_eu_greek, 8859_7, iso-ir-126, ISO_8859-7:1987, ibm-813, iso8859_7, ISO_8859-7, csISOLatinGreek, greek8, ECMA-118, ibm813, 
ELOT_928, iso8859-7, cp813, greek, 813 ISO-8859-8: iso-ir-138, ibm-916, iso8859_8, cp916, ISO8859-8, ISO_8859-8:1988, hebrew, 8859_8, csISOLatinHebrew, ibm916, 916, ISO_8859-8 ISO-8859-9: cp920, l5, ISO_8859-9, ibm-920, csISOLatin5, 8859_9, iso-ir-148, latin5, 920, ISO8859-9, ibm920, ISO_8859-9:1989, iso8859_9 JIS_X0201: JIS_X0201, X0201, JIS0201, csHalfWidthKatakana JIS_X0212-1990: jis_x0212-1990, iso-ir-159, x0212, JIS0212, csISO159JISX02121990 KOI8-R: koi8, koi8_r, cskoi8r Shift_JIS: shift-jis, shift_jis, x-sjis, ms_kanji, csShiftJIS, sjis TIS-620: tis620.2533, tis620 US-ASCII: ISO646-US, IBM367, ASCII, cp367, default, ascii7, ANSI_X3.4-1986, iso-ir-6, us, 646, iso_646.irv:1983, csASCII, ANSI_X3.4-1968, ISO_646.irv:1991 UTF-16: utf16, UTF_16 UTF-16BE: X-UTF-16BE, UnicodeBigUnmarked, UTF_16BE, ISO-10646-UCS-2 UTF-16LE: UnicodeLittleUnmarked, X-UTF-16LE, UTF_16LE UTF-8: UTF8, unicode-1-1-utf-8 windows-1250: cp1250, cp5346 windows-1251: ansi-1251, cp1251, cp5347 windows-1252: cp1252, cp5348 windows-1253: cp1253, cp5349 windows-1254: cp5350, cp1254 windows-1255: cp1255 windows-1256: cp1256 windows-1257: cp1257, cp5353 windows-1258: cp1258 windows-31j: csWindows31J, windows-932, MS932 x-Big5-Solaris: Big5_Solaris x-euc-jp-linux: euc_jp_linux, euc-jp-linux x-EUC-TW: cns11643, euc_tw, EUC-TW, euctw x-eucJP-Open: EUC_JP_Solaris, eucJP-open x-IBM1006: cp1006, ibm1006, 1006, ibm-1006 x-IBM1025: ibm1025, 1025, cp1025, ibm-1025 x-IBM1046: ibm1046, 1046, cp1046, ibm-1046 x-IBM1097: ibm1097, 1097, cp1097, ibm-1097 x-IBM1098: cp1098, ibm-1098, ibm1098, 1098 x-IBM1112: cp1112, 1112, ibm1112, ibm-1112 x-IBM1122: ibm-1122, 1122, cp1122, ibm1122 x-IBM1123: cp1123, ibm1123, ibm-1123, 1123 x-IBM1124: cp1124, ibm1124, ibm-1124, 1124 x-IBM1381: 1381, cp1381, ibm1381, ibm-1381 x-IBM1383: ibm1383, ibm-1383, cp1383, 1383 x-IBM33722: ibm-33722, cp33722, ibm-33722_vascii_vpua, ibm-5050, ibm33722, 33722 x-IBM737: ibm-737, ibm737, cp737, 737 x-IBM856: ibm-856, 856, ibm856, cp856 x-IBM874: 
cp874, ibm874, ibm-874, 874 x-IBM875: ibm875, ibm-875, 875, cp875 x-IBM921: 921, cp921, ibm921, ibm-921 x-IBM922: cp922, ibm922, ibm-922, 922 x-IBM930: cp930, 930, ibm930, ibm-930 x-IBM933: ibm933, cp933, 933, ibm-933 x-IBM935: 935, cp935, ibm935, ibm-935 x-IBM937: cp937, ibm-937, ibm937, 937 x-IBM939: ibm-939, ibm939, cp939, 939 x-IBM942: cp942, ibm942, ibm-942, 942 x-IBM942C: ibm942C, cp942C, ibm-942C, 942C x-IBM943: ibm943, ibm-943, cp943, 943 x-IBM943C: ibm-943C, ibm943C, 943C, cp943C x-IBM948: 948, ibm-948, cp948, ibm948 x-IBM949: ibm-949, cp949, 949, ibm949 x-IBM949C: cp949C, 949C, ibm949C, ibm-949C x-IBM950: 950, cp950, ibm-950, ibm950 x-IBM964: 964, cp964, ibm-964, ibm964 x-IBM970: ibm970, 970, cp970, ibm-eucKR, ibm-970 x-ISCII91: iscii, ST_SEV_358-88, iso-ir-153, csISO153GOST1976874, ISCII91 x-ISO-2022-CN-CNS: ISO2022CN_CNS, ISO-2022-CN-CNS x-ISO-2022-CN-GB: ISO-2022-CN-GB, ISO2022CN_GB x-iso-8859-11: iso-8859-11, iso8859_11 x-JIS0208: JIS0208, csISO87JISX0208, x0208, JIS_C6226-1983, JIS_X0208-1983, iso-ir-87 x-JISAutoDetect: JISAutoDetect x-Johab: johab, ms1361, ksc5601-1992, ksc5601_1992 x-MacArabic: MacArabic x-MacCentralEurope: MacCentralEurope x-MacCroatian: MacCroatian x-MacCyrillic: MacCyrillic x-MacDingbat: MacDingbat x-MacGreek: MacGreek x-MacHebrew: MacHebrew x-MacIceland: MacIceland x-MacRoman: MacRoman x-MacRomania: MacRomania x-MacSymbol: MacSymbol x-MacThai: MacThai x-MacTurkish: MacTurkish x-MacUkraine: MacUkraine x-MS950-HKSCS: MS950_HKSCS x-mswin-936: ms936, ms_936 x-PCK: pck x-windows-874: windows-874, ms874, ms-874 x-windows-949: windows949, ms_949, ms949 x-windows-950: windows-950, ms950
Get the default charset
import java.io.ByteArrayOutputStream;
import java.io.OutputStreamWriter;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.nio.charset.Charset;
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
/**
 * Helper for finding the name of the JVM's default charset. It prefers
 * {@code Charset.defaultCharset()}, looked up reflectively so the class still
 * loads on runtimes that lack that method, and otherwise falls back to the
 * encoding reported by a default {@link OutputStreamWriter}.
 */
public class Main {
  /**
   * Get the default charset
   *
   * @return The default charset name: the {@code name()} of the charset
   *         returned by {@code Charset.defaultCharset()} when that method can
   *         be found and invoked, otherwise the encoding name reported by an
   *         {@link OutputStreamWriter} created without an explicit charset
   * @throws Exception declared by the signature; the reflective failures and
   *         runtime exceptions raised by the lookup are handled here by
   *         switching to the fallback
   */
  public static final String getDefaultCharsetName() throws Exception {
    try {
      // Charset.defaultCharset() only exists from JDK 1.5 on, so it is
      // resolved by name instead of being called directly.
      Method lookup = Charset.class.getMethod("defaultCharset", new Class[0]);
      Charset platformDefault = (Charset) lookup.invoke(null, new Object[0]);
      return platformDefault.name();
    } catch (NoSuchMethodException missing) {
      return legacyDefaultCharsetName();
    } catch (InvocationTargetException failedCall) {
      return legacyDefaultCharsetName();
    } catch (IllegalAccessException denied) {
      return legacyDefaultCharsetName();
    } catch (RuntimeException unexpected) {
      // fall back to old method
      return legacyDefaultCharsetName();
    }
  }

  /** Returns the encoding name used by a writer created with no explicit charset. */
  private static String legacyDefaultCharsetName() {
    return new OutputStreamWriter(new ByteArrayOutputStream()).getEncoding();
  }
}
Read byte data into a byte buffer and convert the byte data into character data
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
/**
 * Reads the file {@code friends.dat} from the working directory into a byte
 * buffer, prints every byte as a decimal number, then decodes the same bytes
 * as US-ASCII and prints the resulting characters.
 */
public class BufferConverter {
  /**
   * Runs the demonstration. Any exception (missing file, I/O failure, bytes
   * that are not valid US-ASCII, ...) is reported by printing its message to
   * standard output.
   *
   * @param arguments command-line arguments; not used
   */
  public static void main(String[] arguments) {
    try {
      String data = "friends.dat";
      FileInputStream inData = new FileInputStream(data);
      try {
        FileChannel inChannel = inData.getChannel();
        long inSize = inChannel.size();
        ByteBuffer source = ByteBuffer.allocate((int) inSize);

        // One read() call is allowed to deliver fewer bytes than requested,
        // so keep reading until the buffer is full or end-of-file is reached.
        while (source.hasRemaining()) {
          if (inChannel.read(source) < 0) {
            break;
          }
        }
        // flip() limits the buffer to the bytes actually read, so a short
        // file never exposes unread zero padding to the code below.
        source.flip();

        while (source.hasRemaining()) {
          System.out.print(source.get() + " ");
        }

        // Back to the first byte so the decoder sees the whole content again.
        source.rewind();
        Charset ascii = Charset.forName("US-ASCII");
        CharsetDecoder toAscii = ascii.newDecoder();
        CharBuffer destination = toAscii.decode(source);

        System.out.println("\n\nNew character data:");
        while (destination.hasRemaining()) {
          System.out.print(destination.get());
        }
      } finally {
        // Closing the stream also closes the channel obtained from it.
        inData.close();
      }
    } catch (Exception e) {
      System.out.println(e.getMessage());
    }
  }
}