@@ -32,9 +32,37 @@ class Encoding
3232 public static final Map <String , String > LOCALEID_MAPPING = new HashMap <String , String >();
3333 static
3434 {
35+ // Comment lines based on: https://msdn.microsoft.com/en-us/library/windows/desktop/dd317756(v=vs.85).aspx
36+
37+ // 037 IBM037 IBM EBCDIC US-Canada
38+ // 437 IBM437 OEM United States
39+ // 500 IBM500 IBM EBCDIC International
40+ // 708 ASMO-708 Arabic (ASMO 708)
41+ // 709 Arabic (ASMO-449+, BCON V4)
42+ // 710 Arabic - Transparent Arabic
43+ // 720 DOS-720 Arabic (Transparent ASMO); Arabic (DOS)
44+ // 737 ibm737 OEM Greek (formerly 437G); Greek (DOS)
45+ // 775 ibm775 OEM Baltic; Baltic (DOS)
46+ // 850 ibm850 OEM Multilingual Latin 1; Western European (DOS)
47+ // 852 ibm852 OEM Latin 2; Central European (DOS)
48+ // 855 IBM855 OEM Cyrillic (primarily Russian)
49+ // 857 ibm857 OEM Turkish; Turkish (DOS)
50+ // 858 IBM00858 OEM Multilingual Latin 1 + Euro symbol
51+ // 860 IBM860 OEM Portuguese; Portuguese (DOS)
52+ // 861 ibm861 OEM Icelandic; Icelandic (DOS)
53+ // 862 DOS-862 OEM Hebrew; Hebrew (DOS)
54+ // 863 IBM863 OEM French Canadian; French Canadian (DOS)
55+ // 864 IBM864 OEM Arabic; Arabic (864)
56+ // 865 IBM865 OEM Nordic; Nordic (DOS)
57+ // 866 cp866 OEM Russian; Cyrillic (DOS)
58+ // 869 ibm869 OEM Modern Greek; Greek, Modern (DOS)
59+ // 870 IBM870 IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2
60+ // 874 windows-874 ANSI/OEM Thai (ISO 8859-11); Thai (Windows)
61+ // 875 cp875 IBM EBCDIC Greek Modern
3562 LOCALEID_MAPPING .put ("932" , "SJIS" ); // Japanese
3663 LOCALEID_MAPPING .put ("936" , "Cp936" ); // Simplified Chinese
3764 LOCALEID_MAPPING .put ("949" , "Cp949" ); // Korean
65+ // 950 big5 ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5)
3866 LOCALEID_MAPPING .put ("1025" , "Cp1256" ); // Arabic (Saudi Arabia)
3967 LOCALEID_MAPPING .put ("1026" , "Cp1251" ); // Bulgarian
4068 LOCALEID_MAPPING .put ("1028" , "Cp950" ); // Chinese (Taiwan)
@@ -45,6 +73,7 @@ class Encoding
4573 LOCALEID_MAPPING .put ("1041" , "SJIS" ); // Japanese
4674 LOCALEID_MAPPING .put ("1042" , "Cp949" ); // Korean
4775 LOCALEID_MAPPING .put ("1045" , "Cp1250" ); // Polish
76+ // 1047 IBM01047 IBM EBCDIC Latin 1/Open System
4877 LOCALEID_MAPPING .put ("1048" , "Cp1250" ); // Romanian
4978 LOCALEID_MAPPING .put ("1049" , "Cp1251" ); // Russian
5079 LOCALEID_MAPPING .put ("1050" , "Cp1250" ); // Croatian
@@ -68,13 +97,28 @@ class Encoding
6897 LOCALEID_MAPPING .put ("1091" , "Cp1254" ); // Uzbek (Latin)
6998 LOCALEID_MAPPING .put ("1092" , "Cp1251" ); // Tatar
7099 LOCALEID_MAPPING .put ("1104" , "Cp1251" ); // Mongolian (Cyrillic)
100+ // 1140 IBM01140 IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro)
101+ // 1141 IBM01141 IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro)
102+ // 1142 IBM01142 IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro)
103+ // 1143 IBM01143 IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro)
104+ // 1144 IBM01144 IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro)
105+ // 1145 IBM01145 IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro)
106+ // 1146 IBM01146 IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro)
107+ // 1147 IBM01147 IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro)
108+ // 1148 IBM01148 IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro)
109+ // 1149 IBM01149 IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro)
110+ // 1200 utf-16 Unicode UTF-16, little endian byte order (BMP of ISO 10646)
111+ // 1201 unicodeFFFE Unicode UTF-16, big endian byte order
71112 LOCALEID_MAPPING .put ("1250" , "Cp1250" ); // Windows Latin 2 (Central Europe)
72113 LOCALEID_MAPPING .put ("1251" , "Cp1251" ); // Cyrillic
73114 LOCALEID_MAPPING .put ("1252" , "Cp1252" ); // Latin
74115 LOCALEID_MAPPING .put ("1253" , "Cp1253" ); // Greek
75116 LOCALEID_MAPPING .put ("1254" , "Cp1254" ); // Turkish
76117 LOCALEID_MAPPING .put ("1255" , "Cp1255" ); // Windows Hebrew
77118 LOCALEID_MAPPING .put ("1256" , "Cp1256" ); // Windows Arabic
119+ LOCALEID_MAPPING .put ("1257" , "Cp1257" ); // Baltic
120+ LOCALEID_MAPPING .put ("1258" , "Cp1258" ); // Vietnamese
121+ // 1361 Johab Korean (Johab)
78122 LOCALEID_MAPPING .put ("2049" , "Cp1256" ); // Arabic (Iraq)
79123 LOCALEID_MAPPING .put ("2052" , "MS936" ); // Chinese (PRC)
80124 LOCALEID_MAPPING .put ("2074" , "Cp1250" ); // Serbian (Latin)
@@ -92,12 +136,110 @@ class Encoding
92136 LOCALEID_MAPPING .put ("8193" , "Cp1256" ); // Arabic (Oman)
93137 LOCALEID_MAPPING .put ("9217" , "Cp1256" ); // Arabic (Yemen)
94138 LOCALEID_MAPPING .put ("10000" , "MacRoman" ); // Mac Roman
139+ // 10001 x-mac-japanese Japanese (Mac)
140+ // 10002 x-mac-chinesetrad MAC Traditional Chinese (Big5); Chinese Traditional (Mac)
141+ // 10003 x-mac-korean Korean (Mac)
142+ // 10004 x-mac-arabic Arabic (Mac)
143+ // 10005 x-mac-hebrew Hebrew (Mac)
144+ // 10006 x-mac-greek Greek (Mac)
145+ // 10007 x-mac-cyrillic Cyrillic (Mac)
146+ // 10008 x-mac-chinesesimp MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac)
147+ // 10010 x-mac-romanian Romanian (Mac)
148+ // 10017 x-mac-ukrainian Ukrainian (Mac)
149+ // 10021 x-mac-thai Thai (Mac)
150+ // 10029 x-mac-ce MAC Latin 2; Central European (Mac)
151+ // 10079 x-mac-icelandic Icelandic (Mac)
152+ // 10081 x-mac-turkish Turkish (Mac)
153+ // 10082 x-mac-croatian Croatian (Mac)
95154 LOCALEID_MAPPING .put ("10241" , "Cp1256" ); // Arabic (Syria)
96155 LOCALEID_MAPPING .put ("11265" , "Cp1256" ); // Arabic (Jordan)
156+ // 12000 utf-32 Unicode UTF-32, little endian byte order
157+ // 12001 utf-32BE Unicode UTF-32, big endian byte order
97158 LOCALEID_MAPPING .put ("12289" , "Cp1256" ); // Arabic (Lebanon)
98159 LOCALEID_MAPPING .put ("13313" , "Cp1256" ); // Arabic (Kuwait)
99160 LOCALEID_MAPPING .put ("14337" , "Cp1256" ); // Arabic (U.A.E.)
100161 LOCALEID_MAPPING .put ("15361" , "Cp1256" ); // Arabic (Bahrain)
101162 LOCALEID_MAPPING .put ("16385" , "Cp1256" ); // Arabic (Qatar)
163+ // 20000 x-Chinese_CNS CNS Taiwan; Chinese Traditional (CNS)
164+ // 20001 x-cp20001 TCA Taiwan
165+ // 20002 x_Chinese-Eten Eten Taiwan; Chinese Traditional (Eten)
166+ // 20003 x-cp20003 IBM5550 Taiwan
167+ // 20004 x-cp20004 TeleText Taiwan
168+ // 20005 x-cp20005 Wang Taiwan
169+ // 20105 x-IA5 IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5)
170+ // 20106 x-IA5-German IA5 German (7-bit)
171+ // 20107 x-IA5-Swedish IA5 Swedish (7-bit)
172+ // 20108 x-IA5-Norwegian IA5 Norwegian (7-bit)
173+ // 20127 us-ascii US-ASCII (7-bit)
174+ // 20261 x-cp20261 T.61
175+ // 20269 x-cp20269 ISO 6937 Non-Spacing Accent
176+ // 20273 IBM273 IBM EBCDIC Germany
177+ // 20277 IBM277 IBM EBCDIC Denmark-Norway
178+ // 20278 IBM278 IBM EBCDIC Finland-Sweden
179+ // 20280 IBM280 IBM EBCDIC Italy
180+ // 20284 IBM284 IBM EBCDIC Latin America-Spain
181+ // 20285 IBM285 IBM EBCDIC United Kingdom
182+ // 20290 IBM290 IBM EBCDIC Japanese Katakana Extended
183+ // 20297 IBM297 IBM EBCDIC France
184+ // 20420 IBM420 IBM EBCDIC Arabic
185+ // 20423 IBM423 IBM EBCDIC Greek
186+ // 20424 IBM424 IBM EBCDIC Hebrew
187+ // 20833 x-EBCDIC-KoreanExtended IBM EBCDIC Korean Extended
188+ // 20838 IBM-Thai IBM EBCDIC Thai
189+ // 20866 koi8-r Russian (KOI8-R); Cyrillic (KOI8-R)
190+ // 20871 IBM871 IBM EBCDIC Icelandic
191+ // 20880 IBM880 IBM EBCDIC Cyrillic Russian
192+ // 20905 IBM905 IBM EBCDIC Turkish
193+ // 20924 IBM00924 IBM EBCDIC Latin 1/Open System (1047 + Euro symbol)
194+ // 20932 EUC-JP Japanese (JIS 0208-1990 and 0212-1990)
195+ // 20936 x-cp20936 Simplified Chinese (GB2312); Chinese Simplified (GB2312-80)
196+ // 20949 x-cp20949 Korean Wansung
197+ // 21025 cp1025 IBM EBCDIC Cyrillic Serbian-Bulgarian
198+ // 21027 (deprecated)
199+ // 21866 koi8-u Ukrainian (KOI8-U); Cyrillic (KOI8-U)
200+ // 28591 iso-8859-1 ISO 8859-1 Latin 1; Western European (ISO)
201+ // 28592 iso-8859-2 ISO 8859-2 Central European; Central European (ISO)
202+ // 28593 iso-8859-3 ISO 8859-3 Latin 3
203+ // 28594 iso-8859-4 ISO 8859-4 Baltic
204+ // 28595 iso-8859-5 ISO 8859-5 Cyrillic
205+ // 28596 iso-8859-6 ISO 8859-6 Arabic
206+ // 28597 iso-8859-7 ISO 8859-7 Greek
207+ // 28598 iso-8859-8 ISO 8859-8 Hebrew; Hebrew (ISO-Visual)
208+ // 28599 iso-8859-9 ISO 8859-9 Turkish
209+ // 28603 iso-8859-13 ISO 8859-13 Estonian
210+ // 28605 iso-8859-15 ISO 8859-15 Latin 9
211+ // 29001 x-Europa Europa 3
212+ // 38598 iso-8859-8-i ISO 8859-8 Hebrew; Hebrew (ISO-Logical)
213+ // 50220 iso-2022-jp ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)
214+ // 50221 csISO2022JP ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana)
215+ // 50222 iso-2022-jp ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI)
216+ // 50225 iso-2022-kr ISO 2022 Korean
217+ // 50227 x-cp50227 ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022)
218+ // 50229 ISO 2022 Traditional Chinese
219+ // 50930 EBCDIC Japanese (Katakana) Extended
220+ // 50931 EBCDIC US-Canada and Japanese
221+ // 50933 EBCDIC Korean Extended and Korean
222+ // 50935 EBCDIC Simplified Chinese Extended and Simplified Chinese
223+ // 50936 EBCDIC Simplified Chinese
224+ // 50937 EBCDIC US-Canada and Traditional Chinese
225+ // 50939 EBCDIC Japanese (Latin) Extended and Japanese
226+ // 51932 euc-jp EUC Japanese
227+ // 51936 EUC-CN EUC Simplified Chinese; Chinese Simplified (EUC)
228+ // 51949 euc-kr EUC Korean
229+ // 51950 EUC Traditional Chinese
230+ // 52936 hz-gb-2312 HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ)
231+ // 54936 GB18030 Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030)
232+ // 57002 x-iscii-de ISCII Devanagari
233+ // 57003 x-iscii-be ISCII Bangla
234+ // 57004 x-iscii-ta ISCII Tamil
235+ // 57005 x-iscii-te ISCII Telugu
236+ // 57006 x-iscii-as ISCII Assamese
237+ // 57007 x-iscii-or ISCII Odia
238+ // 57008 x-iscii-ka ISCII Kannada
239+ // 57009 x-iscii-ma ISCII Malayalam
240+ // 57010 x-iscii-gu ISCII Gujarati
241+ // 57011 x-iscii-pa ISCII Punjabi
242+ LOCALEID_MAPPING .put ("65000" , null ); // UTF-7 - not a supported Java encoding, see: http://stackoverflow.com/questions/19861987/java-io-unsupportedencodingexception-unicode-1-1-utf-7
243+ LOCALEID_MAPPING .put ("65001" , "UTF-8" ); // UTF-8
102244 }
103245}
0 commit comments