Skip to content

Commit 49da4bf

Browse files
committed
Add support for additional code pages
(437, 874, 950, 10001, 10004, 10005, 10006, 10007, 10010, 10017, 10021, 10029, 10079, 10081, 10082)
1 parent 87c1892 commit 49da4bf

File tree

12 files changed

+1190
-15
lines changed

12 files changed

+1190
-15
lines changed

RTF Parser Kit/src/com/rtfparserkit/parser/standard/Encoding.java

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ class Encoding
3535
// Comment lines based on: https://msdn.microsoft.com/en-us/library/windows/desktop/dd317756(v=vs.85).aspx
3636

3737
// 037 IBM037 IBM EBCDIC US-Canada
38-
// 437 IBM437 OEM United States
38+
LOCALEID_MAPPING.put("437", "Cp437"); // IBM437 OEM United States
3939
// 500 IBM500 IBM EBCDIC International
4040
// 708 ASMO-708 Arabic (ASMO 708)
4141
// 709 Arabic (ASMO-449+, BCON V4)
@@ -57,12 +57,12 @@ class Encoding
5757
// 866 cp866 OEM Russian; Cyrillic (DOS)
5858
// 869 ibm869 OEM Modern Greek; Greek, Modern (DOS)
5959
// 870 IBM870 IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2
60-
// 874 windows-874 ANSI/OEM Thai (ISO 8859-11); Thai (Windows)
60+
LOCALEID_MAPPING.put("874", "Cp874"); // windows-874 ANSI/OEM Thai (ISO 8859-11); Thai (Windows)
6161
// 875 cp875 IBM EBCDIC Greek Modern
6262
LOCALEID_MAPPING.put("932", "SJIS"); // Japanese
6363
LOCALEID_MAPPING.put("936", "Cp936"); // Simplified Chinese
6464
LOCALEID_MAPPING.put("949", "Cp949"); // Korean
65-
// 950 big5 ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5)
65+
LOCALEID_MAPPING.put("950", "Cp950"); // big5 ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5)
6666
LOCALEID_MAPPING.put("1025", "Cp1256"); // Arabic (Saudi Arabia)
6767
LOCALEID_MAPPING.put("1026", "Cp1251"); // Bulgarian
6868
LOCALEID_MAPPING.put("1028", "Cp950"); // Chinese (Taiwan)
@@ -136,21 +136,21 @@ class Encoding
136136
LOCALEID_MAPPING.put("8193", "Cp1256"); // Arabic (Oman)
137137
LOCALEID_MAPPING.put("9217", "Cp1256"); // Arabic (Yemen)
138138
LOCALEID_MAPPING.put("10000", "MacRoman"); // Mac Roman
139-
// 10001 x-mac-japanese Japanese (Mac)
139+
LOCALEID_MAPPING.put("10001", "Shift_JIS"); // x-mac-japanese Japanese (Mac)
140140
// 10002 x-mac-chinesetrad MAC Traditional Chinese (Big5); Chinese Traditional (Mac)
141141
// 10003 x-mac-korean Korean (Mac)
142-
// 10004 x-mac-arabic Arabic (Mac)
143-
// 10005 x-mac-hebrew Hebrew (Mac)
144-
// 10006 x-mac-greek Greek (Mac)
145-
// 10007 x-mac-cyrillic Cyrillic (Mac)
142+
LOCALEID_MAPPING.put("10004", "x-MacArabic"); // x-mac-arabic Arabic (Mac)
143+
LOCALEID_MAPPING.put("10005", "x-MacHebrew"); // x-mac-hebrew Hebrew (Mac)
144+
LOCALEID_MAPPING.put("10006", "x-MacGreek"); // x-mac-greek Greek (Mac)
145+
LOCALEID_MAPPING.put("10007", "x-MacCyrillic"); // x-mac-cyrillic Cyrillic (Mac)
146146
// 10008 x-mac-chinesesimp MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac)
147-
// 10010 x-mac-romanian Romanian (Mac)
148-
// 10017 x-mac-ukrainian Ukrainian (Mac)
149-
// 10021 x-mac-thai Thai (Mac)
150-
// 10029 x-mac-ce MAC Latin 2; Central European (Mac)
151-
// 10079 x-mac-icelandic Icelandic (Mac)
152-
// 10081 x-mac-turkish Turkish (Mac)
153-
// 10082 x-mac-croatian Croatian (Mac)
147+
LOCALEID_MAPPING.put("10010", "x-MacRomania"); // x-mac-romanian Romanian (Mac)
148+
LOCALEID_MAPPING.put("10017", "x-MacUkraine"); // x-mac-ukrainian Ukrainian (Mac)
149+
LOCALEID_MAPPING.put("10021", "x-MacThai"); // x-mac-thai Thai (Mac)
150+
LOCALEID_MAPPING.put("10029", "x-MacCentralEurope"); // x-mac-ce MAC Latin 2; Central European (Mac)
151+
LOCALEID_MAPPING.put("10079", "x-MacIceland"); // x-mac-icelandic Icelandic (Mac)
152+
LOCALEID_MAPPING.put("10081", "x-MacTurkish"); // x-mac-turkish Turkish (Mac)
153+
LOCALEID_MAPPING.put("10082", "x-MacCroatian"); // x-mac-croatian Croatian (Mac)
154154
LOCALEID_MAPPING.put("10241", "Cp1256"); // Arabic (Syria)
155155
LOCALEID_MAPPING.put("11265", "Cp1256"); // Arabic (Jordan)
156156
// 12000 utf-32 Unicode UTF-32, little endian byte order

RTF Parser Kit/test/com/rtfparserkit/parser/standard/StandardRtfParserTest.java

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,36 @@ public void testGreekEncoding() throws Exception
8989
TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "testGreekEncoding");
9090
}
9191

92+
@Test
93+
public void test437Encoding() throws Exception
94+
{
95+
TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "test437Encoding");
96+
}
97+
98+
@Test
99+
public void test874Encoding() throws Exception
100+
{
101+
TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "test874Encoding");
102+
}
103+
104+
@Test
105+
public void test950Encoding() throws Exception
106+
{
107+
TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "test950Encoding");
108+
}
109+
110+
@Test
111+
public void test10001Encoding() throws Exception
112+
{
113+
TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "test10001Encoding");
114+
}
115+
116+
@Test
117+
public void test10007Encoding() throws Exception
118+
{
119+
TestUtilities.assertRtfParserDumpMatches(this, new StandardRtfParser(), "test10007Encoding");
120+
}
121+
92122
@Test
93123
public void testKoreanEncoding() throws Exception
94124
{
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{\rtf1\mac\ansicpg10001\cocoartf824\cocoasubrtf230
2+
{\fonttbl\f0\fnil\fcharset78 HiraKakuPro-W6;\f1\fswiss\fcharset77 Helvetica-Bold;\f2\fswiss\fcharset77 Helvetica;
3+
\f3\fnil\fcharset78 HiraKakuPro-W3;}
4+
{\colortbl;\red255\green255\blue255;}
5+
{\*\listtable{\list\listtemplateid1\listhybrid{\listlevel\levelnfc23\levelnfcn23\leveljc2\leveljcn2\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{disc\}}{\leveltext\leveltemplateid0\'02\'05.;}{\levelnumbers\'01;}}{\listname ;}\listid1}}
6+
{\*\listoverridetable{\listoverride\listid1\listoverridecount0\ls1}}
7+
\margl1440\margr1440\vieww12240\viewh8980\viewkind0
8+
\pard\tx220\tx720\tx1133\tx1700\tx2267\tx2834\tx3401\tx3968\tx4535\tx5102\tx5669\tx6236\tx6803\li720\fi-720\ql\qnatural\pardirnatural
9+
\ls1\ilvl0
10+
\f0\b\fs50 \cf0 \'82\'a8\'93\'c7\'82\'dd\'82\'ad\'82\'be\'82\'b3\'82\'a2
11+
}
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<rtf>
3+
<group>
4+
<command name="rtf" parameter="1"/>
5+
<command name="cocoartf" parameter="824"/>
6+
<command name="cocoasubrtf" parameter="230"/>
7+
<group>
8+
<command name="fonttbl"/>
9+
<command name="f" parameter="0"/>
10+
<command name="fnil"/>
11+
<command name="fcharset" parameter="78"/>
12+
<chars>HiraKakuPro-W6;</chars>
13+
<command name="f" parameter="1"/>
14+
<command name="fswiss"/>
15+
<command name="fcharset" parameter="77"/>
16+
<chars>Helvetica-Bold;</chars>
17+
<command name="f" parameter="2"/>
18+
<command name="fswiss"/>
19+
<command name="fcharset" parameter="77"/>
20+
<chars>Helvetica;</chars>
21+
<command name="f" parameter="3"/>
22+
<command name="fnil"/>
23+
<command name="fcharset" parameter="78"/>
24+
<chars>HiraKakuPro-W3;</chars>
25+
</group>
26+
<group>
27+
<command name="colortbl"/>
28+
<chars>;</chars>
29+
<command name="red" parameter="255"/>
30+
<command name="green" parameter="255"/>
31+
<command name="blue" parameter="255"/>
32+
<chars>;</chars>
33+
</group>
34+
<group>
35+
<command name="listtable" optional="true"/>
36+
<group>
37+
<command name="list"/>
38+
<command name="listtemplateid" parameter="1"/>
39+
<command name="listhybrid"/>
40+
<group>
41+
<command name="listlevel"/>
42+
<command name="levelnfc" parameter="23"/>
43+
<command name="levelnfcn" parameter="23"/>
44+
<command name="leveljc" parameter="2"/>
45+
<command name="leveljcn" parameter="2"/>
46+
<command name="levelfollow" parameter="0"/>
47+
<command name="levelstartat" parameter="1"/>
48+
<command name="levelspace" parameter="360"/>
49+
<command name="levelindent" parameter="0"/>
50+
<group>
51+
<chars>{disc}</chars>
52+
</group>
53+
<group>
54+
<command name="leveltext"/>
55+
<command name="leveltemplateid" parameter="0"/>
56+
<chars>.;</chars>
57+
</group>
58+
<group>
59+
<command name="levelnumbers"/>
60+
<chars>;</chars>
61+
</group>
62+
</group>
63+
<group>
64+
<command name="listname"/>
65+
<chars>;</chars>
66+
</group>
67+
<command name="listid" parameter="1"/>
68+
</group>
69+
</group>
70+
<group>
71+
<command name="listoverridetable" optional="true"/>
72+
<group>
73+
<command name="listoverride"/>
74+
<command name="listid" parameter="1"/>
75+
<command name="listoverridecount" parameter="0"/>
76+
<command name="ls" parameter="1"/>
77+
</group>
78+
</group>
79+
<command name="margl" parameter="1440"/>
80+
<command name="margr" parameter="1440"/>
81+
<command name="viewkind" parameter="0"/>
82+
<command name="pard"/>
83+
<command name="tx" parameter="220"/>
84+
<command name="tx" parameter="720"/>
85+
<command name="tx" parameter="1133"/>
86+
<command name="tx" parameter="1700"/>
87+
<command name="tx" parameter="2267"/>
88+
<command name="tx" parameter="2834"/>
89+
<command name="tx" parameter="3401"/>
90+
<command name="tx" parameter="3968"/>
91+
<command name="tx" parameter="4535"/>
92+
<command name="tx" parameter="5102"/>
93+
<command name="tx" parameter="5669"/>
94+
<command name="tx" parameter="6236"/>
95+
<command name="tx" parameter="6803"/>
96+
<command name="li" parameter="720"/>
97+
<command name="fi" parameter="-720"/>
98+
<command name="ql"/>
99+
<command name="ls" parameter="1"/>
100+
<command name="ilvl" parameter="0"/>
101+
<command name="f" parameter="0"/>
102+
<command name="b"/>
103+
<command name="fs" parameter="50"/>
104+
<command name="cf" parameter="0"/>
105+
<chars>お読みください</chars>
106+
</group>
107+
</rtf>
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{\rtf1\mac\ansicpg10007\cocoartf102
2+
{\fonttbl\f0\fnil\fcharset77 LucidaGrande;\f1\fnil\fcharset77 Georgia;\f2\fnil\fcharset77 Verdana;
3+
}
4+
{\colortbl;\red255\green255\blue255;}
5+
\margl1440\margr1440\vieww16780\viewh13600\viewkind0
6+
\pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\ql\qnatural
7+
8+
\f0\fs36 \cf0 \uc0\u1050 \u1086 \u1084 \u1084 \u1072 \u1085 \u1076 \u1072 \u1088 \u1072 \u1079 \u1088 \u1072 \u1073 \u1086 \u1090 \u1095 \u1080 \u1082 \u1086 \u1074
9+
\f1 \
10+
}
11+
12+
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<rtf>
3+
<group>
4+
<command name="rtf" parameter="1"/>
5+
<command name="cocoartf" parameter="102"/>
6+
<group>
7+
<command name="fonttbl"/>
8+
<command name="f" parameter="0"/>
9+
<command name="fnil"/>
10+
<command name="fcharset" parameter="77"/>
11+
<chars>LucidaGrande;</chars>
12+
<command name="f" parameter="1"/>
13+
<command name="fnil"/>
14+
<command name="fcharset" parameter="77"/>
15+
<chars>Georgia;</chars>
16+
<command name="f" parameter="2"/>
17+
<command name="fnil"/>
18+
<command name="fcharset" parameter="77"/>
19+
<chars>Verdana;</chars>
20+
</group>
21+
<group>
22+
<command name="colortbl"/>
23+
<chars>;</chars>
24+
<command name="red" parameter="255"/>
25+
<command name="green" parameter="255"/>
26+
<command name="blue" parameter="255"/>
27+
<chars>;</chars>
28+
</group>
29+
<command name="margl" parameter="1440"/>
30+
<command name="margr" parameter="1440"/>
31+
<command name="viewkind" parameter="0"/>
32+
<command name="pard"/>
33+
<command name="tx" parameter="720"/>
34+
<command name="tx" parameter="1440"/>
35+
<command name="tx" parameter="2160"/>
36+
<command name="tx" parameter="2880"/>
37+
<command name="tx" parameter="3600"/>
38+
<command name="tx" parameter="4320"/>
39+
<command name="tx" parameter="5040"/>
40+
<command name="tx" parameter="5760"/>
41+
<command name="tx" parameter="6480"/>
42+
<command name="tx" parameter="7200"/>
43+
<command name="tx" parameter="7920"/>
44+
<command name="tx" parameter="8640"/>
45+
<command name="ql"/>
46+
<command name="f" parameter="0"/>
47+
<command name="fs" parameter="36"/>
48+
<command name="cf" parameter="0"/>
49+
<chars>Комманда разработчиков </chars>
50+
<command name="f" parameter="1"/>
51+
<command name="par"/>
52+
</group>
53+
</rtf>
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
{\rtf1 \mac \ansicpg437 \cocoartf102 {\fonttbl {\f0 \fnil \fcharset77 Times New Roman{\*\falt Times}
2+
;}
3+
{\f1 \fnil \fcharset77 Helvetica-Bold{\*\falt Helvetica}
4+
;}
5+
{\f2 \fnil \fcharset77 Helvetica;}
6+
{\f3 \fnil \fcharset77 LucidaGrande{\*\falt Lucida Grande}
7+
;}
8+
}
9+
{\colortbl ;\red0 \green0 \blue0 ;}
10+
{\stylesheet {\*\cs335 {\*\nsmpltxt The quick brown fox jumped over the lazy dogs.}
11+
\super footnote reference;}
12+
{\*\cs336 {\*\nsmpltxt The quick brown fox jumped over the lazy dogs.}
13+
\super endnote reference;}
14+
{\s337 \nisusnoteplacement0 \nisusreferencestyle335 {\*\nsmpltxt Some text goes here so you can see what your style will look like.}
15+
\f3 footnote text;}
16+
{\s338 \nisusnoteplacement1 \nisusreferencestyle336 {\*\nsmpltxt Sample text for Foot/End Notes Style}
17+
\f3 endnote text;}
18+
}
19+
\deftab720 \defformat \viewkind1 \viewzk1 {\*\nisuswindow \x70 \y194 \w741 \h638 }
20+
\nshwinv0 \nshwpg1 \hyphauto0 \ftnnar \endnotes \aendnotes \aftnnar \fet2 \ftnbj \paperw12240 \paperh15840 \margl1440 \margr1440 \margt1440 \margb1440 \gutter0 \pgnstart1 \nocolbal \sectd \sbknone \cols1 \ltrsect \colbalsxn0 \marglsxn1440 \margrsxn1440 \margtsxn1440 \margbsxn1440 \guttersxn0 \headery720 \footery720 \pgnstarts1 \pgnrestart \pgndec \sxnstarts1 \sxnrestart \sxndec {\header \pard \ql \sb0 \sa0 \sl240 \slmult1 \ilvl0 \li0 \lin0 \fi0 \ri0 \rin0 \par }
21+
{\footer \pard \ql \sb0 \sa0 \sl240 \slmult1 \ilvl0 \li0 \lin0 \fi0 \ri0 \rin0 \par }
22+
{\pard \ql \sb0 \sa0 \sl240 \slmult1 \ilvl0 \li0 \lin0 \fi0 \ri0 \rin0 {\f1 \fs24 \b \cf1 Test Document\par
23+
\f2 \b0 \par
24+
\b Test Title: \tab (Test Subtitle)\par
25+
\b0 \par
26+
\b Test Heading:\b0 \par
27+
Test Text. \par}
28+
}
29+
}

0 commit comments

Comments
 (0)