Skip to content

Commit 4a8f2de

Browse files
authored
#1298 - Fix in LZWDecoder when valid PDF stream data does not start with LZW Clear Table code (256) (#1299)
* Issue 1298 - Fix in LZWDecoder for valid PDF stream data that does not start with the LZW Clear Table code (256). This occurs in PostScript calculator functions in several PDFs we have encountered. Changes include unit tests covering both the common LZW case and this less common scenario.
* Issue 1298 - Fixed style violations in the import statements of the new LZWDecoderTest file.
* Issue 1298 - Removed the unneeded `throws IOException` from the new LZW decoder unit test.
* Issue 1298 - Simplified the unexpected case in the main LZW decode loop so that it silently proceeds instead of throwing a RuntimeException. Not a big fan of this change, but the static code analyzers cannot be satisfied without it.
1 parent 4305087 commit 4a8f2de

File tree

8 files changed

+378
-26
lines changed

8 files changed

+378
-26
lines changed

openpdf/src/main/java/com/lowagie/text/pdf/LZWDecoder.java

Lines changed: 17 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -97,42 +97,33 @@ public void decode(byte[] data, OutputStream uncompData) {
9797
nextData = 0;
9898
nextBits = 0;
9999

100-
int code, oldCode = 0;
100+
int code;
101+
Integer oldCode = null;
101102
byte[] string;
102103

103104
while ((code = getNextCode()) != 257) {
104-
105105
if (code == 256) {
106-
106+
// Clear Table code, reset string table and previous code tracking state
107107
initializeStringTable();
108-
code = getNextCode();
109-
110-
if (code == 257) {
111-
break;
112-
}
113-
114-
writeString(stringTable[code]);
115-
oldCode = code;
116-
117-
} else {
118-
119-
if (code < tableIndex) {
108+
oldCode = null;
109+
continue;
110+
}
120111

121-
string = stringTable[code];
112+
if (code < tableIndex) {
113+
string = stringTable[code];
114+
writeString(string);
122115

123-
writeString(string);
116+
if (oldCode != null) {
124117
addStringToTable(stringTable[oldCode], string[0]);
125-
oldCode = code;
126-
127-
} else {
128-
129-
string = stringTable[oldCode];
130-
string = composeString(string, string[0]);
131-
writeString(string);
132-
addStringToTable(string);
133-
oldCode = code;
134118
}
119+
} else if (oldCode != null) {
120+
string = stringTable[oldCode];
121+
string = composeString(string, string[0]);
122+
writeString(string);
123+
addStringToTable(string);
135124
}
125+
126+
oldCode = code;
136127
}
137128
}
138129

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
package com.lowagie.text.pdf;
2+
3+
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
4+
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
5+
import static org.junit.jupiter.api.Assertions.assertNotNull;
6+
7+
import java.io.ByteArrayOutputStream;
8+
import java.io.IOException;
9+
import java.io.InputStream;
10+
import org.junit.jupiter.api.Test;
11+
12+
class LZWDecoderTest {
13+
14+
@Test
15+
void shouldDecodeType4PSCalcFunction1() {
16+
// Test verifies fix for issue #1298 where LZW decoder gets lost when encoded data does not start with Clear
17+
// Table (256) code, which appears to happen with the short, text-based PostScript calculator functions in
18+
// colorspaces.
19+
20+
// This specific sample causes garbled output from LZWDecoder before the fix for issue #1298.
21+
assertDoesNotThrow(() -> testLzwDecoder(
22+
"/issue1298/lzw-ps-function-1-encoded.bin",
23+
"/issue1298/lzw-ps-function-1-decoded.txt"));
24+
}
25+
26+
@Test
27+
void shouldDecodeType4PSCalcFunction2() {
28+
// This specific sample is also an LZW encoded Type 4 PostScript calculator function that does not start with
29+
// LZW Clear Table code. This data caused the LZWDecoder to throw a null exception before the fix for #1298.
30+
assertDoesNotThrow(() -> testLzwDecoder(
31+
"/issue1298/lzw-ps-function-2-encoded.bin",
32+
"/issue1298/lzw-ps-function-2-decoded.txt"));
33+
}
34+
35+
@Test
36+
void shouldDecodeCmapData() {
37+
// This sample is the much more common case where LZW data starts with the Clear Table (256) code. LZWDecoder
38+
// was already decoding this sample perfectly. Included here to verify the fix and any future changes to
39+
// LZWDecoder do not break this more common case.
40+
assertDoesNotThrow(() -> testLzwDecoder(
41+
"/issue1298/lzw-cmap-table-encoded.bin",
42+
"/issue1298/lzw-cmap-table-decoded.txt"));
43+
}
44+
45+
private void testLzwDecoder(String encodedDataFile, String expectedDecodingFile)
46+
throws IOException {
47+
48+
// Get LZW encoded data from test resource. Actual data pulled from PDF streams in real-life PDF files.
49+
try (InputStream encodedStream = getClass().getResourceAsStream(encodedDataFile)) {
50+
// Read LZW encoded data taken from a PDF stream
51+
assertNotNull(encodedStream);
52+
byte[] encodedData = encodedStream.readAllBytes();
53+
54+
// Use LZWDecoder directly to decode this data. This decoder gets used in these calls:
55+
// PdfStream.getBytes(true)
56+
// PdfReader.getStreamBytes(PrStream)
57+
LZWDecoder decoder = new LZWDecoder();
58+
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
59+
decoder.decode(encodedData, outputStream);
60+
byte[] decodedData = outputStream.toByteArray();
61+
62+
// Read expected result and compare
63+
try (InputStream expectedStream = getClass().getResourceAsStream(expectedDecodingFile)) {
64+
assertNotNull(expectedStream);
65+
byte[] expectedData = expectedStream.readAllBytes();
66+
assertArrayEquals(expectedData, decodedData);
67+
}
68+
}
69+
}
70+
}
Lines changed: 266 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,266 @@
1+
/CIDInit /ProcSet findresource begin
2+
12 dict begin
3+
begincmap
4+
CIDSystemInfo
5+
<< /Registry (Adobe)
6+
/Ordering (UCS) /Supplement 0 >> def
7+
/CMapName /Adobe-Identity-UCS def
8+
/CMapType 2 def
9+
1 begincodespacerange
10+
<00> <FF>
11+
endcodespacerange
12+
100 beginbfchar
13+
<01> <0141>
14+
<02> <2126>
15+
<04> <2113>
16+
<05> <2248>
17+
<06> <02D8>
18+
<07> <02C7>
19+
<08> <02D9>
20+
<09> <0131>
21+
<0A> <212E>
22+
<0D> <FB01>
23+
<0E> <FB02>
24+
<0F> <2044>
25+
<10> <2265>
26+
<11> <02DD>
27+
<12> <221E>
28+
<13> <222B>
29+
<14> <2264>
30+
<15> <25CA>
31+
<16> <0142>
32+
<17> <2212>
33+
<18> <2260>
34+
<1A> <02DB>
35+
<1C> <2202>
36+
<1D> <03C0>
37+
<1E> <220F>
38+
<1F> <221A>
39+
<20> <0020>
40+
<21> <0021>
41+
<22> <0022>
42+
<23> <0023>
43+
<24> <0024>
44+
<25> <0025>
45+
<26> <0026>
46+
<27> <0027>
47+
<28> <0028>
48+
<29> <0029>
49+
<2A> <002A>
50+
<2B> <002B>
51+
<2C> <002C>
52+
<2D> <002D>
53+
<2E> <002E>
54+
<2F> <002F>
55+
<30> <0030>
56+
<31> <0031>
57+
<32> <0032>
58+
<33> <0033>
59+
<34> <0034>
60+
<35> <0035>
61+
<36> <0036>
62+
<37> <0037>
63+
<38> <0038>
64+
<39> <0039>
65+
<3A> <003A>
66+
<3B> <003B>
67+
<3C> <003C>
68+
<3D> <003D>
69+
<3E> <003E>
70+
<3F> <003F>
71+
<40> <0040>
72+
<41> <0041>
73+
<42> <0042>
74+
<43> <0043>
75+
<44> <0044>
76+
<45> <0045>
77+
<46> <0046>
78+
<47> <0047>
79+
<48> <0048>
80+
<49> <0049>
81+
<4A> <004A>
82+
<4B> <004B>
83+
<4C> <004C>
84+
<4D> <004D>
85+
<4E> <004E>
86+
<4F> <004F>
87+
<50> <0050>
88+
<51> <0051>
89+
<52> <0052>
90+
<53> <0053>
91+
<54> <0054>
92+
<55> <0055>
93+
<56> <0056>
94+
<57> <0057>
95+
<58> <0058>
96+
<59> <0059>
97+
<5A> <005A>
98+
<5B> <005B>
99+
<5C> <005C>
100+
<5D> <005D>
101+
<5E> <005E>
102+
<5F> <005F>
103+
<60> <0060>
104+
<61> <0061>
105+
<62> <0062>
106+
<63> <0063>
107+
<64> <0064>
108+
<65> <0065>
109+
<66> <0066>
110+
<67> <0067>
111+
<68> <0068>
112+
<69> <0069>
113+
endbfchar
114+
100 beginbfchar
115+
<6A> <006A>
116+
<6B> <006B>
117+
<6C> <006C>
118+
<6D> <006D>
119+
<6E> <006E>
120+
<6F> <006F>
121+
<70> <0070>
122+
<71> <0071>
123+
<72> <0072>
124+
<73> <0073>
125+
<74> <0074>
126+
<75> <0075>
127+
<76> <0076>
128+
<77> <0077>
129+
<78> <0078>
130+
<79> <0079>
131+
<7A> <007A>
132+
<7B> <007B>
133+
<7C> <007C>
134+
<7D> <007D>
135+
<7E> <007E>
136+
<7F> <02DA>
137+
<80> <20AC>
138+
<81> <2211>
139+
<82> <201A>
140+
<83> <0192>
141+
<84> <201E>
142+
<85> <2026>
143+
<86> <2020>
144+
<87> <2021>
145+
<88> <02C6>
146+
<89> <2030>
147+
<8A> <0160>
148+
<8B> <2039>
149+
<8C> <0152>
150+
<8E> <017D>
151+
<90> <00A0>
152+
<91> <2018>
153+
<92> <2019>
154+
<93> <201C>
155+
<94> <201D>
156+
<95> <2022>
157+
<96> <2013>
158+
<97> <2014>
159+
<98> <02DC>
160+
<99> <2122>
161+
<9A> <0161>
162+
<9B> <203A>
163+
<9C> <0153>
164+
<9D> <00AD>
165+
<9E> <017E>
166+
<9F> <0178>
167+
<A0> <02C9>
168+
<A1> <00A1>
169+
<A2> <00A2>
170+
<A3> <00A3>
171+
<A4> <00A4>
172+
<A5> <00A5>
173+
<A6> <00A6>
174+
<A7> <00A7>
175+
<A8> <00A8>
176+
<A9> <00A9>
177+
<AA> <00AA>
178+
<AB> <00AB>
179+
<AC> <00AC>
180+
<AD> <03A9>
181+
<AE> <00AE>
182+
<AF> <00AF>
183+
<B0> <00B0>
184+
<B1> <00B1>
185+
<B2> <00B2>
186+
<B3> <00B3>
187+
<B4> <00B4>
188+
<B5> <00B5>
189+
<B6> <00B6>
190+
<B7> <00B7>
191+
<B8> <00B8>
192+
<B9> <00B9>
193+
<BA> <00BA>
194+
<BB> <00BB>
195+
<BC> <00BC>
196+
<BD> <00BD>
197+
<BE> <00BE>
198+
<BF> <00BF>
199+
<C0> <00C0>
200+
<C1> <00C1>
201+
<C2> <00C2>
202+
<C3> <00C3>
203+
<C4> <00C4>
204+
<C5> <00C5>
205+
<C6> <00C6>
206+
<C7> <00C7>
207+
<C8> <00C8>
208+
<C9> <00C9>
209+
<CA> <00CA>
210+
<CB> <00CB>
211+
<CC> <00CC>
212+
<CD> <00CD>
213+
<CE> <00CE>
214+
<CF> <00CF>
215+
endbfchar
216+
48 beginbfchar
217+
<D0> <00D0>
218+
<D1> <00D1>
219+
<D2> <00D2>
220+
<D3> <00D3>
221+
<D4> <00D4>
222+
<D5> <00D5>
223+
<D6> <00D6>
224+
<D7> <00D7>
225+
<D8> <00D8>
226+
<D9> <00D9>
227+
<DA> <00DA>
228+
<DB> <00DB>
229+
<DC> <00DC>
230+
<DD> <00DD>
231+
<DE> <00DE>
232+
<DF> <00DF>
233+
<E0> <00E0>
234+
<E1> <00E1>
235+
<E2> <00E2>
236+
<E3> <00E3>
237+
<E4> <00E4>
238+
<E5> <00E5>
239+
<E6> <00E6>
240+
<E7> <00E7>
241+
<E8> <00E8>
242+
<E9> <00E9>
243+
<EA> <00EA>
244+
<EB> <00EB>
245+
<EC> <00EC>
246+
<ED> <00ED>
247+
<EE> <00EE>
248+
<EF> <00EF>
249+
<F0> <00F0>
250+
<F1> <00F1>
251+
<F2> <00F2>
252+
<F3> <00F3>
253+
<F4> <00F4>
254+
<F5> <00F5>
255+
<F6> <00F6>
256+
<F7> <00F7>
257+
<F8> <00F8>
258+
<F9> <00F9>
259+
<FA> <00FA>
260+
<FB> <00FB>
261+
<FC> <00FC>
262+
<FD> <00FD>
263+
<FE> <00FE>
264+
<FF> <00FF>
265+
endbfchar
266+
endcmap CMapName currentdict /CMap defineresource pop end end
1.52 KB
Binary file not shown.

0 commit comments

Comments
 (0)