Skip to content

Commit e0df12d

Browse files
committed
Fix processing the end of an inline image.
DEVSIX-1914
1 parent 9ffb21a commit e0df12d

File tree

2 files changed

+78
-58
lines changed

2 files changed

+78
-58
lines changed

kernel/src/main/java/com/itextpdf/kernel/pdf/canvas/parser/util/InlineImageParsingUtils.java

Lines changed: 21 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@ This file is part of the iText (R) project.
4343
*/
4444
package com.itextpdf.kernel.pdf.canvas.parser.util;
4545

46-
import com.itextpdf.kernel.PdfException;
4746
import com.itextpdf.io.source.PdfTokenizer;
47+
import com.itextpdf.kernel.PdfException;
4848
import com.itextpdf.kernel.pdf.PdfArray;
4949
import com.itextpdf.kernel.pdf.PdfDictionary;
5050
import com.itextpdf.kernel.pdf.PdfName;
@@ -53,8 +53,9 @@ This file is part of the iText (R) project.
5353
import com.itextpdf.kernel.pdf.PdfReader;
5454
import com.itextpdf.kernel.pdf.PdfStream;
5555
import com.itextpdf.kernel.pdf.filters.DoNothingFilter;
56-
import com.itextpdf.kernel.pdf.filters.IFilterHandler;
5756
import com.itextpdf.kernel.pdf.filters.FilterHandlers;
57+
import com.itextpdf.kernel.pdf.filters.FlateDecodeFilter;
58+
import com.itextpdf.kernel.pdf.filters.IFilterHandler;
5859

5960
import java.io.ByteArrayOutputStream;
6061
import java.io.IOException;
@@ -67,6 +68,8 @@ This file is part of the iText (R) project.
6768
*/
6869
public final class InlineImageParsingUtils {
6970

71+
private static final byte[] EI = new byte[]{'E', 'I'};
72+
7073
private InlineImageParsingUtils() {
7174
}
7275

@@ -341,54 +344,30 @@ private static byte[] parseSamples(PdfDictionary imageDictionary, PdfDictionary
341344
}
342345

343346

344-
// read all content until we reach an EI operator surrounded by whitespace.
345-
// The following algorithm has two potential issues: what if the image stream
346-
// contains <ws>EI<ws> ?
347-
// Plus, there are some streams that don't have the <ws> before the EI operator
348-
// it sounds like we would have to actually decode the content stream, which
349-
// I'd rather avoid right now.
347+
// read all content until we reach an EI operator followed by whitespace.
348+
// then decode the content stream to check that bytes that were parsed are really all image bytes
350349
ByteArrayOutputStream baos = new ByteArrayOutputStream();
351-
ByteArrayOutputStream accumulated = new ByteArrayOutputStream();
352350
int ch;
353351
int found = 0;
354352
PdfTokenizer tokeniser = ps.getTokeniser();
355-
356353
while ((ch = tokeniser.read()) != -1) {
357-
if (found == 0 && PdfTokenizer.isWhitespace(ch)) {
358-
found++;
359-
accumulated.write(ch);
360-
} else if (found == 1 && ch == 'E') {
361-
found++;
362-
accumulated.write(ch);
363-
} else if (found == 1 && PdfTokenizer.isWhitespace(ch)) {
364-
// this clause is needed if we have a white space character that is part of the image data
365-
// followed by a whitespace character that precedes the EI operator. In this case, we need
366-
// to flush the first whitespace, then treat the current whitespace as the first potential
367-
// character for the end of stream check. Note that we don't increment 'found' here.
368-
baos.write(accumulated.toByteArray());
369-
accumulated.reset();
370-
accumulated.write(ch);
371-
} else if (found == 2 && ch == 'I') {
372-
found++;
373-
accumulated.write(ch);
374-
} else if (found == 3 && PdfTokenizer.isWhitespace(ch)) {
375-
byte[] tmp = baos.toByteArray();
376-
if (inlineImageStreamBytesAreComplete(tmp, imageDictionary)) {
377-
return tmp;
378-
}
379-
baos.write(accumulated.toByteArray());
380-
accumulated.reset();
381-
382-
baos.write(ch);
383-
found = 0;
384-
354+
if (ch == 'E') {
355+
baos.write(EI, 0, found); // probably some bytes were preserved so write them
356+
found = 1; // just preserve 'E' and do not write it immediately
357+
} else if (found == 1 && ch == 'I') {
358+
found = 2; // just preserve 'EI' and do not write it immediately
385359
} else {
386-
baos.write(accumulated.toByteArray());
387-
accumulated.reset();
388-
360+
if (found == 2 && PdfTokenizer.isWhitespace(ch)) {
361+
byte[] tmp = baos.toByteArray();
362+
if (inlineImageStreamBytesAreComplete(tmp, imageDictionary)) {
363+
return tmp;
364+
}
365+
}
366+
baos.write(EI, 0, found); // probably some bytes were preserved so write them
389367
baos.write(ch);
390368
found = 0;
391369
}
370+
392371
}
393372
throw new InlineImageParseException(PdfException.CannotFindImageDataOrEI);
394373
}
@@ -418,6 +397,7 @@ private static boolean inlineImageStreamBytesAreComplete(byte[] samples, PdfDict
418397
filters.put(PdfName.DCTDecode, stubfilter);
419398
filters.put(PdfName.JBIG2Decode, stubfilter);
420399
filters.put(PdfName.JPXDecode, stubfilter);
400+
((FlateDecodeFilter) filters.get(PdfName.FlateDecode)).setStrictDecoding(true);
421401
PdfReader.decodeBytes(samples, imageDictionary, filters);
422402
} catch (Exception ex) {
423403
return false;

kernel/src/main/java/com/itextpdf/kernel/pdf/filters/FlateDecodeFilter.java

Lines changed: 57 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -60,19 +60,61 @@ This file is part of the iText (R) project.
6060
*/
6161
public class FlateDecodeFilter implements IFilterHandler {
6262

63+
/**
64+
* Defines how the corrupted streams should be treated.
65+
*/
66+
private boolean strictDecoding = false;
67+
68+
/**
69+
* Creates a FlateDecodeFilter.
70+
*/
71+
public FlateDecodeFilter() {
72+
this(false);
73+
}
74+
75+
/**
76+
* Creates a FlateDecodeFilter.
77+
*
78+
* @param strictDecoding {@code true} to reject a corrupted stream instead of trying to read it; {@code false} to fall back to lenient decoding of a corrupted stream
79+
*/
80+
public FlateDecodeFilter(boolean strictDecoding) {
81+
this.strictDecoding = strictDecoding;
82+
}
83+
84+
/**
85+
* Checks whether the decoder will try to read a corrupted stream (not strict) or not (strict)
86+
*
87+
* @return {@code true} if decoding is strict, i.e. the decoder will not try to read a corrupted stream; {@code false} if it will try to read it
88+
*/
89+
public boolean isStrictDecoding() {
90+
return strictDecoding;
91+
}
92+
93+
/**
94+
* Defines how the corrupted streams should be treated.
95+
*
96+
* @param strict true if the decoder should try to read a corrupted stream otherwise false
97+
* @return the decoder
98+
*/
99+
public FlateDecodeFilter setStrictDecoding(boolean strict) {
100+
this.strictDecoding = strict;
101+
return this;
102+
}
103+
63104
@Override
64105
public byte[] decode(byte[] b, PdfName filterName, PdfObject decodeParams, PdfDictionary streamDictionary) {
65106
byte[] res = flateDecode(b, true);
66-
if (res == null)
107+
if (res == null && !strictDecoding) {
67108
res = flateDecode(b, false);
109+
}
68110
b = decodePredictor(res, decodeParams);
69111
return b;
70112
}
71113

72114
/**
73115
* A helper to flateDecode.
74116
*
75-
* @param in the input data
117+
* @param in the input data
76118
* @param strict {@code true} to read a correct stream. {@code false} to try to read a corrupted stream.
77119
* @return the decoded data
78120
*/
@@ -89,45 +131,44 @@ public static byte[] flateDecode(byte[] in, boolean strict) {
89131
zip.close();
90132
out.close();
91133
return out.toByteArray();
92-
}
93-
catch (Exception e) {
134+
} catch (Exception e) {
94135
if (strict)
95136
return null;
96137
return out.toByteArray();
97138
}
98139
}
99140

100141
/**
101-
* @param in Input byte array.
142+
* @param in Input byte array.
102143
* @param decodeParams PdfDictionary of decodeParams.
103144
* @return a byte array
104145
*/
105146
public static byte[] decodePredictor(byte[] in, PdfObject decodeParams) {
106147
if (decodeParams == null || decodeParams.getType() != PdfObject.DICTIONARY)
107148
return in;
108-
PdfDictionary dic = (PdfDictionary)decodeParams;
149+
PdfDictionary dic = (PdfDictionary) decodeParams;
109150
PdfObject obj = dic.get(PdfName.Predictor);
110151
if (obj == null || obj.getType() != PdfObject.NUMBER)
111152
return in;
112-
int predictor = ((PdfNumber)obj).intValue();
153+
int predictor = ((PdfNumber) obj).intValue();
113154
if (predictor < 10 && predictor != 2)
114155
return in;
115156
int width = 1;
116157
obj = dic.get(PdfName.Columns);
117158
if (obj != null && obj.getType() == PdfObject.NUMBER)
118-
width = ((PdfNumber)obj).intValue();
159+
width = ((PdfNumber) obj).intValue();
119160
int colors = 1;
120161
obj = dic.get(PdfName.Colors);
121162
if (obj != null && obj.getType() == PdfObject.NUMBER)
122-
colors = ((PdfNumber)obj).intValue();
163+
colors = ((PdfNumber) obj).intValue();
123164
int bpc = 8;
124165
obj = dic.get(PdfName.BitsPerComponent);
125166
if (obj != null && obj.getType() == PdfObject.NUMBER)
126-
bpc = ((PdfNumber)obj).intValue();
167+
bpc = ((PdfNumber) obj).intValue();
127168
DataInputStream dataStream = new DataInputStream(new ByteArrayInputStream(in));
128169
ByteArrayOutputStream fout = new ByteArrayOutputStream(in.length);
129170
int bytesPerPixel = colors * bpc / 8;
130-
int bytesPerRow = (colors * width * bpc + 7)/8;
171+
int bytesPerRow = (colors * width * bpc + 7) / 8;
131172
byte[] curr = new byte[bytesPerRow];
132173
byte[] prior = new byte[bytesPerRow];
133174
if (predictor == 2) {
@@ -136,7 +177,7 @@ public static byte[] decodePredictor(byte[] in, PdfObject decodeParams) {
136177
for (int row = 0; row < numRows; row++) {
137178
int rowStart = row * bytesPerRow;
138179
for (int col = bytesPerPixel; col < bytesPerRow; col++) {
139-
in[rowStart + col] = (byte)(in[rowStart + col] + in[rowStart + col - bytesPerPixel]);
180+
in[rowStart + col] = (byte) (in[rowStart + col] + in[rowStart + col - bytesPerPixel]);
140181
}
141182
}
142183
}
@@ -174,7 +215,7 @@ public static byte[] decodePredictor(byte[] in, PdfObject decodeParams) {
174215
curr[i] += (byte) (prior[i] / 2);
175216
}
176217
for (int i = bytesPerPixel; i < bytesPerRow; i++) {
177-
curr[i] += (byte) (((curr[i - bytesPerPixel] & 0xff) + (prior[i] & 0xff))/2);
218+
curr[i] += (byte) (((curr[i - bytesPerPixel] & 0xff) + (prior[i] & 0xff)) / 2);
178219
}
179220
break;
180221
case 4: //PNG_FILTER_PAETH
@@ -201,7 +242,7 @@ public static byte[] decodePredictor(byte[] in, PdfObject decodeParams) {
201242
} else {
202243
ret = c;
203244
}
204-
curr[i] += (byte)ret;
245+
curr[i] += (byte) ret;
205246
}
206247
break;
207248
default:
@@ -210,10 +251,9 @@ public static byte[] decodePredictor(byte[] in, PdfObject decodeParams) {
210251
}
211252
try {
212253
fout.write(curr);
213-
}
214-
catch (IOException ioe) {
254+
} catch (IOException ioe) {
215255
// Never happens
216-
assert true: "Happens!";
256+
assert true : "Happens!";
217257
}
218258

219259
// Swap curr and prior

0 commit comments

Comments
 (0)