Skip to content

Commit bc2dcba

Browse files
committed
PDFBOX-6080: follow /EndOfLine setting if it exists, use heuristics from twelvemonkeys if not
git-svn-id: https://svn.apache.org/repos/asf/pdfbox/trunk@1928878 13f79535-47bb-0310-9956-ffa450edef68
1 parent 9693a6c commit bc2dcba

File tree

1 file changed

+28
-18
lines changed

1 file changed

+28
-18
lines changed

pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxFilter.java

Lines changed: 28 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -67,29 +67,39 @@ public DecodeResult decode(InputStream encoded, OutputStream decoded,
6767
long tiffOptions = 0;
6868
if (k == 0)
6969
{
70-
type = TIFFExtension.COMPRESSION_CCITT_T4; // Group 3 1D
71-
byte[] streamData = new byte[20];
72-
int bytesRead = encoded.read(streamData);
73-
if (bytesRead == -1)
70+
if (decodeParms.containsKey(COSName.END_OF_LINE))
7471
{
75-
throw new IOException("EOF while reading CCITT header");
72+
// PDFBOX-6080: respect the parameter if it exists
73+
boolean hasEndOfLine = decodeParms.getBoolean(COSName.END_OF_LINE, false);
74+
type = hasEndOfLine ? TIFFExtension.COMPRESSION_CCITT_T4 : TIFFExtension.COMPRESSION_CCITT_MODIFIED_HUFFMAN_RLE;
7675
}
77-
PushbackInputStream pushbackInputStream = new PushbackInputStream(encoded, streamData.length);
78-
pushbackInputStream.unread(streamData, 0, bytesRead);
79-
encoded = pushbackInputStream;
80-
if (streamData[0] != 0 || (streamData[1] >> 4 != 1 && streamData[1] != 1))
76+
else
8177
{
82-
// leading EOL (0b000000000001) not found, search further and try RLE if not
83-
// found
84-
type = TIFFExtension.COMPRESSION_CCITT_MODIFIED_HUFFMAN_RLE;
85-
short b = (short) (((streamData[0] << 8) + (streamData[1] & 0xff)) >> 4);
86-
for (int i = 12; i < bytesRead * 8; i++)
78+
// In twelvemonkeys, this part is found in CCITTFaxDecoderStream.findCompressionType()
79+
// needed for 015315-p8-ccitt.pdf, PDFBOX-2123-1bit.pdf, PDFBOX-2778.pdf
80+
type = TIFFExtension.COMPRESSION_CCITT_T4; // Group 3 1D
81+
byte[] streamData = new byte[20];
82+
int bytesRead = encoded.read(streamData);
83+
if (bytesRead == -1)
8784
{
88-
b = (short) ((b << 1) + ((streamData[(i / 8)] >> (7 - (i % 8))) & 0x01));
89-
if ((b & 0xFFF) == 1)
85+
throw new IOException("EOF while reading CCITT header");
86+
}
87+
PushbackInputStream pushbackInputStream = new PushbackInputStream(encoded, streamData.length);
88+
pushbackInputStream.unread(streamData, 0, bytesRead);
89+
encoded = pushbackInputStream;
90+
if (streamData[0] != 0 || (streamData[1] >> 4 != 1 && streamData[1] != 1))
91+
{
92+
// leading EOL (0b000000000001) not found, search further and try RLE if not found
93+
type = TIFFExtension.COMPRESSION_CCITT_MODIFIED_HUFFMAN_RLE;
94+
short b = (short) (((streamData[0] << 8) + (streamData[1] & 0xff)) >> 4);
95+
for (int i = 12; i < bytesRead * 8; i++)
9096
{
91-
type = TIFFExtension.COMPRESSION_CCITT_T4;
92-
break;
97+
b = (short) ((b << 1) + ((streamData[(i / 8)] >> (7 - (i % 8))) & 0x01));
98+
if ((b & 0xFFF) == 1)
99+
{
100+
type = TIFFExtension.COMPRESSION_CCITT_T4;
101+
break;
102+
}
93103
}
94104
}
95105
}

0 commit comments

Comments
 (0)