Skip to content

Commit 58dd79c

Browse files
committed
PDFBOX-5957: refactor, add comments, improve logging
git-svn-id: https://svn.apache.org/repos/asf/pdfbox/trunk@1923832 13f79535-47bb-0310-9956-ffa450edef68
1 parent b9794d5 commit 58dd79c

File tree

1 file changed

+17
-11
lines changed

1 file changed

+17
-11
lines changed

pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java

Lines changed: 17 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -333,7 +333,10 @@ private boolean hasNoFollowingBinData() throws IOException
333 333
boolean noBinData = true;
334 334
int startOpIdx = -1;
335 335
int endOpIdx = -1;
336-
336+
String s = "";
337+
338+
LOG.debug("String after EI: '{}'", new String(binCharTestArr));
339+
337 340
if (readBytes > 0)
338 341
{
339 342
for (int bIdx = 0; bIdx < readBytes; bIdx++)
@@ -360,34 +363,37 @@ else if (startOpIdx != -1 && endOpIdx == -1 &&
360 363
// PDFBOX-3742: just assuming that 1-3 non blanks is a PDF operator isn't enough
361 364
if (endOpIdx != -1 && startOpIdx != -1)
362 365
{
363-
// usually, the operator here is Q, sometimes EMC (PDFBOX-2376), S (PDFBOX-3784).
364-
String s = new String(binCharTestArr, startOpIdx, endOpIdx - startOpIdx);
366+
// usually, the operator here is Q, sometimes EMC (PDFBOX-2376), S (PDFBOX-3784)
367+
s = new String(binCharTestArr, startOpIdx, endOpIdx - startOpIdx);
365 368
if (!"Q".equals(s) && !"EMC".equals(s) && !"S".equals(s))
366 369
{
370+
// operator is not Q, not EMC, not S -> assume binary data
367 371
noBinData = false;
368 372
}
369 373
}
370 374

371-
// only if not close to eof
372-
if (readBytes == MAX_BIN_CHAR_TEST_LENGTH)
375+
// only if not close to EOF
376+
if (startOpIdx != -1 && readBytes == MAX_BIN_CHAR_TEST_LENGTH)
373 377
{
374-
// a PDF operator is 1-3 bytes long
375-
if (startOpIdx != -1 && endOpIdx == -1)
378+
if (endOpIdx == -1)
376 379
{
377 380
endOpIdx = MAX_BIN_CHAR_TEST_LENGTH;
381+
s = new String(binCharTestArr, startOpIdx, endOpIdx - startOpIdx);
378 382
}
379-
if (endOpIdx != -1 && startOpIdx != -1 && endOpIdx - startOpIdx > 3)
383+
LOG.debug("startOpIdx: {} endOpIdx: {} s = {}", startOpIdx, endOpIdx, s);
384+
// a PDF operator is 1-3 bytes long
385+
if (endOpIdx - startOpIdx > 3)
380 386
{
381-
noBinData = false;
387+
noBinData = false; // "operator" too long, assume binary data
382 388
}
383 389
}
384 390
source.rewind(readBytes);
385 391
}
386 392
if (!noBinData)
387 393
{
388 394
LOG.warn(
389-
"ignoring 'EI' assumed to be in the middle of inline image at stream offset {}",
390-
source.getPosition());
395+
"ignoring 'EI' assumed to be in the middle of inline image at stream offset {}, s = '{}'",
396+
source.getPosition(), s);
391 397
}
392 398
return noBinData;
393 399
}

0 commit comments

Comments
 (0)