Skip to content

Commit 5fb947b

Browse files
committed
PDFBOX-5957: detect number after EI
git-svn-id: https://svn.apache.org/repos/asf/pdfbox/trunk@1923838 13f79535-47bb-0310-9956-ffa450edef68
1 parent eebce4d commit 5fb947b

File tree

1 file changed

+8
-6
lines changed

1 file changed

+8
-6
lines changed

pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -363,11 +363,13 @@ else if (startOpIdx != -1 && endOpIdx == -1 &&
363363
// PDFBOX-3742: just assuming that 1-3 non blanks is a PDF operator isn't enough
364364
if (noBinData && endOpIdx != -1 && startOpIdx != -1)
365365
{
366-
// usually, the operator here is Q, sometimes EMC (PDFBOX-2376), S (PDFBOX-3784)
366+
// usually, the operator here is Q, sometimes EMC (PDFBOX-2376), S (PDFBOX-3784),
367+
// or a number (PDFBOX-5957)
367368
s = new String(binCharTestArr, startOpIdx, endOpIdx - startOpIdx);
368-
if (!"Q".equals(s) && !"EMC".equals(s) && !"S".equals(s))
369+
if (!"Q".equals(s) && !"EMC".equals(s) && !"S".equals(s) &&
370+
!s.matches("^\\d*\\.?\\d*$"))
369371
{
370-
// operator is not Q, not EMC, not S -> assume binary data
372+
// operator is not Q, not EMC, not S, nor a number -> assume binary data
371373
noBinData = false;
372374
}
373375
}
@@ -380,9 +382,9 @@ else if (startOpIdx != -1 && endOpIdx == -1 &&
380382
endOpIdx = MAX_BIN_CHAR_TEST_LENGTH;
381383
s = new String(binCharTestArr, startOpIdx, endOpIdx - startOpIdx);
382384
}
383-
LOG.debug("startOpIdx: {} endOpIdx: {} s = {}", startOpIdx, endOpIdx, s);
384-
// a PDF operator is 1-3 bytes long
385-
if (endOpIdx - startOpIdx > 3)
385+
LOG.debug("startOpIdx: {} endOpIdx: {} s = '{}'", startOpIdx, endOpIdx, s);
386+
// look for token of 3 chars max or a number
387+
if (endOpIdx - startOpIdx > 3 && !s.matches("^\\d*\\.?\\d*$"))
386388
{
387389
noBinData = false; // "operator" too long, assume binary data
388390
}

0 commit comments

Comments
 (0)