@@ -363,11 +363,13 @@ else if (startOpIdx != -1 && endOpIdx == -1 &&
363363 // PDFBOX-3742: just assuming that 1-3 non blanks is a PDF operator isn't enough
364364 if (noBinData && endOpIdx != -1 && startOpIdx != -1 )
365365 {
366- // usually, the operator here is Q, sometimes EMC (PDFBOX-2376), S (PDFBOX-3784)
366+ // usually, the operator here is Q, sometimes EMC (PDFBOX-2376), S (PDFBOX-3784),
367+ // or a number (PDFBOX-5957)
367368 s = new String (binCharTestArr , startOpIdx , endOpIdx - startOpIdx );
368- if (!"Q" .equals (s ) && !"EMC" .equals (s ) && !"S" .equals (s ))
369+ if (!"Q" .equals (s ) && !"EMC" .equals (s ) && !"S" .equals (s ) &&
370+ !s .matches ("^\\ d*\\ .?\\ d*$" ))
369371 {
370- // operator is not Q, not EMC, not S -> assume binary data
372+ // operator is not Q, not EMC, not S, nor a number -> assume binary data
371373 noBinData = false ;
372374 }
373375 }
@@ -380,9 +382,9 @@ else if (startOpIdx != -1 && endOpIdx == -1 &&
380382 endOpIdx = MAX_BIN_CHAR_TEST_LENGTH ;
381383 s = new String (binCharTestArr , startOpIdx , endOpIdx - startOpIdx );
382384 }
383- LOG .debug ("startOpIdx: {} endOpIdx: {} s = {} " , startOpIdx , endOpIdx , s );
384- // a PDF operator is 1-3 bytes long
385- if (endOpIdx - startOpIdx > 3 )
385+ LOG .debug ("startOpIdx: {} endOpIdx: {} s = '{}' " , startOpIdx , endOpIdx , s );
386+ // look for token of 3 chars max or a number
387+ if (endOpIdx - startOpIdx > 3 && ! s . matches ( "^ \\ d* \\ .? \\ d*$" ) )
386388 {
387389 noBinData = false ; // "operator" too long, assume binary data
388390 }
0 commit comments