2121import java .nio .charset .StandardCharsets ;
2222import java .util .ArrayList ;
2323import java .util .List ;
24+ import java .util .regex .Pattern ;
2425import org .apache .logging .log4j .Logger ;
2526import org .apache .logging .log4j .LogManager ;
2627import org .apache .pdfbox .contentstream .PDContentStream ;
@@ -46,6 +47,7 @@ public class PDFStreamParser extends COSParser
4647 */
4748 private static final Logger LOG = LogManager .getLogger (PDFStreamParser .class );
4849
// Precompiled pattern used by the binary-data heuristic to decide whether a candidate
// token looks like a number: optional digits, an optional '.', then optional digits,
// anchored at both ends. It replaces the per-call String.matches() with the same regex.
// NOTE(review): the pattern also accepts the empty string and a lone "."; the call sites
// use matcher(s).find(), which (because of '$') also tolerates a trailing line
// terminator, unlike String.matches() - confirm this is intended.
// NOTE(review): the spaces inside the regex literal as rendered here look like an
// extraction artefact of this diff view - verify against the real source file.
50+ private static final Pattern NUMBER_PATTERN = Pattern .compile ("^\\ d*\\ .?\\ d*$" );
// Capacity, in bytes, of the look-ahead buffer below.
4951 private static final int MAX_BIN_CHAR_TEST_LENGTH = 10 ;
// Per-instance buffer; the candidate operator token is decoded from it as
// US-ASCII before being tested.
5052 private final byte [] binCharTestArr = new byte [MAX_BIN_CHAR_TEST_LENGTH ];
// Starts at 0; presumably tracks nesting of inline images - its use is not visible in
// this excerpt, TODO confirm.
5153 private int inlineImageDepth = 0 ;
@@ -388,7 +390,7 @@ else if (startOpIdx != -1 && endOpIdx == -1 &&
388390 // or a number (PDFBOX-5957)
389391 s = new String (binCharTestArr , startOpIdx , endOpIdx - startOpIdx , StandardCharsets .US_ASCII );
390392 if (!"Q" .equals (s ) && !"EMC" .equals (s ) && !"S" .equals (s ) &&
391- !s . matches ( "^ \\ d* \\ .? \\ d*$" ))
393+ !NUMBER_PATTERN . matcher ( s ). find ( ))
392394 {
393395 // operator is not Q, not EMC, not S, nor a number -> assume binary data
394396 noBinData = false ;
@@ -405,7 +407,7 @@ else if (startOpIdx != -1 && endOpIdx == -1 &&
405407 }
406408 LOG .debug ("startOpIdx: {} endOpIdx: {} s = '{}'" , startOpIdx , endOpIdx , s );
407409 // look for token of 3 chars max or a number
408- if (endOpIdx - startOpIdx > 3 && !s . matches ( "^ \\ d* \\ .? \\ d*$" ))
410+ if (endOpIdx - startOpIdx > 3 && !NUMBER_PATTERN . matcher ( s ). find ( ))
409411 {
410412 noBinData = false ; // "operator" too long, assume binary data
411413 }
0 commit comments