@@ -65,6 +65,11 @@ public abstract class BaseParser
6565 private static final int MAX_LENGTH_LONG = Long .toString (Long .MAX_VALUE ).length ();
6666
6767 private static final Charset ALTERNATIVE_CHARSET ;
68+ private static final int MAX_RECURSION_DEPTH = 500 ;
69+ private static final String MAX_RECUSRION_MSG = //
70+ "Reached maximum recursion depth " + Integer .toString (MAX_RECURSION_DEPTH );
71+
72+ private int recursionDepth = 0 ;
6873
6974 private final Map <Long , COSObjectKey > keyCache = new HashMap <>();
7075
@@ -280,51 +285,63 @@ private COSBase getObjectFromPool(COSObjectKey key) throws IOException
280285 */
281286 protected COSDictionary parseCOSDictionary (boolean isDirect ) throws IOException
282287 {
283- readExpectedChar ('<' );
284- readExpectedChar ('<' );
285- skipSpaces ();
286- COSDictionary obj = new COSDictionary ();
287- obj .setDirect (isDirect );
288- while (true )
288+ try
289289 {
290- skipSpaces ();
291- char c = (char ) source .peek ();
292- if (c == '>' )
290+ recursionDepth ++;
291+ if (recursionDepth > MAX_RECURSION_DEPTH )
293292 {
294- break ;
293+ throw new IOException ( MAX_RECUSRION_MSG ) ;
295294 }
296- else if (c == '/' )
295+ readExpectedChar ('<' );
296+ readExpectedChar ('<' );
297+ skipSpaces ();
298+ COSDictionary obj = new COSDictionary ();
299+ obj .setDirect (isDirect );
300+ while (true )
297301 {
298- // something went wrong, most likely the dictionary is corrupted
299- // stop immediately and return everything read so far
300- if (!parseCOSDictionaryNameValuePair (obj ))
302+ skipSpaces ();
303+ char c = (char ) source .peek ();
304+ if (c == '>' )
305+ {
306+ break ;
307+ }
308+ else if (c == '/' )
301309 {
302- return obj ;
310+ // something went wrong, most likely the dictionary is corrupted
311+ // stop immediately and return everything read so far
312+ if (!parseCOSDictionaryNameValuePair (obj ))
313+ {
314+ return obj ;
315+ }
316+ }
317+ else
318+ {
319+ // invalid dictionary, we were expecting a /Name, read until the end or until we can recover
320+ LOG .warn ("Invalid dictionary, found: '{}' but expected: '/' at offset {}" , c ,
321+ source .getPosition ());
322+ if (readUntilEndOfCOSDictionary ())
323+ {
324+ // we couldn't recover
325+ return obj ;
326+ }
303327 }
304328 }
305- else
329+ try
330+ {
331+ readExpectedChar ('>' );
332+ readExpectedChar ('>' );
333+ }
334+ catch (IOException exception )
306335 {
307- // invalid dictionary, we were expecting a /Name, read until the end or until we can recover
308- LOG .warn ("Invalid dictionary, found: '{}' but expected: '/' at offset {}" , c ,
336+ LOG .warn ("Invalid dictionary, can't find end of dictionary at offset {}" ,
309337 source .getPosition ());
310- if (readUntilEndOfCOSDictionary ())
311- {
312- // we couldn't recover
313- return obj ;
314- }
315338 }
339+ return obj ;
316340 }
317- try
318- {
319- readExpectedChar ('>' );
320- readExpectedChar ('>' );
321- }
322- catch (IOException exception )
341+ finally
323342 {
324- LOG .warn ("Invalid dictionary, can't find end of dictionary at offset {}" ,
325- source .getPosition ());
343+ recursionDepth --;
326344 }
327- return obj ;
328345 }
329346
330347 /**
@@ -754,71 +771,83 @@ else if ( ( c == ' ' ) || ( c == '\n' ) ||
754771 */
755772 protected COSArray parseCOSArray () throws IOException
756773 {
757- long startPosition = source .getPosition ();
758- readExpectedChar ('[' );
759- COSArray po = new COSArray ();
760- COSBase pbo ;
761- skipSpaces ();
762- int i ;
763- while (((i = source .peek ()) > 0 ) && ((char ) i != ']' ))
774+ try
764775 {
765- pbo = parseDirObject () ;
766- if ( pbo instanceof COSObject )
776+ recursionDepth ++ ;
777+ if ( recursionDepth > MAX_RECURSION_DEPTH )
767778 {
768- // the current empty COSObject is replaced with the correct one
769- pbo = null ;
770- // We have to check if the expected values are there or not PDFBOX-385
771- if (po .size () > 1 && po .get (po .size () - 1 ) instanceof COSInteger )
779+ throw new IOException (MAX_RECUSRION_MSG );
780+ }
781+ long startPosition = source .getPosition ();
782+ readExpectedChar ('[' );
783+ COSArray po = new COSArray ();
784+ COSBase pbo ;
785+ skipSpaces ();
786+ int i ;
787+ while (((i = source .peek ()) > 0 ) && ((char ) i != ']' ))
788+ {
789+ pbo = parseDirObject ();
790+ if (pbo instanceof COSObject )
772791 {
773- COSInteger genNumber = (COSInteger )po .remove ( po .size () -1 );
774- if (po .size () > 0 && po .get (po .size () - 1 ) instanceof COSInteger )
792+ // the current empty COSObject is replaced with the correct one
793+ pbo = null ;
794+ // We have to check if the expected values are there or not PDFBOX-385
795+ if (po .size () > 1 && po .get (po .size () - 1 ) instanceof COSInteger )
775796 {
776- COSInteger number = (COSInteger )po .remove ( po .size () -1 );
777- if (number . longValue () >= 0 && genNumber . intValue () >= 0 )
797+ COSInteger genNumber = (COSInteger ) po .remove (po .size () - 1 );
798+ if (po . size () > 0 && po . get ( po . size () - 1 ) instanceof COSInteger )
778799 {
779- COSObjectKey key = getObjectKey (number .longValue (),
780- genNumber .intValue ());
781- pbo = getObjectFromPool (key );
782- }
783- else
784- {
785- LOG .warn ("Invalid value(s) for an object key {} {}" , number .longValue (),
786- genNumber .intValue ());
800+ COSInteger number = (COSInteger ) po .remove (po .size () - 1 );
801+ if (number .longValue () >= 0 && genNumber .intValue () >= 0 )
802+ {
803+ COSObjectKey key = getObjectKey (number .longValue (),
804+ genNumber .intValue ());
805+ pbo = getObjectFromPool (key );
806+ }
807+ else
808+ {
809+ LOG .warn ("Invalid value(s) for an object key {} {}" , number .longValue (),
810+ genNumber .intValue ());
811+ }
787812 }
788813 }
789814 }
790- }
791- // something went wrong
792- if (pbo == null )
793- {
794- //it could be a bad object in the array which is just skipped
795- LOG .warn ("Corrupt array element at offset {}, start offset: {}" ,
796- source .getPosition (), startPosition );
797- String isThisTheEnd = readString ();
798- // return immediately if a corrupt element is followed by another array
799- // to avoid a possible infinite recursion as most likely the whole array is corrupted
800- if (isThisTheEnd .isEmpty () && source .peek () == '[' )
815+ // something went wrong
816+ if (pbo == null )
801817 {
802- return po ;
818+ //it could be a bad object in the array which is just skipped
819+ LOG .warn ("Corrupt array element at offset {}, start offset: {}" ,
820+ source .getPosition (), startPosition );
821+ String isThisTheEnd = readString ();
822+ // return immediately if a corrupt element is followed by another array
823+ // to avoid a possible infinite recursion as most likely the whole array is corrupted
824+ if (isThisTheEnd .isEmpty () && source .peek () == '[' )
825+ {
826+ return po ;
827+ }
828+ source .rewind (isThisTheEnd .getBytes (StandardCharsets .ISO_8859_1 ).length );
829+ // This could also be an "endobj" or "endstream" which means we can assume that
830+ // the array has ended.
831+ if (ENDOBJ_STRING .equals (isThisTheEnd ) || ENDSTREAM_STRING .equals (isThisTheEnd ))
832+ {
833+ return po ;
834+ }
803835 }
804- source .rewind (isThisTheEnd .getBytes (StandardCharsets .ISO_8859_1 ).length );
805- // This could also be an "endobj" or "endstream" which means we can assume that
806- // the array has ended.
807- if (ENDOBJ_STRING .equals (isThisTheEnd ) || ENDSTREAM_STRING .equals (isThisTheEnd ))
836+ else
808837 {
809- return po ;
838+ po . add ( pbo ) ;
810839 }
840+ skipSpaces ();
811841 }
812- else
813- {
814- po .add (pbo );
815- }
842+ // read ']'
843+ source .read ();
816844 skipSpaces ();
845+ return po ;
846+ }
847+ finally
848+ {
849+ recursionDepth --;
817850 }
818- // read ']'
819- source .read ();
820- skipSpaces ();
821- return po ;
822851 }
823852
824853 /**
@@ -947,72 +976,84 @@ private String decodeBuffer(ByteArrayOutputStream buffer)
947976 */
948977 protected COSBase parseDirObject () throws IOException
949978 {
950- skipSpaces ();
951- char c = (char ) source .peek ();
952- switch (c )
979+ try
953980 {
954- case '<' :
955- // pull off first left bracket
956- source .read ();
957- // check for second left bracket
958- c = (char ) source .peek ();
959- source .rewind (1 );
960- return c == '<' ? parseCOSDictionary (true ) : parseCOSString ();
961- case '[' :
962- // array
963- return parseCOSArray ();
964- case '(' :
965- return parseCOSString ();
966- case '/' :
967- // name
968- return parseCOSName ();
969- case 'n' :
970- // null
971- readExpectedString (NULL , false );
972- return COSNull .NULL ;
973- case 't' :
974- readExpectedString (TRUE , false );
975- return COSBoolean .TRUE ;
976- case 'f' :
977- readExpectedString (FALSE , false );
978- return COSBoolean .FALSE ;
979- case 'R' :
980- source .read ();
981- return new COSObject (null );
982- case (char )-1 :
983- return null ;
984- default :
985- if (isDigit (c ) || c == '-' || c == '+' || c == '.' )
981+ recursionDepth ++;
982+ if (recursionDepth > MAX_RECURSION_DEPTH )
986983 {
987- return parseCOSNumber ( );
984+ throw new IOException ( MAX_RECUSRION_MSG );
988985 }
989- // This is not suppose to happen, but we will allow for it
990- // so we are more compatible with POS writers that don't
991- // follow the spec
992- long startOffset = source .getPosition ();
993- String badString = readString ();
994- if (badString .isEmpty ())
986+ skipSpaces ();
987+ char c = (char ) source .peek ();
988+ switch (c )
995989 {
996- int peek = source .peek ();
997- // we can end up in an infinite loop otherwise
998- throw new IOException ("Unknown dir object c='" + c + "' cInt=" + (int ) c + " peek='"
999- + (char ) peek + "' peekInt=" + peek + " at offset " + source .getPosition ()
1000- + " (start offset: " + startOffset + ")" );
1001- }
990+ case '<' :
991+ // pull off first left bracket
992+ source .read ();
993+ // check for second left bracket
994+ c = (char ) source .peek ();
995+ source .rewind (1 );
996+ return c == '<' ? parseCOSDictionary (true ) : parseCOSString ();
997+ case '[' :
998+ // array
999+ return parseCOSArray ();
1000+ case '(' :
1001+ return parseCOSString ();
1002+ case '/' :
1003+ // name
1004+ return parseCOSName ();
1005+ case 'n' :
1006+ // null
1007+ readExpectedString (NULL , false );
1008+ return COSNull .NULL ;
1009+ case 't' :
1010+ readExpectedString (TRUE , false );
1011+ return COSBoolean .TRUE ;
1012+ case 'f' :
1013+ readExpectedString (FALSE , false );
1014+ return COSBoolean .FALSE ;
1015+ case 'R' :
1016+ source .read ();
1017+ return new COSObject (null );
1018+ case (char ) -1 :
1019+ return null ;
1020+ default :
1021+ if (isDigit (c ) || c == '-' || c == '+' || c == '.' )
1022+ {
1023+ return parseCOSNumber ();
1024+ }
1025+ // This is not supposed to happen, but we will allow for it
1026+ // so we are more compatible with POS writers that don't
1027+ // follow the spec
1028+ long startOffset = source .getPosition ();
1029+ String badString = readString ();
1030+ if (badString .isEmpty ())
1031+ {
1032+ int peek = source .peek ();
1033+ // we can end up in an infinite loop otherwise
1034+ throw new IOException ("Unknown dir object c='" + c + "' cInt=" + (int ) c + " peek='"
1035+ + (char ) peek + "' peekInt=" + peek + " at offset " + source .getPosition ()
1036+ + " (start offset: " + startOffset + ")" );
1037+ }
10021038
1003- // if it's an endstream/endobj, we want to put it back so the caller will see it
1004- if (ENDOBJ_STRING .equals (badString ) || ENDSTREAM_STRING .equals (badString ))
1005- {
1006- source .rewind (badString .getBytes (StandardCharsets .ISO_8859_1 ).length );
1007- }
1008- else
1009- {
1010- LOG .warn ("Skipped unexpected dir object = '{}' at offset {} (start offset: {})" ,
1011- badString , source .getPosition (), startOffset );
1012- return this instanceof PDFStreamParser ? null : COSNull .NULL ;
1039+ // if it's an endstream/endobj, we want to put it back so the caller will see it
1040+ if (ENDOBJ_STRING .equals (badString ) || ENDSTREAM_STRING .equals (badString ))
1041+ {
1042+ source .rewind (badString .getBytes (StandardCharsets .ISO_8859_1 ).length );
1043+ }
1044+ else
1045+ {
1046+ LOG .warn ("Skipped unexpected dir object = '{}' at offset {} (start offset: {})" ,
1047+ badString , source .getPosition (), startOffset );
1048+ return this instanceof PDFStreamParser ? null : COSNull .NULL ;
1049+ }
10131050 }
1051+ return null ;
1052+ }
1053+ finally
1054+ {
1055+ recursionDepth --;
10141056 }
1015- return null ;
10161057 }
10171058
10181059 private COSNumber parseCOSNumber () throws IOException
0 commit comments