Skip to content

Commit 297e84c

Browse files
committed
PDFBOX-6041: limit recursion depth to avoid a stack overflow exception as proposed by Davia Justamante
git-svn-id: https://svn.apache.org/repos/asf/pdfbox/trunk@1928953 13f79535-47bb-0310-9956-ffa450edef68
1 parent 456507d commit 297e84c

File tree

3 files changed

+204
-141
lines changed

3 files changed

+204
-141
lines changed

pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java

Lines changed: 182 additions & 141 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,11 @@ public abstract class BaseParser
6565
private static final int MAX_LENGTH_LONG = Long.toString(Long.MAX_VALUE).length();
6666

6767
private static final Charset ALTERNATIVE_CHARSET;
68+
private static final int MAX_RECURSION_DEPTH = 500;
69+
private static final String MAX_RECUSRION_MSG = //
70+
"Reached maximum recursion depth " + Integer.toString(MAX_RECURSION_DEPTH);
71+
72+
private int recursionDepth = 0;
6873

6974
private final Map<Long, COSObjectKey> keyCache = new HashMap<>();
7075

@@ -280,51 +285,63 @@ private COSBase getObjectFromPool(COSObjectKey key) throws IOException
280285
*/
281286
protected COSDictionary parseCOSDictionary(boolean isDirect) throws IOException
282287
{
283-
readExpectedChar('<');
284-
readExpectedChar('<');
285-
skipSpaces();
286-
COSDictionary obj = new COSDictionary();
287-
obj.setDirect(isDirect);
288-
while (true)
288+
try
289289
{
290-
skipSpaces();
291-
char c = (char) source.peek();
292-
if (c == '>')
290+
recursionDepth++;
291+
if (recursionDepth > MAX_RECURSION_DEPTH)
293292
{
294-
break;
293+
throw new IOException(MAX_RECUSRION_MSG);
295294
}
296-
else if (c == '/')
295+
readExpectedChar('<');
296+
readExpectedChar('<');
297+
skipSpaces();
298+
COSDictionary obj = new COSDictionary();
299+
obj.setDirect(isDirect);
300+
while (true)
297301
{
298-
// something went wrong, most likely the dictionary is corrupted
299-
// stop immediately and return everything read so far
300-
if (!parseCOSDictionaryNameValuePair(obj))
302+
skipSpaces();
303+
char c = (char) source.peek();
304+
if (c == '>')
305+
{
306+
break;
307+
}
308+
else if (c == '/')
301309
{
302-
return obj;
310+
// something went wrong, most likely the dictionary is corrupted
311+
// stop immediately and return everything read so far
312+
if (!parseCOSDictionaryNameValuePair(obj))
313+
{
314+
return obj;
315+
}
316+
}
317+
else
318+
{
319+
// invalid dictionary, we were expecting a /Name, read until the end or until we can recover
320+
LOG.warn("Invalid dictionary, found: '{}' but expected: '/' at offset {}", c,
321+
source.getPosition());
322+
if (readUntilEndOfCOSDictionary())
323+
{
324+
// we couldn't recover
325+
return obj;
326+
}
303327
}
304328
}
305-
else
329+
try
330+
{
331+
readExpectedChar('>');
332+
readExpectedChar('>');
333+
}
334+
catch (IOException exception)
306335
{
307-
// invalid dictionary, we were expecting a /Name, read until the end or until we can recover
308-
LOG.warn("Invalid dictionary, found: '{}' but expected: '/' at offset {}", c,
336+
LOG.warn("Invalid dictionary, can't find end of dictionary at offset {}",
309337
source.getPosition());
310-
if (readUntilEndOfCOSDictionary())
311-
{
312-
// we couldn't recover
313-
return obj;
314-
}
315338
}
339+
return obj;
316340
}
317-
try
318-
{
319-
readExpectedChar('>');
320-
readExpectedChar('>');
321-
}
322-
catch (IOException exception)
341+
finally
323342
{
324-
LOG.warn("Invalid dictionary, can't find end of dictionary at offset {}",
325-
source.getPosition());
343+
recursionDepth--;
326344
}
327-
return obj;
328345
}
329346

330347
/**
@@ -754,71 +771,83 @@ else if ( ( c == ' ' ) || ( c == '\n' ) ||
754771
*/
755772
protected COSArray parseCOSArray() throws IOException
756773
{
757-
long startPosition = source.getPosition();
758-
readExpectedChar('[');
759-
COSArray po = new COSArray();
760-
COSBase pbo;
761-
skipSpaces();
762-
int i;
763-
while (((i = source.peek()) > 0) && ((char) i != ']'))
774+
try
764775
{
765-
pbo = parseDirObject();
766-
if( pbo instanceof COSObject )
776+
recursionDepth++;
777+
if (recursionDepth > MAX_RECURSION_DEPTH)
767778
{
768-
// the current empty COSObject is replaced with the correct one
769-
pbo = null;
770-
// We have to check if the expected values are there or not PDFBOX-385
771-
if (po.size() > 1 && po.get(po.size() - 1) instanceof COSInteger)
779+
throw new IOException(MAX_RECUSRION_MSG);
780+
}
781+
long startPosition = source.getPosition();
782+
readExpectedChar('[');
783+
COSArray po = new COSArray();
784+
COSBase pbo;
785+
skipSpaces();
786+
int i;
787+
while (((i = source.peek()) > 0) && ((char) i != ']'))
788+
{
789+
pbo = parseDirObject();
790+
if (pbo instanceof COSObject)
772791
{
773-
COSInteger genNumber = (COSInteger)po.remove( po.size() -1 );
774-
if (po.size() > 0 && po.get(po.size() - 1) instanceof COSInteger)
792+
// the current empty COSObject is replaced with the correct one
793+
pbo = null;
794+
// We have to check if the expected values are there or not PDFBOX-385
795+
if (po.size() > 1 && po.get(po.size() - 1) instanceof COSInteger)
775796
{
776-
COSInteger number = (COSInteger)po.remove( po.size() -1 );
777-
if (number.longValue() >= 0 && genNumber.intValue() >= 0)
797+
COSInteger genNumber = (COSInteger) po.remove(po.size() - 1);
798+
if (po.size() > 0 && po.get(po.size() - 1) instanceof COSInteger)
778799
{
779-
COSObjectKey key = getObjectKey(number.longValue(),
780-
genNumber.intValue());
781-
pbo = getObjectFromPool(key);
782-
}
783-
else
784-
{
785-
LOG.warn("Invalid value(s) for an object key {} {}", number.longValue(),
786-
genNumber.intValue());
800+
COSInteger number = (COSInteger) po.remove(po.size() - 1);
801+
if (number.longValue() >= 0 && genNumber.intValue() >= 0)
802+
{
803+
COSObjectKey key = getObjectKey(number.longValue(),
804+
genNumber.intValue());
805+
pbo = getObjectFromPool(key);
806+
}
807+
else
808+
{
809+
LOG.warn("Invalid value(s) for an object key {} {}", number.longValue(),
810+
genNumber.intValue());
811+
}
787812
}
788813
}
789814
}
790-
}
791-
// something went wrong
792-
if (pbo == null)
793-
{
794-
//it could be a bad object in the array which is just skipped
795-
LOG.warn("Corrupt array element at offset {}, start offset: {}",
796-
source.getPosition(), startPosition);
797-
String isThisTheEnd = readString();
798-
// return immediately if a corrupt element is followed by another array
799-
// to avoid a possible infinite recursion as most likely the whole array is corrupted
800-
if (isThisTheEnd.isEmpty() && source.peek() == '[')
815+
// something went wrong
816+
if (pbo == null)
801817
{
802-
return po;
818+
//it could be a bad object in the array which is just skipped
819+
LOG.warn("Corrupt array element at offset {}, start offset: {}",
820+
source.getPosition(), startPosition);
821+
String isThisTheEnd = readString();
822+
// return immediately if a corrupt element is followed by another array
823+
// to avoid a possible infinite recursion as most likely the whole array is corrupted
824+
if (isThisTheEnd.isEmpty() && source.peek() == '[')
825+
{
826+
return po;
827+
}
828+
source.rewind(isThisTheEnd.getBytes(StandardCharsets.ISO_8859_1).length);
829+
// This could also be an "endobj" or "endstream" which means we can assume that
830+
// the array has ended.
831+
if (ENDOBJ_STRING.equals(isThisTheEnd) || ENDSTREAM_STRING.equals(isThisTheEnd))
832+
{
833+
return po;
834+
}
803835
}
804-
source.rewind(isThisTheEnd.getBytes(StandardCharsets.ISO_8859_1).length);
805-
// This could also be an "endobj" or "endstream" which means we can assume that
806-
// the array has ended.
807-
if(ENDOBJ_STRING.equals(isThisTheEnd) || ENDSTREAM_STRING.equals(isThisTheEnd))
836+
else
808837
{
809-
return po;
838+
po.add(pbo);
810839
}
840+
skipSpaces();
811841
}
812-
else
813-
{
814-
po.add(pbo);
815-
}
842+
// read ']'
843+
source.read();
816844
skipSpaces();
845+
return po;
846+
}
847+
finally
848+
{
849+
recursionDepth--;
817850
}
818-
// read ']'
819-
source.read();
820-
skipSpaces();
821-
return po;
822851
}
823852

824853
/**
@@ -947,72 +976,84 @@ private String decodeBuffer(ByteArrayOutputStream buffer)
947976
*/
948977
protected COSBase parseDirObject() throws IOException
949978
{
950-
skipSpaces();
951-
char c = (char) source.peek();
952-
switch(c)
979+
try
953980
{
954-
case '<':
955-
// pull off first left bracket
956-
source.read();
957-
// check for second left bracket
958-
c = (char) source.peek();
959-
source.rewind(1);
960-
return c == '<' ? parseCOSDictionary(true) : parseCOSString();
961-
case '[':
962-
// array
963-
return parseCOSArray();
964-
case '(':
965-
return parseCOSString();
966-
case '/':
967-
// name
968-
return parseCOSName();
969-
case 'n':
970-
// null
971-
readExpectedString(NULL, false);
972-
return COSNull.NULL;
973-
case 't':
974-
readExpectedString(TRUE, false);
975-
return COSBoolean.TRUE;
976-
case 'f':
977-
readExpectedString(FALSE, false);
978-
return COSBoolean.FALSE;
979-
case 'R':
980-
source.read();
981-
return new COSObject(null);
982-
case (char)-1:
983-
return null;
984-
default:
985-
if (isDigit(c) || c == '-' || c == '+' || c == '.')
981+
recursionDepth++;
982+
if (recursionDepth > MAX_RECURSION_DEPTH)
986983
{
987-
return parseCOSNumber();
984+
throw new IOException(MAX_RECUSRION_MSG);
988985
}
989-
// This is not supposed to happen, but we will allow for it
990-
// so we are more compatible with POS writers that don't
991-
// follow the spec
992-
long startOffset = source.getPosition();
993-
String badString = readString();
994-
if (badString.isEmpty())
986+
skipSpaces();
987+
char c = (char) source.peek();
988+
switch (c)
995989
{
996-
int peek = source.peek();
997-
// we can end up in an infinite loop otherwise
998-
throw new IOException("Unknown dir object c='" + c + "' cInt=" + (int) c + " peek='"
999-
+ (char) peek + "' peekInt=" + peek + " at offset " + source.getPosition()
1000-
+ " (start offset: " + startOffset + ")");
1001-
}
990+
case '<':
991+
// pull off first left bracket
992+
source.read();
993+
// check for second left bracket
994+
c = (char) source.peek();
995+
source.rewind(1);
996+
return c == '<' ? parseCOSDictionary(true) : parseCOSString();
997+
case '[':
998+
// array
999+
return parseCOSArray();
1000+
case '(':
1001+
return parseCOSString();
1002+
case '/':
1003+
// name
1004+
return parseCOSName();
1005+
case 'n':
1006+
// null
1007+
readExpectedString(NULL, false);
1008+
return COSNull.NULL;
1009+
case 't':
1010+
readExpectedString(TRUE, false);
1011+
return COSBoolean.TRUE;
1012+
case 'f':
1013+
readExpectedString(FALSE, false);
1014+
return COSBoolean.FALSE;
1015+
case 'R':
1016+
source.read();
1017+
return new COSObject(null);
1018+
case (char) -1:
1019+
return null;
1020+
default:
1021+
if (isDigit(c) || c == '-' || c == '+' || c == '.')
1022+
{
1023+
return parseCOSNumber();
1024+
}
1025+
// This is not supposed to happen, but we will allow for it
1026+
// so we are more compatible with POS writers that don't
1027+
// follow the spec
1028+
long startOffset = source.getPosition();
1029+
String badString = readString();
1030+
if (badString.isEmpty())
1031+
{
1032+
int peek = source.peek();
1033+
// we can end up in an infinite loop otherwise
1034+
throw new IOException("Unknown dir object c='" + c + "' cInt=" + (int) c + " peek='"
1035+
+ (char) peek + "' peekInt=" + peek + " at offset " + source.getPosition()
1036+
+ " (start offset: " + startOffset + ")");
1037+
}
10021038

1003-
// if it's an endstream/endobj, we want to put it back so the caller will see it
1004-
if (ENDOBJ_STRING.equals(badString) || ENDSTREAM_STRING.equals(badString))
1005-
{
1006-
source.rewind(badString.getBytes(StandardCharsets.ISO_8859_1).length);
1007-
}
1008-
else
1009-
{
1010-
LOG.warn("Skipped unexpected dir object = '{}' at offset {} (start offset: {})",
1011-
badString, source.getPosition(), startOffset);
1012-
return this instanceof PDFStreamParser ? null : COSNull.NULL;
1039+
// if it's an endstream/endobj, we want to put it back so the caller will see it
1040+
if (ENDOBJ_STRING.equals(badString) || ENDSTREAM_STRING.equals(badString))
1041+
{
1042+
source.rewind(badString.getBytes(StandardCharsets.ISO_8859_1).length);
1043+
}
1044+
else
1045+
{
1046+
LOG.warn("Skipped unexpected dir object = '{}' at offset {} (start offset: {})",
1047+
badString, source.getPosition(), startOffset);
1048+
return this instanceof PDFStreamParser ? null : COSNull.NULL;
1049+
}
10131050
}
1051+
return null;
1052+
}
1053+
finally
1054+
{
1055+
recursionDepth--;
10141056
}
1015-
return null;
10161057
}
10171058

10181059
private COSNumber parseCOSNumber() throws IOException

0 commit comments

Comments
 (0)