Skip to content

Commit 006d23f

Browse files
committed
PDFBOX-6036: replace recursive algorithm with an iterative one to avoid a StackOverflowError
git-svn-id: https://svn.apache.org/repos/asf/pdfbox/trunk@1929915 13f79535-47bb-0310-9956-ffa450edef68
1 parent ab5734a commit 006d23f

File tree

2 files changed

+134
-32
lines changed

2 files changed

+134
-32
lines changed

pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSWriterCompressionPool.java

Lines changed: 78 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@
1818

1919
import java.io.IOException;
2020
import java.util.ArrayList;
21+
import java.util.Collection;
2122
import java.util.Collections;
2223
import java.util.HashSet;
23-
import java.util.Iterator;
2424
import java.util.List;
2525
import java.util.Set;
2626

@@ -29,7 +29,6 @@
2929
import org.apache.pdfbox.cos.COSArray;
3030
import org.apache.pdfbox.cos.COSBase;
3131
import org.apache.pdfbox.cos.COSDictionary;
32-
import org.apache.pdfbox.cos.COSDocument;
3332
import org.apache.pdfbox.cos.COSName;
3433
import org.apache.pdfbox.cos.COSObject;
3534
import org.apache.pdfbox.cos.COSObjectKey;
@@ -79,11 +78,23 @@ public COSWriterCompressionPool(PDDocument document, CompressParameters paramete
7978
objectPool = new COSObjectPool(document.getDocument().getHighestXRefObjectNumber());
8079

8180
// Initialize object pool.
82-
COSDocument cosDocument = document.getDocument();
83-
84-
COSDictionary trailer = cosDocument.getTrailer();
85-
addStructure(trailer.getItem(COSName.ROOT));
86-
addStructure(trailer.getItem(COSName.INFO));
81+
COSDictionary trailer = document.getDocument().getTrailer();
82+
List<COSBase> cosBaseList = new ArrayList<>();
83+
COSDictionary root = trailer.getCOSDictionary(COSName.ROOT);
84+
if (root != null)
85+
{
86+
cosBaseList.add(root);
87+
}
88+
COSDictionary info = trailer.getCOSDictionary(COSName.INFO);
89+
if (info != null)
90+
{
91+
cosBaseList.add(info);
92+
}
93+
while (!cosBaseList.isEmpty())
94+
{
95+
cosBaseList = addStructure(cosBaseList);
96+
}
97+
allDirectObjects.clear();
8798

8899
Collections.sort(objectStreamObjects);
89100
Collections.sort(topLevelObjects);
@@ -156,13 +167,30 @@ private COSBase addObjectToPool(COSObjectKey key, COSBase base)
156167
return current;
157168
}
158169

170+
/**
171+
* Attempts to find yet unregistered streams and dictionaries in the given structures and returns the objects to be processed in the next iteration.
172+
*
173+
* @param cosBaseList A list of objects to be added for compressing.
174+
* @throws IOException Shall be thrown, if adding failed.
175+
*/
176+
private List<COSBase> addStructure(List<COSBase> cosBaseList) throws IOException
177+
{
178+
List<COSBase> cosBaseListNext = new ArrayList<>();
179+
for (COSBase cosBase : cosBaseList)
180+
{
181+
cosBaseListNext.addAll(addStructure(cosBase));
182+
}
183+
cosBaseList.clear();
184+
return cosBaseListNext;
185+
}
186+
159187
/**
160188
* Attempts to find yet unregistered streams and dictionaries in the given structure.
161189
*
162190
* @param current The object to be added for compressing.
163191
* @throws IOException Shall be thrown, if compressing the object failed.
164192
*/
165-
private void addStructure(COSBase current) throws IOException
193+
private List<COSBase> addStructure(COSBase current) throws IOException
166194
{
167195
COSBase base = current;
168196
if (current instanceof COSStream
@@ -182,45 +210,63 @@ else if (current instanceof COSObject)
182210
}
183211
if (base instanceof COSArray)
184212
{
185-
addElements(((COSArray) base).iterator());
213+
return getElements(((COSArray) base).toList());
186214
}
187215
else if (base instanceof COSDictionary)
188216
{
189-
addElements(((COSDictionary) base).getValues().iterator());
217+
return getElements(((COSDictionary) base).getValues());
190218
}
219+
return Collections.emptyList();
191220
}
192221

193-
private void addElements(Iterator<COSBase> elements) throws IOException
222+
/**
223+
* Collect all relevant objects from a COSDictionary/COSArray.
224+
*
225+
* @param elements collection of all elements of a COSDictionary/COSArray.
226+
*
227+
* @return a collection containing the relevant objects within the given Collection.
228+
* @throws IOException if something went wrong.
229+
*/
230+
private List<COSBase> getElements(Collection<? extends COSBase> elements) throws IOException
194231
{
195-
while (elements.hasNext())
232+
List<COSBase> relevantElements = new ArrayList<>();
233+
for (COSBase element : elements)
196234
{
197-
COSBase value = elements.next();
198-
if (value instanceof COSArray
199-
|| (value instanceof COSDictionary
200-
&& !allDirectObjects.contains(value)))
235+
if (filterElement(element))
201236
{
202-
allDirectObjects.add(value);
203-
addStructure(value);
237+
relevantElements.add(element);
204238
}
205-
else if (value instanceof COSObject)
239+
}
240+
return relevantElements;
241+
}
242+
243+
private boolean filterElement(COSBase element) throws IOException
244+
{
245+
if (element instanceof COSObject)
246+
{
247+
COSObject cosObject = (COSObject) element;
248+
if (cosObject.getKey() != null && objectPool.contains(cosObject.getKey()))
206249
{
207-
COSObject cosObject = (COSObject) value;
208-
if (cosObject.getKey() != null && objectPool.contains(cosObject.getKey()))
209-
{
210-
// check if the stored object matches the referenced object otherwise replace the key with a new one
211-
// there may differences if some imported content uses the same object numbers than the target pdf
212-
if (objectPool.getObject(cosObject.getKey()).equals(cosObject.getObject()))
213-
{
214-
continue;
215-
}
216-
cosObject.setKey(null);
217-
}
218-
if (cosObject.getObject() != null)
250+
// check if the stored object matches the referenced object otherwise replace the key with a new one
251+
// there may be differences if some imported content uses the same object numbers as the target PDF
252+
if (objectPool.getObject(cosObject.getKey()).equals(cosObject.getObject()))
219253
{
220-
addStructure(value);
254+
return false;
221255
}
256+
cosObject.setKey(null);
222257
}
258+
if (cosObject.getObject() != null)
259+
{
260+
return true;
261+
}
262+
}
263+
else if (element instanceof COSArray
264+
|| (element instanceof COSDictionary && !allDirectObjects.contains(element)))
265+
{
266+
allDirectObjects.add(element);
267+
return true;
223268
}
269+
return false;
224270
}
225271

226272
/**
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.pdfbox.pdfwriter;
18+
19+
import java.io.IOException;
20+
21+
import org.apache.pdfbox.pdfwriter.compress.COSWriterCompressionPool;
22+
import org.apache.pdfbox.pdfwriter.compress.CompressParameters;
23+
import org.apache.pdfbox.pdmodel.PDDocument;
24+
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline;
25+
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
26+
import org.junit.jupiter.api.Test;
27+
28+
class COSWriterCompressionPoolTest
29+
{
30+
/**
31+
* The old implementation may run into a stack overflow whenever the recursion depth gets too deep to be processed
32+
* when collecting the objects to be compressed.
33+
*
34+
* The new solution replaces the recursion with an iteration.
35+
*
36+
* @throws IOException if building or closing the document, or creating the compression pool, fails
37+
*/
38+
@Test
39+
void testPDFBox6036() throws IOException
40+
{
41+
for (int i = 1; i <= 222_222; i *= 2)
42+
{
43+
try (PDDocument document = new PDDocument())
44+
{
45+
PDDocumentOutline outline = new PDDocumentOutline();
46+
document.getDocumentCatalog().setDocumentOutline(outline);
47+
for (int j = 0; j < i; j++)
48+
{
49+
outline.addLast(new PDOutlineItem());
50+
}
51+
new COSWriterCompressionPool(document, CompressParameters.DEFAULT_COMPRESSION);
52+
}
53+
}
54+
}
55+
56+
}

0 commit comments

Comments
 (0)