Skip to content

Commit 15eba0e

Browse files
committed
Provide a NullUnlimitedList implementation to prevent OOM exception while opening document with enormous page count
DEVSIX-7793
1 parent dee3219 commit 15eba0e

File tree

5 files changed

+216
-13
lines changed

5 files changed

+216
-13
lines changed

kernel/src/main/java/com/itextpdf/kernel/pdf/PdfPagesTree.java

Lines changed: 105 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,14 @@ This file is part of the iText (R) project.
4646
import com.itextpdf.io.LogMessageConstant;
4747
import com.itextpdf.io.util.MessageFormatUtil;
4848
import com.itextpdf.kernel.PdfException;
49-
import org.slf4j.Logger;
50-
import org.slf4j.LoggerFactory;
5149

5250
import java.io.Serializable;
5351
import java.util.ArrayList;
52+
import java.util.HashMap;
5453
import java.util.List;
54+
import java.util.Map;
55+
import org.slf4j.Logger;
56+
import org.slf4j.LoggerFactory;
5557

5658
/**
5759
* Algorithm for constructing the {@link PdfPages} tree
@@ -62,9 +64,9 @@ class PdfPagesTree implements Serializable {
6264

6365
private final int leafSize = 10;
6466

65-
private List<PdfIndirectReference> pageRefs;
67+
private NullUnlimitedList<PdfIndirectReference> pageRefs;
6668
private List<PdfPages> parents;
67-
private List<PdfPage> pages;
69+
private NullUnlimitedList<PdfPage> pages;
6870
private PdfDocument document;
6971
private boolean generated = false;
7072
private PdfPages root;
@@ -78,9 +80,9 @@ class PdfPagesTree implements Serializable {
7880
*/
7981
public PdfPagesTree(PdfCatalog pdfCatalog) {
8082
this.document = pdfCatalog.getDocument();
81-
this.pageRefs = new ArrayList<>();
83+
this.pageRefs = new NullUnlimitedList<>();
8284
this.parents = new ArrayList<>();
83-
this.pages = new ArrayList<>();
85+
this.pages = new NullUnlimitedList<>();
8486
if (pdfCatalog.getPdfObject().containsKey(PdfName.Pages)) {
8587
PdfDictionary pages = pdfCatalog.getPdfObject().getAsDictionary(PdfName.Pages);
8688
if (pages == null)
@@ -425,10 +427,9 @@ private void loadPage(int pageNum) {
425427
} else {
426428
int from = parent.getFrom();
427429

428-
// Possible exception in case kids.getSize() < parent.getCount().
429-
// In any case parent.getCount() has higher priority.
430430
// NOTE optimization? when we already found needed index
431-
for (int i = 0; i < parent.getCount(); i++) {
431+
final int pageCount = Math.min(parent.getCount(), kids.size());
432+
for (int i = 0; i < pageCount; i++) {
432433
PdfObject kid = kids.get(i, false);
433434
if (kid instanceof PdfIndirectReference) {
434435
pageRefs.set(from + i, (PdfIndirectReference) kid);
@@ -486,4 +487,99 @@ private void correctPdfPagesFromProperty(int index, int correction) {
486487
}
487488
}
488489
}
490+
491+
/**
492+
* The class represents a list which allows null elements, but doesn't allocate a memory for them, in the rest of
493+
* cases it behaves like usual {@link ArrayList} and should have the same complexity (because keys are unique
494+
* integers, so collisions are impossible). Class doesn't implement {@code List} interface because it provides
495+
* only methods which are in use in {@link PdfPagesTree} class.
496+
*
497+
* @param <T> elements of the list
498+
*/
499+
static final class NullUnlimitedList<T> implements Serializable {
500+
private final Map<Integer, T> map = new HashMap<>();
501+
private int size = 0;
502+
503+
// O(1)
504+
public void add(T element) {
505+
if (element == null) {
506+
size++;
507+
return;
508+
}
509+
map.put(size++, element);
510+
}
511+
512+
// In worth scenario O(n^2) but it is mostly impossible because keys shouldn't have
513+
// collisions at all (they are integers). So in average should be O(n).
514+
public void add(int index, T element) {
515+
if (index < 0 || index > size) {
516+
return;
517+
}
518+
size++;
519+
// Shifts the element currently at that position (if any) and any
520+
// subsequent elements to the right (adds one to their indices).
521+
T previous = map.get(index);
522+
for (int i = index + 1; i < size; i++) {
523+
T currentToAdd = previous;
524+
previous = map.get(i);
525+
this.set(i, currentToAdd);
526+
}
527+
528+
this.set(index, element);
529+
}
530+
531+
// average O(1), worth O(n) (mostly impossible in case when keys are integers)
532+
public T get(int index) {
533+
return map.get(index);
534+
}
535+
536+
// average O(1), worth O(n) (mostly impossible in case when keys are integers)
537+
public void set(int index, T element) {
538+
if (element == null) {
539+
map.remove(index);
540+
} else {
541+
map.put(index, element);
542+
}
543+
}
544+
545+
// O(n)
546+
public int indexOf(T element) {
547+
if (element == null) {
548+
for (int i = 0; i < size; i++) {
549+
if (!map.containsKey(i)) {
550+
return i;
551+
}
552+
}
553+
return -1;
554+
}
555+
for (Map.Entry<Integer, T> entry : map.entrySet()) {
556+
if (element.equals(entry.getValue())) {
557+
return entry.getKey();
558+
}
559+
}
560+
return -1;
561+
}
562+
563+
// In worth scenario O(n^2) but it is mostly impossible because keys shouldn't have
564+
// collisions at all (they are integers). So in average should be O(n).
565+
public void remove(int index) {
566+
if (index < 0 || index >= size) {
567+
return;
568+
}
569+
map.remove(index);
570+
// Shifts any subsequent elements to the left (subtracts one from their indices).
571+
T previous = map.get(size - 1);
572+
for (int i = size - 2; i >= index; i--) {
573+
T current = previous;
574+
previous = map.get(i);
575+
this.set(i, current);
576+
}
577+
map.remove(--size);
578+
}
579+
580+
// O(1)
581+
public int size() {
582+
return size;
583+
}
584+
}
489585
}

kernel/src/test/java/com/itextpdf/kernel/pdf/PdfPagesTest.java

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,14 +56,11 @@ This file is part of the iText (R) project.
5656
import com.itextpdf.kernel.pdf.xobject.PdfFormXObject;
5757
import com.itextpdf.kernel.pdf.xobject.PdfImageXObject;
5858
import com.itextpdf.kernel.utils.CompareTool;
59+
import com.itextpdf.test.AssertUtil;
5960
import com.itextpdf.test.ExtendedITextTest;
6061
import com.itextpdf.test.annotations.LogMessage;
6162
import com.itextpdf.test.annotations.LogMessages;
6263
import com.itextpdf.test.annotations.type.IntegrationTest;
63-
import org.junit.Assert;
64-
import org.junit.BeforeClass;
65-
import org.junit.Test;
66-
import org.junit.experimental.categories.Category;
6764

6865
import java.io.ByteArrayOutputStream;
6966
import java.io.IOException;
@@ -73,6 +70,10 @@ This file is part of the iText (R) project.
7370
import java.util.List;
7471
import java.util.Random;
7572
import java.util.Set;
73+
import org.junit.Assert;
74+
import org.junit.BeforeClass;
75+
import org.junit.Test;
76+
import org.junit.experimental.categories.Category;
7677

7778
@Category(IntegrationTest.class)
7879
public class PdfPagesTest extends ExtendedITextTest {
@@ -86,6 +87,25 @@ public static void setup() {
8687
createDestinationFolder(destinationFolder);
8788
}
8889

90+
@Test
91+
public void hugeNumberOfPagesWithOnePageTest() throws IOException {
92+
PdfDocument pdfDoc = new PdfDocument(new PdfReader(sourceFolder + "hugeNumberOfPagesWithOnePage.pdf"),
93+
new PdfWriter(new ByteArrayOutputStream()));
94+
PdfPage page = new PdfPage(pdfDoc, pdfDoc.getDefaultPageSize());
95+
AssertUtil.doesNotThrow(() -> pdfDoc.addPage(1, page));
96+
}
97+
98+
@Test
99+
public void countDontCorrespondToRealTest() throws IOException {
100+
PdfDocument pdfDoc = new PdfDocument(new PdfReader(sourceFolder + "countDontCorrespondToReal.pdf"),
101+
new PdfWriter(new ByteArrayOutputStream()));
102+
PdfPage page = new PdfPage(pdfDoc, pdfDoc.getDefaultPageSize());
103+
AssertUtil.doesNotThrow(() -> pdfDoc.addPage(1, page));
104+
105+
// we don't expect that Count will be different from real number of pages
106+
Assert.assertThrows(NullPointerException.class, () -> pdfDoc.close());
107+
}
108+
89109
@Test
90110
public void simplePagesTest() throws IOException {
91111
String filename = "simplePagesTest.pdf";
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/*
2+
This file is part of the iText (R) project.
3+
Copyright (c) 1998-2022 iText Group NV
4+
Authors: iText Software.
5+
6+
This program is free software; you can redistribute it and/or modify
7+
it under the terms of the GNU Affero General Public License version 3
8+
as published by the Free Software Foundation with the addition of the
9+
following permission added to Section 15 as permitted in Section 7(a):
10+
FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
11+
ITEXT GROUP. ITEXT GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT
12+
OF THIRD PARTY RIGHTS
13+
14+
This program is distributed in the hope that it will be useful, but
15+
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16+
or FITNESS FOR A PARTICULAR PURPOSE.
17+
See the GNU Affero General Public License for more details.
18+
You should have received a copy of the GNU Affero General Public License
19+
along with this program; if not, see http://www.gnu.org/licenses or write to
20+
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21+
Boston, MA, 02110-1301 USA, or download the license from the following URL:
22+
http://itextpdf.com/terms-of-use/
23+
24+
The interactive user interfaces in modified source and object code versions
25+
of this program must display Appropriate Legal Notices, as required under
26+
Section 5 of the GNU Affero General Public License.
27+
28+
In accordance with Section 7(b) of the GNU Affero General Public License,
29+
a covered work must retain the producer line in every PDF that is created
30+
or manipulated using iText.
31+
32+
You can be released from the requirements of the license by purchasing
33+
a commercial license. Buying such a license is mandatory as soon as you
34+
develop commercial activities involving the iText software without
35+
disclosing the source code of your own applications.
36+
These activities include: offering paid services to customers as an ASP,
37+
serving PDFs on the fly in a web application, shipping iText with a closed
38+
source product.
39+
40+
For more information, please contact iText Software Corp. at this
41+
42+
*/
43+
package com.itextpdf.kernel.pdf;
44+
45+
import com.itextpdf.kernel.pdf.PdfPagesTree.NullUnlimitedList;
46+
import com.itextpdf.test.ExtendedITextTest;
47+
import com.itextpdf.test.annotations.type.UnitTest;
48+
49+
import org.junit.Assert;
50+
import org.junit.Test;
51+
import org.junit.experimental.categories.Category;
52+
53+
@Category(UnitTest.class)
54+
public class PdfPagesTreeTest extends ExtendedITextTest {
55+
@Test
56+
public void nullUnlimitedListAddTest() {
57+
NullUnlimitedList<String> list = new NullUnlimitedList<>();
58+
list.add("hey");
59+
list.add("bye");
60+
Assert.assertEquals(2, list.size());
61+
list.add(-1, "hello");
62+
list.add(3, "goodbye");
63+
Assert.assertEquals(2, list.size());
64+
}
65+
66+
@Test
67+
public void nullUnlimitedListIndexOfTest() {
68+
NullUnlimitedList<String> list = new NullUnlimitedList<>();
69+
list.add("hey");
70+
list.add(null);
71+
list.add("bye");
72+
list.add(null);
73+
Assert.assertEquals(4, list.size());
74+
Assert.assertEquals(1, list.indexOf(null));
75+
}
76+
77+
@Test
78+
public void nullUnlimitedListRemoveTest() {
79+
NullUnlimitedList<String> list = new NullUnlimitedList<>();
80+
list.add("hey");
81+
list.add("bye");
82+
Assert.assertEquals(2, list.size());
83+
list.remove(-1);
84+
list.remove(2);
85+
Assert.assertEquals(2, list.size());
86+
}
87+
}

0 commit comments

Comments
 (0)