Skip to content

Commit deb658a

Browse files
committed
Provide a NullUnlimitedList implementation to prevent OOM exception while opening document with enormous page count
DEVSIX-7793
1 parent 12cb9d9 commit deb658a

File tree

5 files changed

+167
-23
lines changed

5 files changed

+167
-23
lines changed

kernel/src/main/java/com/itextpdf/kernel/pdf/PdfPagesTree.java

Lines changed: 107 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -43,19 +43,20 @@ This file is part of the iText (R) project.
4343
*/
4444
package com.itextpdf.kernel.pdf;
4545

46-
import com.itextpdf.io.logs.IoLogMessageConstant;
4746
import com.itextpdf.commons.utils.MessageFormatUtil;
48-
import com.itextpdf.kernel.exceptions.PdfException;
47+
import com.itextpdf.io.logs.IoLogMessageConstant;
4948
import com.itextpdf.kernel.exceptions.KernelExceptionMessageConstant;
49+
import com.itextpdf.kernel.exceptions.PdfException;
5050

51+
import java.util.ArrayList;
52+
import java.util.HashMap;
5153
import java.util.HashSet;
54+
import java.util.List;
55+
import java.util.Map;
5256
import java.util.Set;
5357
import org.slf4j.Logger;
5458
import org.slf4j.LoggerFactory;
5559

56-
import java.util.ArrayList;
57-
import java.util.List;
58-
5960
/**
6061
* Algorithm for constructing the {@link PdfPages} tree
6162
*/
@@ -65,9 +66,9 @@ class PdfPagesTree {
6566

6667
private final int leafSize = DEFAULT_LEAF_SIZE;
6768

68-
private List<PdfIndirectReference> pageRefs;
69+
private NullUnlimitedList<PdfIndirectReference> pageRefs;
6970
private List<PdfPages> parents;
70-
private List<PdfPage> pages;
71+
private NullUnlimitedList<PdfPage> pages;
7172
private PdfDocument document;
7273
private boolean generated = false;
7374
private PdfPages root;
@@ -81,9 +82,9 @@ class PdfPagesTree {
8182
*/
8283
public PdfPagesTree(PdfCatalog pdfCatalog) {
8384
this.document = pdfCatalog.getDocument();
84-
this.pageRefs = new ArrayList<>();
85+
this.pageRefs = new NullUnlimitedList<>();
8586
this.parents = new ArrayList<>();
86-
this.pages = new ArrayList<>();
87+
this.pages = new NullUnlimitedList<>();
8788
if (pdfCatalog.getPdfObject().containsKey(PdfName.Pages)) {
8889
PdfDictionary pages = pdfCatalog.getPdfObject().getAsDictionary(PdfName.Pages);
8990
if (pages == null) {
@@ -472,10 +473,9 @@ private void loadPage(int pageNum, Set<PdfIndirectReference> processedParents) {
472473
} else {
473474
int from = parent.getFrom();
474475

475-
// Possible exception in case kids.getSize() < parent.getCount().
476-
// In any case parent.getCount() has higher priority.
477476
// NOTE optimization? when we already found needed index
478-
for (int i = 0; i < parent.getCount(); i++) {
477+
final int pageCount = Math.min(parent.getCount(), kids.size());
478+
for (int i = 0; i < pageCount; i++) {
479479
PdfObject kid = kids.get(i, false);
480480
if (kid instanceof PdfIndirectReference) {
481481
pageRefs.set(from + i, (PdfIndirectReference) kid);
@@ -533,4 +533,99 @@ private void correctPdfPagesFromProperty(int index, int correction) {
533533
}
534534
}
535535
}
536+
537+
/**
538+
* The class represents a list which allows null elements, but doesn't allocate a memory for them, in the rest of
539+
* cases it behaves like usual {@link ArrayList} and should have the same complexity (because keys are unique
540+
* integers, so collisions are impossible). Class doesn't implement {@code List} interface because it provides
541+
* only methods which are in use in {@link PdfPagesTree} class.
542+
*
543+
* @param <T> elements of the list
544+
*/
545+
static final class NullUnlimitedList<T> {
546+
private final Map<Integer, T> map = new HashMap<>();
547+
private int size = 0;
548+
549+
// O(1)
550+
public void add(T element) {
551+
if (element == null) {
552+
size++;
553+
return;
554+
}
555+
map.put(size++, element);
556+
}
557+
558+
// In worth scenario O(n^2) but it is mostly impossible because keys shouldn't have
559+
// collisions at all (they are integers). So in average should be O(n).
560+
public void add(int index, T element) {
561+
if (index < 0 || index > size) {
562+
return;
563+
}
564+
size++;
565+
// Shifts the element currently at that position (if any) and any
566+
// subsequent elements to the right (adds one to their indices).
567+
T previous = map.get(index);
568+
for (int i = index + 1; i < size; i++) {
569+
T currentToAdd = previous;
570+
previous = map.get(i);
571+
this.set(i, currentToAdd);
572+
}
573+
574+
this.set(index, element);
575+
}
576+
577+
// average O(1), worth O(n) (mostly impossible in case when keys are integers)
578+
public T get(int index) {
579+
return map.get(index);
580+
}
581+
582+
// average O(1), worth O(n) (mostly impossible in case when keys are integers)
583+
public void set(int index, T element) {
584+
if (element == null) {
585+
map.remove(index);
586+
} else {
587+
map.put(index, element);
588+
}
589+
}
590+
591+
// O(n)
592+
public int indexOf(T element) {
593+
if (element == null) {
594+
for (int i = 0; i < size; i++) {
595+
if (!map.containsKey(i)) {
596+
return i;
597+
}
598+
}
599+
return -1;
600+
}
601+
for (Map.Entry<Integer, T> entry : map.entrySet()) {
602+
if (element.equals(entry.getValue())) {
603+
return entry.getKey();
604+
}
605+
}
606+
return -1;
607+
}
608+
609+
// In worth scenario O(n^2) but it is mostly impossible because keys shouldn't have
610+
// collisions at all (they are integers). So in average should be O(n).
611+
public void remove(int index) {
612+
if (index < 0 || index >= size) {
613+
return;
614+
}
615+
map.remove(index);
616+
// Shifts any subsequent elements to the left (subtracts one from their indices).
617+
T previous = map.get(size - 1);
618+
for (int i = size - 2; i >= index; i--) {
619+
T current = previous;
620+
previous = map.get(i);
621+
this.set(i, current);
622+
}
623+
map.remove(--size);
624+
}
625+
626+
// O(1)
627+
public int size() {
628+
return size;
629+
}
630+
}
536631
}

kernel/src/test/java/com/itextpdf/kernel/pdf/PdfPagesTest.java

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,13 @@ This file is part of the iText (R) project.
4242
*/
4343
package com.itextpdf.kernel.pdf;
4444

45-
import com.itextpdf.io.logs.IoLogMessageConstant;
45+
import com.itextpdf.commons.utils.MessageFormatUtil;
4646
import com.itextpdf.io.image.ImageDataFactory;
47+
import com.itextpdf.io.logs.IoLogMessageConstant;
4748
import com.itextpdf.io.source.RandomAccessSourceFactory;
48-
import com.itextpdf.commons.utils.MessageFormatUtil;
49-
import com.itextpdf.kernel.exceptions.PdfException;
5049
import com.itextpdf.kernel.colors.ColorConstants;
5150
import com.itextpdf.kernel.exceptions.KernelExceptionMessageConstant;
51+
import com.itextpdf.kernel.exceptions.PdfException;
5252
import com.itextpdf.kernel.geom.PageSize;
5353
import com.itextpdf.kernel.geom.Rectangle;
5454
import com.itextpdf.kernel.pdf.annot.PdfAnnotation;
@@ -62,10 +62,6 @@ This file is part of the iText (R) project.
6262
import com.itextpdf.test.annotations.LogMessage;
6363
import com.itextpdf.test.annotations.LogMessages;
6464
import com.itextpdf.test.annotations.type.IntegrationTest;
65-
import org.junit.Assert;
66-
import org.junit.BeforeClass;
67-
import org.junit.Test;
68-
import org.junit.experimental.categories.Category;
6965

7066
import java.io.ByteArrayOutputStream;
7167
import java.io.IOException;
@@ -75,6 +71,10 @@ This file is part of the iText (R) project.
7571
import java.util.List;
7672
import java.util.Random;
7773
import java.util.Set;
74+
import org.junit.Assert;
75+
import org.junit.BeforeClass;
76+
import org.junit.Test;
77+
import org.junit.experimental.categories.Category;
7878

7979
@Category(IntegrationTest.class)
8080
public class PdfPagesTest extends ExtendedITextTest {
@@ -87,6 +87,25 @@ public static void setup() {
8787
createDestinationFolder(DESTINATION_FOLDER);
8888
}
8989

90+
@Test
91+
public void hugeNumberOfPagesWithOnePageTest() throws IOException {
92+
PdfDocument pdfDoc = new PdfDocument(new PdfReader(SOURCE_FOLDER + "hugeNumberOfPagesWithOnePage.pdf"),
93+
new PdfWriter(new ByteArrayOutputStream()));
94+
PdfPage page = new PdfPage(pdfDoc, pdfDoc.getDefaultPageSize());
95+
AssertUtil.doesNotThrow(() -> pdfDoc.addPage(1, page));
96+
}
97+
98+
@Test
99+
public void countDontCorrespondToRealTest() throws IOException {
100+
PdfDocument pdfDoc = new PdfDocument(new PdfReader(SOURCE_FOLDER + "countDontCorrespondToReal.pdf"),
101+
new PdfWriter(new ByteArrayOutputStream()));
102+
PdfPage page = new PdfPage(pdfDoc, pdfDoc.getDefaultPageSize());
103+
AssertUtil.doesNotThrow(() -> pdfDoc.addPage(1, page));
104+
105+
// we don't expect that Count will be different from real number of pages
106+
Assert.assertThrows(NullPointerException.class, () -> pdfDoc.close());
107+
}
108+
90109
@Test
91110
public void simplePagesTest() throws IOException {
92111
String filename = "simplePagesTest.pdf";

kernel/src/test/java/com/itextpdf/kernel/pdf/PdfPagesTreeTest.java

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,11 @@ This file is part of the iText (R) project.
2323
package com.itextpdf.kernel.pdf;
2424

2525
import com.itextpdf.io.source.ByteArrayOutputStream;
26-
import com.itextpdf.kernel.events.PdfDocumentEvent;
27-
import com.itextpdf.kernel.pdf.layer.PdfLayer;
28-
import com.itextpdf.kernel.utils.CompareTool;
26+
import com.itextpdf.kernel.pdf.PdfPagesTree.NullUnlimitedList;
2927
import com.itextpdf.test.AssertUtil;
3028
import com.itextpdf.test.ExtendedITextTest;
3129
import com.itextpdf.test.annotations.type.UnitTest;
3230

33-
import java.io.IOException;
3431
import org.junit.Assert;
3532
import org.junit.Test;
3633
import org.junit.experimental.categories.Category;
@@ -42,4 +39,37 @@ public void generateTreeDocHasNoPagesTest() {
4239
PdfDocument pdfDoc = new PdfDocument(new PdfWriter(new ByteArrayOutputStream()));
4340
AssertUtil.doesNotThrow(() -> pdfDoc.close());
4441
}
42+
43+
@Test
44+
public void nullUnlimitedListAddTest() {
45+
NullUnlimitedList<String> list = new NullUnlimitedList<>();
46+
list.add("hey");
47+
list.add("bye");
48+
Assert.assertEquals(2, list.size());
49+
list.add(-1, "hello");
50+
list.add(3, "goodbye");
51+
Assert.assertEquals(2, list.size());
52+
}
53+
54+
@Test
55+
public void nullUnlimitedListIndexOfTest() {
56+
NullUnlimitedList<String> list = new NullUnlimitedList<>();
57+
list.add("hey");
58+
list.add(null);
59+
list.add("bye");
60+
list.add(null);
61+
Assert.assertEquals(4, list.size());
62+
Assert.assertEquals(1, list.indexOf(null));
63+
}
64+
65+
@Test
66+
public void nullUnlimitedListRemoveTest() {
67+
NullUnlimitedList<String> list = new NullUnlimitedList<>();
68+
list.add("hey");
69+
list.add("bye");
70+
Assert.assertEquals(2, list.size());
71+
list.remove(-1);
72+
list.remove(2);
73+
Assert.assertEquals(2, list.size());
74+
}
4575
}

0 commit comments

Comments
 (0)