Skip to content

Commit f904bcc

Browse files
committed
Support reading only one page
Previously, loadPage read every page that is a leaf of the parent node of the requested page leaf in the page tree. DEVSIX-3242, DEVSIX-3498
1 parent 37f2089 commit f904bcc

File tree

7 files changed

+152
-30
lines changed

7 files changed

+152
-30
lines changed

kernel/src/main/java/com/itextpdf/kernel/pdf/PdfPages.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,15 +93,16 @@ public void addPage(PdfDictionary page) {
9393
kids.add(page);
9494
incrementCount();
9595
page.put(PdfName.Parent, getPdfObject());
96+
page.setModified();
9697
}
9798

9899
public boolean addPage(int index, PdfPage pdfPage) {
99100
if (index < from || index > from + getCount())
100101
return false;
101102
kids.add(index - from, pdfPage.getPdfObject());
102103
pdfPage.getPdfObject().put(PdfName.Parent, getPdfObject());
104+
pdfPage.setModified();
103105
incrementCount();
104-
setModified();
105106
return true;
106107
}
107108

@@ -117,6 +118,7 @@ public void addPages(PdfPages pdfPages) {
117118
kids.add(pdfPages.getPdfObject());
118119
count.setValue(count.intValue() + pdfPages.getCount());
119120
pdfPages.getPdfObject().put(PdfName.Parent, getPdfObject());
121+
pdfPages.setModified();
120122
setModified();
121123
}
122124

kernel/src/main/java/com/itextpdf/kernel/pdf/PdfPagesTree.java

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ class PdfPagesTree implements Serializable {
6969
private boolean generated = false;
7070
private PdfPages root;
7171

72+
private static final Logger LOGGER = LoggerFactory.getLogger(PdfPagesTree.class);
73+
7274
/**
7375
* Creates a PdfPages tree.
7476
*
@@ -113,10 +115,15 @@ public PdfPage getPage(int pageNum) {
113115
loadPage(pageNum);
114116
if (pageRefs.get(pageNum) != null) {
115117
int parentIndex = findPageParent(pageNum);
116-
pdfPage = new PdfPage((PdfDictionary) pageRefs.get(pageNum).getRefersTo());
117-
pdfPage.parentPages = parents.get(parentIndex);
118+
PdfObject pageObject = pageRefs.get(pageNum).getRefersTo();
119+
if (pageObject instanceof PdfDictionary) {
120+
pdfPage = new PdfPage((PdfDictionary) pageObject);
121+
pdfPage.parentPages = parents.get(parentIndex);
122+
} else {
123+
LOGGER.error(MessageFormatUtil.format(LogMessageConstant.PAGE_TREE_IS_BROKEN_FAILED_TO_RETRIEVE_PAGE, pageNum + 1));
124+
}
118125
} else {
119-
LoggerFactory.getLogger(getClass()).error(MessageFormatUtil.format(LogMessageConstant.PAGE_TREE_IS_BROKEN_FAILED_TO_RETRIEVE_PAGE, pageNum + 1));
126+
LOGGER.error(MessageFormatUtil.format(LogMessageConstant.PAGE_TREE_IS_BROKEN_FAILED_TO_RETRIEVE_PAGE, pageNum + 1));
120127
}
121128
pages.set(pageNum, pdfPage);
122129
}
@@ -244,8 +251,7 @@ public void addPage(int index, PdfPage pdfPage) {
244251
public PdfPage removePage(int pageNum) {
245252
PdfPage pdfPage = getPage(pageNum);
246253
if (pdfPage.isFlushed()) {
247-
Logger logger = LoggerFactory.getLogger(PdfPage.class);
248-
logger.warn(LogMessageConstant.REMOVING_PAGE_HAS_ALREADY_BEEN_FLUSHED);
254+
LOGGER.warn(LogMessageConstant.REMOVING_PAGE_HAS_ALREADY_BEEN_FLUSHED);
249255
}
250256
if (internalRemovePage(--pageNum)) {
251257
return pdfPage;
@@ -355,10 +361,12 @@ private void loadPage(int pageNum) {
355361
findPdfPages = true;
356362
} else {
357363
// kids must be of type array
358-
359364
throw new PdfException(PdfException.InvalidPageStructure1).setMessageParams(pageNum + 1);
360365
}
361366
}
367+
if (document.getReader().isMemorySavingMode() && !findPdfPages && parent.getFrom() + i != pageNum) {
368+
page.release();
369+
}
362370
}
363371
if (findPdfPages) {
364372

@@ -367,6 +375,13 @@ private void loadPage(int pageNum) {
367375
List<PdfPages> newParents = new ArrayList<>(kids.size());
368376
PdfPages lastPdfPages = null;
369377
for (int i = 0; i < kids.size() && kidsCount > 0; i++) {
378+
/*
379+
* We don't release pdfPagesObject at the end of each iteration because we enter this for-loop only when parent has PdfPages kids.
380+
* If all of the kids are PdfPages, then there's nothing to release, because we don't release PdfPages at this point.
381+
* If there are kids that are instances of PdfPage, then there's no sense in releasing them:
382+
* in this case ParentTreeStructure is being rebuilt by inserting an intermediate PdfPages between the parent and a PdfPage,
383+
* which modifies the page object by resetting its parent and therefore makes it impossible to release the object.
384+
*/
370385
PdfDictionary pdfPagesObject = kids.getAsDictionary(i);
371386
if (pdfPagesObject.getAsArray(PdfName.Kids) == null) {
372387
// pdfPagesObject is PdfPage
@@ -414,12 +429,13 @@ private void loadPage(int pageNum) {
414429
// In any case parent.getCount() has higher priority.
415430
// NOTE optimization? when we already found needed index
416431
for (int i = 0; i < parent.getCount(); i++) {
417-
PdfDictionary kid = kids.getAsDictionary(i);
418-
419-
// make sure it's a dictionary
420-
if (kid != null) {
432+
PdfObject kid = kids.get(i, false);
433+
if (kid instanceof PdfIndirectReference) {
434+
pageRefs.set(from + i, (PdfIndirectReference) kid);
435+
} else {
421436
pageRefs.set(from + i, kid.getIndirectReference());
422437
}
438+
423439
}
424440
}
425441
}

kernel/src/main/java/com/itextpdf/kernel/pdf/PdfReader.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ public class PdfReader implements Closeable, Serializable {
8585

8686
private boolean unethicalReading;
8787

88+
private boolean memorySavingMode;
89+
8890
// Indicates the nearest indirect reference object that contains the object currently being read; used for PdfString decryption
8991
private PdfIndirectReference currentIndirectReference;
9092

@@ -201,6 +203,11 @@ public PdfReader setUnethicalReading(boolean unethicalReading) {
201203
return this;
202204
}
203205

206+
public PdfReader setMemorySavingMode(boolean memorySavingMode) {
207+
this.memorySavingMode = memorySavingMode;
208+
return this;
209+
}
210+
204211
/**
205212
* Gets whether {@link #close()} method shall close input stream.
206213
*
@@ -1211,6 +1218,10 @@ protected void rebuildXref() throws IOException {
12111218
throw new PdfException(PdfException.TrailerNotFound);
12121219
}
12131220

1221+
boolean isMemorySavingMode() {
1222+
return memorySavingMode;
1223+
}
1224+
12141225
private void readDecryptObj() {
12151226
if (encrypted)
12161227
return;

kernel/src/test/java/com/itextpdf/kernel/pdf/PdfPagesTest.java

Lines changed: 112 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ This file is part of the iText (R) project.
5858
import com.itextpdf.test.annotations.LogMessage;
5959
import com.itextpdf.test.annotations.LogMessages;
6060
import com.itextpdf.test.annotations.type.IntegrationTest;
61+
6162
import org.junit.Assert;
6263
import org.junit.BeforeClass;
6364
import org.junit.Rule;
@@ -144,22 +145,26 @@ public void reversePagesTest() throws IOException {
144145
@Test
145146
public void reversePagesTest2() throws Exception {
146147
String filename = "1000PagesDocument_reversed.pdf";
147-
PdfDocument pdfDoc = new PdfDocument(new PdfReader(sourceFolder + "1000PagesDocument.pdf"), new PdfWriter(destinationFolder + filename));
148+
PdfDocument pdfDoc = new PdfDocument(new PdfReader(sourceFolder + "1000PagesDocument.pdf"),
149+
new PdfWriter(destinationFolder + filename));
148150
int n = pdfDoc.getNumberOfPages();
149151
for (int i = n - 1; i > 0; --i) {
150152
pdfDoc.movePage(i, n + 1);
151153
}
152154
pdfDoc.close();
153-
new CompareTool().compareByContent(destinationFolder + filename, sourceFolder + "cmp_" + filename, destinationFolder, "diff");
155+
new CompareTool()
156+
.compareByContent(destinationFolder + filename, sourceFolder + "cmp_" + filename, destinationFolder,
157+
"diff");
154158
}
155159

156160
@Test
157161
public void randomObjectPagesTest() throws IOException {
158162
String filename = "randomObjectPagesTest.pdf";
159163
int pageCount = 10000;
160164
int[] indexes = new int[pageCount];
161-
for (int i = 0; i < indexes.length; i++)
165+
for (int i = 0; i < indexes.length; i++) {
162166
indexes[i] = i + 1;
167+
}
163168

164169
Random rnd = new Random();
165170
for (int i = indexes.length - 1; i > 0; i--) {
@@ -196,8 +201,9 @@ public void randomNumberPagesTest() throws IOException {
196201
String filename = "randomNumberPagesTest.pdf";
197202
int pageCount = 3000;
198203
int[] indexes = new int[pageCount];
199-
for (int i = 0; i < indexes.length; i++)
204+
for (int i = 0; i < indexes.length; i++) {
200205
indexes[i] = i + 1;
206+
}
201207

202208
Random rnd = new Random();
203209
for (int i = indexes.length - 1; i > 0; i--) {
@@ -245,8 +251,9 @@ public void insertFlushedPageTest() {
245251
pdfDoc.addPage(1, page);
246252
pdfDoc.close();
247253
} catch (PdfException e) {
248-
if (PdfException.FlushedPageCannotBeAddedOrInserted.equals(e.getMessage()))
254+
if (PdfException.FlushedPageCannotBeAddedOrInserted.equals(e.getMessage())) {
249255
error = true;
256+
}
250257
}
251258

252259
Assert.assertTrue(error);
@@ -267,8 +274,9 @@ public void addFlushedPageTest() {
267274
pdfDoc.addPage(page);
268275
pdfDoc.close();
269276
} catch (PdfException e) {
270-
if (PdfException.FlushedPageCannotBeAddedOrInserted.equals(e.getMessage()))
277+
if (PdfException.FlushedPageCannotBeAddedOrInserted.equals(e.getMessage())) {
271278
error = true;
279+
}
272280
}
273281

274282
Assert.assertTrue(error);
@@ -289,7 +297,6 @@ public void removeFlushedPage() throws IOException {
289297
removedPage.flush();
290298
pdfDoc.removePage(removedPage);
291299

292-
293300
for (int i = 0; i < pageCount; i++) {
294301
PdfPage page = pdfDoc.addNewPage();
295302
page.getPdfObject().put(PageNum, new PdfNumber(i + 1));
@@ -323,8 +330,9 @@ int verifyIntegrity(PdfPagesTree pagesTree) {
323330
List<PdfPages> parents = pagesTree.getParents();
324331
int from = 0;
325332
for (int i = 0; i < parents.size(); i++) {
326-
if (parents.get(i).getFrom() != from)
333+
if (parents.get(i).getFrom() != from) {
327334
return i;
335+
}
328336
from = parents.get(i).getFrom() + parents.get(i).getCount();
329337
}
330338
return -1;
@@ -390,7 +398,7 @@ public void getPageByDictionary() throws IOException {
390398
String filename = sourceFolder + "1000PagesDocument.pdf";
391399
PdfReader reader = new PdfReader(filename);
392400
PdfDocument pdfDoc = new PdfDocument(reader);
393-
PdfObject[] pageDictionaries = new PdfObject[]{
401+
PdfObject[] pageDictionaries = new PdfObject[] {
394402
pdfDoc.getPdfObject(4),
395403
pdfDoc.getPdfObject(255),
396404
pdfDoc.getPdfObject(512),
@@ -415,7 +423,8 @@ public void removePageWithFormFieldsTest() throws IOException {
415423
PdfDocument pdfDoc = new PdfDocument(new PdfReader(filename));
416424
pdfDoc.removePage(1);
417425

418-
PdfArray fields = pdfDoc.getCatalog().getPdfObject().getAsDictionary(PdfName.AcroForm).getAsArray(PdfName.Fields);
426+
PdfArray fields = pdfDoc.getCatalog().getPdfObject().getAsDictionary(PdfName.AcroForm)
427+
.getAsArray(PdfName.Fields);
419428
PdfDictionary field = (PdfDictionary) fields.get(0);
420429
PdfDictionary kid = (PdfDictionary) field.getAsArray(PdfName.Kids).get(0);
421430
Assert.assertEquals(6, kid.keySet().size());
@@ -443,11 +452,15 @@ public void getPageSizeWithInheritedMediaBox() throws IOException {
443452
public void pageThumbnailTest() throws Exception {
444453
String filename = "pageThumbnail.pdf";
445454
String imageSrc = "icon.jpg";
446-
PdfDocument pdfDoc = new PdfDocument(new PdfWriter(destinationFolder + filename).setCompressionLevel(CompressionConstants.NO_COMPRESSION));
447-
PdfPage page = pdfDoc.addNewPage().setThumbnailImage(new PdfImageXObject(ImageDataFactory.create(sourceFolder + imageSrc)));
455+
PdfDocument pdfDoc = new PdfDocument(
456+
new PdfWriter(destinationFolder + filename).setCompressionLevel(CompressionConstants.NO_COMPRESSION));
457+
PdfPage page = pdfDoc.addNewPage()
458+
.setThumbnailImage(new PdfImageXObject(ImageDataFactory.create(sourceFolder + imageSrc)));
448459
new PdfCanvas(page).setFillColor(ColorConstants.RED).rectangle(100, 100, 400, 400).fill();
449460
pdfDoc.close();
450-
new CompareTool().compareByContent(destinationFolder + filename, sourceFolder + "cmp_" + filename, destinationFolder, "diff");
461+
new CompareTool()
462+
.compareByContent(destinationFolder + filename, sourceFolder + "cmp_" + filename, destinationFolder,
463+
"diff");
451464
}
452465

453466
@Test
@@ -498,7 +511,8 @@ private boolean testPageTreeParentsValid(String src) throws com.itextpdf.io.IOEx
498511
PdfDictionary page_root = pdfDocument.getCatalog().getPdfObject().getAsDictionary(PdfName.Pages);
499512
for (int x = 1; x < pdfDocument.getNumberOfPdfObjects(); x++) {
500513
PdfObject obj = pdfDocument.getPdfObject(x);
501-
if (obj != null && obj.isDictionary() && ((PdfDictionary) obj).getAsName(PdfName.Type) != null && ((PdfDictionary) obj).getAsName(PdfName.Type).equals(PdfName.Pages)) {
514+
if (obj != null && obj.isDictionary() && ((PdfDictionary) obj).getAsName(PdfName.Type) != null
515+
&& ((PdfDictionary) obj).getAsName(PdfName.Type).equals(PdfName.Pages)) {
502516
if (obj != page_root) {
503517
PdfDictionary parent = ((PdfDictionary) obj).getAsDictionary(PdfName.Parent);
504518
if (parent == null) {
@@ -554,7 +568,8 @@ public void pageGetMediaBoxTooManyArgumentsTest() throws IOException {
554568
@Test
555569
public void pageGetMediaBoxNotEnoughArgumentsTest() throws IOException {
556570
junitExpectedException.expect(PdfException.class);
557-
junitExpectedException.expectMessage(MessageFormatUtil.format(PdfException.WRONGMEDIABOXSIZETOOFEWARGUMENTS,3));
571+
junitExpectedException
572+
.expectMessage(MessageFormatUtil.format(PdfException.WRONGMEDIABOXSIZETOOFEWARGUMENTS, 3));
558573

559574
PdfReader reader = new PdfReader(sourceFolder + "helloWorldMediaboxNotEnoughArguments.pdf");
560575

@@ -565,18 +580,94 @@ public void pageGetMediaBoxNotEnoughArgumentsTest() throws IOException {
565580
Assert.fail("Exception was not thrown");
566581
}
567582

583+
@Test
584+
public void insertIntermediateParentTest() throws IOException {
585+
String filename = "insertIntermediateParentTest.pdf";
586+
PdfReader reader = new PdfReader(sourceFolder + filename);
587+
PdfWriter writer = new PdfWriter(new ByteArrayOutputStream());
588+
PdfDocument pdfDoc = new PdfDocument(reader, writer, new StampingProperties().useAppendMode());
589+
590+
PdfPage page = pdfDoc.getFirstPage();
591+
592+
PdfPages pdfPages = new PdfPages(page.parentPages.getFrom(), pdfDoc, page.parentPages);
593+
page.parentPages.getKids().set(0, pdfPages.getPdfObject());
594+
page.parentPages.decrementCount();
595+
pdfPages.addPage(page.getPdfObject());
596+
597+
pdfDoc.close();
598+
599+
Assert.assertTrue(page.getPdfObject().isModified());
600+
}
601+
568602
@Test
569603
public void verifyPagesAreNotReadOnOpenTest() throws IOException {
570604
String srcFile = sourceFolder + "taggedOnePage.pdf";
571605
CustomPdfReader reader = new CustomPdfReader(srcFile);
572606
PdfDocument document = new PdfDocument(reader);
573607
document.close();
574-
Assert.assertFalse(reader.undesiredPageHasBeenRead);
608+
Assert.assertFalse(reader.pagesAreRead);
575609
}
576610

611+
@Test
612+
public void readPagesInBlocksTest() throws IOException {
613+
String srcFile = sourceFolder + "docWithBalancedPageTree.pdf";
614+
int maxAmountOfPagesReadAtATime = 0;
615+
CustomPdfReader reader = new CustomPdfReader(srcFile);
616+
PdfDocument document = new PdfDocument(reader);
617+
for (int page = 1; page <= document.getNumberOfPages(); page++) {
618+
document.getPage(page);
619+
if (reader.numOfPagesRead > maxAmountOfPagesReadAtATime) {
620+
maxAmountOfPagesReadAtATime = reader.numOfPagesRead;
621+
}
622+
reader.numOfPagesRead = 0;
623+
}
624+
625+
Assert.assertEquals(111, document.getNumberOfPages());
626+
Assert.assertEquals(10, maxAmountOfPagesReadAtATime);
627+
628+
document.close();
629+
}
630+
631+
@Test
632+
public void readSinglePageTest() throws IOException {
633+
String srcFile = sourceFolder + "allPagesAreLeaves.pdf";
634+
CustomPdfReader reader = new CustomPdfReader(srcFile);
635+
reader.setMemorySavingMode(true);
636+
PdfDocument document = new PdfDocument(reader);
637+
int amountOfPages = document.getNumberOfPages();
638+
639+
PdfPages pdfPages = document.catalog.getPageTree().getRoot();
640+
PdfArray pageIndRefArray = ((PdfDictionary) pdfPages.getPdfObject()).getAsArray(PdfName.Kids);
641+
642+
document.getPage(amountOfPages);
643+
Assert.assertEquals(1, getAmountOfReadPages(pageIndRefArray));
644+
645+
document.getPage(amountOfPages / 2);
646+
Assert.assertEquals(2, getAmountOfReadPages(pageIndRefArray));
647+
648+
document.getPage(1);
649+
Assert.assertEquals(3, getAmountOfReadPages(pageIndRefArray));
650+
651+
document.close();
652+
}
653+
654+
private int getAmountOfReadPages(PdfArray pageIndRefArray) {
655+
int amountOfLoadedPages = 0;
656+
for (int i = 0; i < pageIndRefArray.size(); i++) {
657+
if (((PdfIndirectReference) pageIndRefArray.get(i, false)).refersTo != null) {
658+
amountOfLoadedPages++;
659+
}
660+
}
661+
return amountOfLoadedPages;
662+
}
663+
664+
665+
577666
private class CustomPdfReader extends PdfReader {
578667

579-
public boolean undesiredPageHasBeenRead = false;
668+
public boolean pagesAreRead = false;
669+
670+
public int numOfPagesRead = 0;
580671

581672
public CustomPdfReader(String filename) throws IOException {
582673
super(filename);
@@ -585,8 +676,10 @@ public CustomPdfReader(String filename) throws IOException {
585676
@Override
586677
protected PdfObject readObject(PdfIndirectReference reference) {
587678
PdfObject toReturn = super.readObject(reference);
588-
if (reference.getObjNumber() == 6) {
589-
undesiredPageHasBeenRead = true;
679+
if (toReturn instanceof PdfDictionary
680+
&& PdfName.Page.equals(((PdfDictionary) toReturn).get(PdfName.Type))) {
681+
numOfPagesRead++;
682+
pagesAreRead = true;
590683
}
591684
return toReturn;
592685
}

0 commit comments

Comments
 (0)