Skip to content

Commit a80bb1b

Browse files
committed
PDFBOX-6036: avoid overlapping object keys when importing pages from another pdf
git-svn-id: https://svn.apache.org/repos/asf/pdfbox/trunk@1930615 13f79535-47bb-0310-9956-ffa450edef68
1 parent 832593c commit a80bb1b

File tree

4 files changed

+69
-40
lines changed

4 files changed

+69
-40
lines changed

pdfbox/src/main/java/org/apache/pdfbox/cos/COSArray.java

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -772,28 +772,30 @@ public COSUpdateState getUpdateState()
772772

773773
/**
774774
* Collects the object keys of all indirect objects within this COSArray and all included dictionaries. It is used to avoid
775-
* mixed up object numbers when importing an existing page to another pdf.
775+
* overlapping object numbers when importing an existing page to another pdf.
776776
*
777777
* Expert use only. You might run into an endless recursion when choosing the wrong starting point.
778778
*
779779
* @param indirectObjects a collection of already found indirect objects.
780780
*
781781
*/
782-
public void getIndirectObjectKeys(Collection<COSObjectKey> indirectObjects)
782+
protected Collection<COSObjectKey> resetObjectKeys(Collection<COSObjectKey> indirectObjects)
783783
{
784784
if (indirectObjects == null)
785785
{
786-
return;
786+
return indirectObjects;
787787
}
788788
COSObjectKey key = getKey();
789789
if (key != null)
790790
{
791791
// avoid endless recursions
792792
if (indirectObjects.contains(key))
793793
{
794-
return;
794+
return indirectObjects;
795795
}
796796
indirectObjects.add(key);
797+
// reset key
798+
setKey(null);
797799
}
798800
for (COSBase cosBase : objects)
799801
{
@@ -808,25 +810,29 @@ public void getIndirectObjectKeys(Collection<COSObjectKey> indirectObjects)
808810
{
809811
continue;
810812
}
811-
// dereference object
812-
cosBase = ((COSObject) cosBase).getObject();
813+
// dereference object first
814+
COSBase dereferencedObject = ((COSObject) cosBase).getObject();
815+
// reset key
816+
cosBase.setKey(null);
817+
cosBase = dereferencedObject;
813818
}
814819
if (cosBase instanceof COSDictionary)
815820
{
816-
// descend to included dictionary to collect all included indirect objects
817-
((COSDictionary) cosBase).getIndirectObjectKeys(indirectObjects);
821+
// descend to included dictionary to reset all included indirect objects
822+
((COSDictionary) cosBase).resetObjectKeys(indirectObjects);
818823
}
819824
else if (cosBase instanceof COSArray)
820825
{
821-
// descend to included array to collect all included indirect objects
822-
((COSArray) cosBase).getIndirectObjectKeys(indirectObjects);
826+
// descend to included array to reset all included indirect objects
827+
((COSArray) cosBase).resetObjectKeys(indirectObjects);
823828
}
824829
else if (indirectObjectKey != null)
825830
{
826831
// add key for all indirect objects other than COSDictionary/COSArray
827832
indirectObjects.add(indirectObjectKey);
828833
}
829834
}
835+
return indirectObjects;
830836
}
831837

832838
// wrap indirect objects

pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import java.util.Arrays;
2323
import java.util.Calendar;
2424
import java.util.Collection;
25+
import java.util.HashSet;
2526
import java.util.LinkedHashMap;
2627
import java.util.List;
2728
import java.util.Map;
@@ -1424,30 +1425,40 @@ public COSUpdateState getUpdateState()
14241425
return updateState;
14251426
}
14261427

1428+
/**
1429+
* Resets the object keys of this dictionary and of the indirect objects found within it, to avoid overlapping object numbers when saving the new pdf.
1430+
*/
1431+
public void resetImportedObjectKeys()
1432+
{
1433+
resetObjectKeys(new HashSet<>()).clear();
1434+
}
1435+
14271436
/**
14281437
* Collects the object keys of all indirect objects within this dictionary and all included dictionaries. It is used to avoid
1429-
* mixed up object numbers when importing an existing page to another pdf.
1438+
* overlapping object numbers when importing an existing page to another pdf.
14301439
*
14311440
* Expert use only. You might run into an endless recursion when choosing the wrong starting point.
14321441
*
14331442
* @param indirectObjects a collection of already found indirect objects.
14341443
*
14351444
*/
1436-
public void getIndirectObjectKeys(Collection<COSObjectKey> indirectObjects)
1445+
protected Collection<COSObjectKey> resetObjectKeys(Collection<COSObjectKey> indirectObjects)
14371446
{
14381447
if (indirectObjects == null)
14391448
{
1440-
return;
1449+
return indirectObjects;
14411450
}
14421451
COSObjectKey key = getKey();
14431452
if (key != null)
14441453
{
14451454
// avoid endless recursions
14461455
if (indirectObjects.contains(key))
14471456
{
1448-
return;
1457+
return indirectObjects;
14491458
}
14501459
indirectObjects.add(key);
1460+
// reset object key
1461+
setKey(null);
14511462
}
14521463
for (Entry<COSName, COSBase> entry : items.entrySet())
14531464
{
@@ -1460,30 +1471,33 @@ public void getIndirectObjectKeys(Collection<COSObjectKey> indirectObjects)
14601471
{
14611472
continue;
14621473
}
1463-
// dereference object
1474+
// dereference object first
14641475
cosBase = ((COSObject) cosBase).getObject();
1476+
// reset object key
1477+
entry.getValue().setKey(null);
14651478
}
14661479
if (cosBase instanceof COSDictionary)
14671480
{
14681481
COSName entryKey = entry.getKey();
1469-
// descend to included dictionary to collect all included indirect objects
1482+
// descend to included dictionary to reset all included indirect objects
14701483
// skip PARENT and P references to avoid recursions
14711484
if (!COSName.PARENT.equals(entryKey) && !COSName.P.equals(entryKey))
14721485
{
1473-
((COSDictionary) cosBase).getIndirectObjectKeys(indirectObjects);
1486+
((COSDictionary) cosBase).resetObjectKeys(indirectObjects);
14741487
}
14751488
}
14761489
else if (cosBase instanceof COSArray)
14771490
{
1478-
// descend to included array to collect all included indirect objects
1479-
((COSArray) cosBase).getIndirectObjectKeys(indirectObjects);
1491+
// descend to included array to reset all included indirect objects
1492+
((COSArray) cosBase).resetObjectKeys(indirectObjects);
14801493
}
14811494
else if (indirectObjectKey != null)
14821495
{
14831496
// add key for all indirect objects other than COSDictionary/COSArray
14841497
indirectObjects.add(indirectObjectKey);
14851498
}
14861499
}
1500+
return indirectObjects;
14871501
}
14881502

14891503
}

pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828
import java.io.OutputStream;
2929
import java.util.ArrayList;
3030
import java.util.Arrays;
31-
import java.util.Collection;
3231
import java.util.HashSet;
3332
import java.util.Iterator;
3433
import java.util.List;
@@ -45,7 +44,6 @@
4544
import org.apache.pdfbox.cos.COSInteger;
4645
import org.apache.pdfbox.cos.COSName;
4746
import org.apache.pdfbox.cos.COSObject;
48-
import org.apache.pdfbox.cos.COSObjectKey;
4947
import org.apache.pdfbox.cos.COSUpdateInfo;
5048
import org.apache.pdfbox.io.IOUtils;
5149
import org.apache.pdfbox.io.RandomAccessRead;
@@ -152,9 +150,6 @@ public class PDDocument implements Closeable
152150
// to make sure only one signature is added
153151
private boolean signatureAdded = false;
154152

155-
// cache for the key of all imported indirect objects
156-
private final Collection<COSObjectKey> indirectObjectKeys = new HashSet<>();
157-
158153
/**
159154
* Creates an empty PDF document.
160155
* You need to add at least one page for the document to be valid.
@@ -240,7 +235,6 @@ public PDDocument(COSDocument doc, RandomAccessRead source, AccessPermission per
240235
public void addPage(PDPage page)
241236
{
242237
getPages().add(page);
243-
setHighestImportedObjectNumber(page);
244238
}
245239

246240
/**
@@ -703,6 +697,8 @@ public PDPage importPage(PDPage page) throws IOException
703697
importedPage.getCOSObject().removeItem(COSName.PARENT);
704698
PDStream dest = new PDStream(this, page.getContents(), COSName.FLATE_DECODE);
705699
importedPage.setContents(dest);
700+
// reset imported object keys to avoid overlapping object numbers
701+
importedPage.getCOSObject().resetImportedObjectKeys();
706702
addPage(importedPage);
707703
importedPage.setCropBox(new PDRectangle(page.getCropBox().getCOSArray()));
708704
importedPage.setMediaBox(new PDRectangle(page.getMediaBox().getCOSArray()));
@@ -715,21 +711,6 @@ public PDPage importPage(PDPage page) throws IOException
715711
return importedPage;
716712
}
717713

718-
/**
719-
* Determine the highest object number from the imported page to avoid mixed up numbers when saving the new pdf.
720-
*
721-
* @param importedPage the imported page.
722-
*/
723-
private void setHighestImportedObjectNumber(PDPage importedPage)
724-
{
725-
importedPage.getCOSObject().getIndirectObjectKeys(indirectObjectKeys);
726-
long highestImportedNumber = indirectObjectKeys.stream().map(COSObjectKey::getNumber)
727-
.max(Long::compare).orElse(0L);
728-
long highestXRefObjectNumber = getDocument().getHighestXRefObjectNumber();
729-
getDocument().setHighestXRefObjectNumber(
730-
Math.max(highestXRefObjectNumber, highestImportedNumber));
731-
}
732-
733714
/**
734715
* This will get the low level document.
735716
*

pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSWriterTest.java

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
import java.io.ByteArrayOutputStream;
2424
import java.io.File;
2525
import java.io.IOException;
26+
import java.net.URI;
27+
import java.net.URISyntaxException;
2628
import java.nio.file.Paths;
2729

2830
import org.apache.pdfbox.Loader;
@@ -148,4 +150,30 @@ private static byte[] edit(byte[] input) throws IOException
148150
}
149151
}
150152

153+
@Test
154+
void testPDFBox5752() throws IOException, URISyntaxException
155+
{
156+
ByteArrayOutputStream baos = new ByteArrayOutputStream();
157+
byte[] emptyPDF = new URI(
158+
"https://issues.apache.org/jira/secure/attachment/13066015/empty.pdf").toURL()
159+
.openStream().readAllBytes();
160+
byte[] roboPDF = new URI(
161+
"https://issues.apache.org/jira/secure/attachment/13066016/roboto-14.pdf").toURL()
162+
.openStream().readAllBytes();
163+
try (PDDocument targetDoc = Loader.loadPDF(emptyPDF);
164+
PDDocument doc2 = Loader.loadPDF(roboPDF))
165+
{
166+
PDPage sourcePage = doc2.getPage(0);
167+
targetDoc.importPage(sourcePage);
168+
targetDoc.save(baos);
169+
}
170+
try (PDDocument targetDoc = Loader.loadPDF(baos.toByteArray()))
171+
{
172+
assertNotNull(targetDoc.getDocumentCatalog().getStructureTreeRoot());
173+
PDResources res = targetDoc.getPage(1).getResources();
174+
assertEquals("BCDEEE+Roboto-Regular", res.getFont(COSName.getPDFName("F1")).getName());
175+
assertEquals("BCDFEE+Roboto-Regular", res.getFont(COSName.getPDFName("F2")).getName());
176+
}
177+
}
178+
151179
}

0 commit comments

Comments
 (0)