Skip to content

Commit 7c32532

Browse files
committed
PDFBOX-5929: remove orphan annotations from the structure tree if annotations were removed from the page
git-svn-id: https://svn.apache.org/repos/asf/pdfbox/trunk@1922671 13f79535-47bb-0310-9956-ffa450edef68
1 parent 91f2db5 commit 7c32532

File tree

2 files changed

+63
-0
lines changed

2 files changed

+63
-0
lines changed

pdfbox/src/main/java/org/apache/pdfbox/multipdf/Splitter.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -454,6 +454,10 @@ private COSBase createDictionaryClone(COSBase src, COSBase dstParent, COSDiction
454454
// replace annotation with clone
455455
dstDict.setItem(COSName.OBJ, dstObj);
456456
}
457+
else
458+
{
459+
removePossibleOrphanAnnotation(srcObj, srcDict, currentPageDict, dstDict);
460+
}
457461
}
458462
else
459463
{
@@ -492,6 +496,35 @@ private COSBase createDictionaryClone(COSBase src, COSBase dstParent, COSDiction
492496
}
493497
return dstDict;
494498
}
499+
500+
private void removePossibleOrphanAnnotation(COSDictionary srcObj, COSDictionary srcDict,
501+
COSDictionary currentPageDict, COSDictionary dstDict)
502+
{
503+
// PDFBOX-5929: Check whether this is an "orphan" annotation that isn't in the page
504+
COSBase objType = srcObj.getDictionaryObject(COSName.TYPE);
505+
COSBase objSubtype = srcObj.getDictionaryObject(COSName.SUBTYPE);
506+
if (COSName.ANNOT.equals(objType) || COSName.LINK.equals(objSubtype))
507+
{
508+
COSDictionary srcPageDict = srcDict.getCOSDictionary(COSName.PG);
509+
if (srcPageDict == null)
510+
{
511+
// /Pg entry is not always on this level
512+
srcPageDict = currentPageDict;
513+
}
514+
if (srcPageDict != null)
515+
{
516+
COSArray annotationArray = srcPageDict.getCOSArray(COSName.ANNOTS);
517+
if (annotationArray == null || annotationArray.indexOfObject(srcObj) == -1)
518+
{
519+
// Ideally the entire OBJR entry should be removed.
520+
// Removing the OBJ entry is done to avoid potential page orphans
521+
// from the annotation destination.
522+
LOG.warn("An annotation OBJ that isn't in the page has been removed from the structure tree");
523+
dstDict.removeItem(COSName.OBJ);
524+
}
525+
}
526+
}
527+
}
495528
}
496529

497530
// Look for /StructParent and /StructParents and add them to the destination tree

pdfbox/src/test/java/org/apache/pdfbox/multipdf/PDFMergerUtilityTest.java

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -924,6 +924,7 @@ else if (kdict.containsKey(COSName.NUMS))
924924
PDPage page = annotation.getPage();
925925
if (annotation instanceof PDAnnotationLink)
926926
{
927+
// PDFBOX-5928: check whether the destination of a link annotation is an orphan
927928
PDAnnotationLink link = (PDAnnotationLink) annotation;
928929
PDDestination destination = link.getDestination();
929930
if (destination == null)
@@ -1130,6 +1131,35 @@ void testSplitWithStructureTreeAndDestinations() throws IOException
11301131
}
11311132
}
11321133

1134+
/**
1135+
* PDFBOX-5929: Check that orphan annotations are removed from the structure tree if annotations
1136+
* were removed from the pages (don't do that!).
1137+
*
1138+
* @throws IOException
1139+
*/
1140+
@Test
1141+
void testSplitWithStructureTreeAndDestinationsAndRemovedAnnotations() throws IOException
1142+
{
1143+
try (PDDocument doc = Loader.loadPDF(new File(SRCDIR,"PDFBOX-5762-722238.pdf")))
1144+
{
1145+
Splitter splitter = new Splitter();
1146+
for (PDPage page : doc.getPages())
1147+
{
1148+
page.setAnnotations(Collections.emptyList());
1149+
}
1150+
splitter.setStartPage(1);
1151+
splitter.setEndPage(2);
1152+
splitter.setSplitAtPage(2);
1153+
List<PDDocument> splitResult = splitter.split(doc);
1154+
assertEquals(1, splitResult.size());
1155+
try (PDDocument dstDoc = splitResult.get(0))
1156+
{
1157+
assertEquals(2, dstDoc.getNumberOfPages());
1158+
checkForPageOrphans(dstDoc);
1159+
}
1160+
}
1161+
}
1162+
11331163
/**
11341164
* Check for the bug that happened in PDFBOX-5792, where a destination was outside a target
11351165
* document and hit an NPE in the next call of Splitter.fixDestinations().

0 commit comments

Comments
 (0)