Skip to content

Commit 41a8cfb

Browse files
committed
Support Link and Reference UA-2 related rules
DEVSIX-9002
1 parent b7c2cb3 commit 41a8cfb

File tree

4 files changed

+735
-1
lines changed

4 files changed

+735
-1
lines changed

pdfua/src/main/java/com/itextpdf/pdfua/checkers/PdfUA2Checker.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,11 @@ This file is part of the iText (R) project.
4848
import com.itextpdf.pdfua.checkers.utils.LayoutCheckUtil;
4949
import com.itextpdf.pdfua.checkers.utils.PdfUAValidationContext;
5050
import com.itextpdf.pdfua.checkers.utils.tables.TableCheckUtil;
51-
import com.itextpdf.pdfua.checkers.utils.ua2.PdfUA2FormChecker;
5251
import com.itextpdf.pdfua.checkers.utils.ua2.PdfUA2DestinationsChecker;
52+
import com.itextpdf.pdfua.checkers.utils.ua2.PdfUA2FormChecker;
5353
import com.itextpdf.pdfua.checkers.utils.ua2.PdfUA2FormulaChecker;
5454
import com.itextpdf.pdfua.checkers.utils.ua2.PdfUA2HeadingsChecker;
55+
import com.itextpdf.pdfua.checkers.utils.ua2.PdfUA2LinkChecker;
5556
import com.itextpdf.pdfua.checkers.utils.ua2.PdfUA2ListChecker;
5657
import com.itextpdf.pdfua.checkers.utils.ua2.PdfUA2NotesChecker;
5758
import com.itextpdf.pdfua.checkers.utils.ua2.PdfUA2TableOfContentsChecker;
@@ -169,6 +170,7 @@ private void checkCatalog(PdfCatalog catalog) {
169170
PdfUA2FormChecker formChecker = new PdfUA2FormChecker(context);
170171
formChecker.checkFormFields(catalog.getPdfObject().getAsDictionary(PdfName.AcroForm));
171172
formChecker.checkWidgetAnnotations(this.pdfDocument);
173+
PdfUA2LinkChecker.checkLinkAnnotations(this.pdfDocument);
172174
}
173175

174176
/**
@@ -225,6 +227,7 @@ private void checkStructureTreeRoot(PdfStructTreeRoot structTreeRoot) {
225227
tagTreeIterator.addHandler(new PdfUA2NotesChecker.PdfUA2NotesHandler(context));
226228
tagTreeIterator.addHandler(new PdfUA2TableOfContentsChecker.PdfUA2TableOfContentsHandler(context));
227229
tagTreeIterator.addHandler(new PdfUA2FormulaChecker.PdfUA2FormulaTagHandler(context));
230+
tagTreeIterator.addHandler(new PdfUA2LinkChecker.PdfUA2LinkAnnotationHandler(context, pdfDocument));
228231
tagTreeIterator.traverse();
229232
}
230233
}
Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
/*
2+
This file is part of the iText (R) project.
3+
Copyright (c) 1998-2025 Apryse Group NV
4+
Authors: Apryse Software.
5+
6+
This program is offered under a commercial and under the AGPL license.
7+
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
8+
9+
AGPL licensing:
10+
This program is free software: you can redistribute it and/or modify
11+
it under the terms of the GNU Affero General Public License as published by
12+
the Free Software Foundation, either version 3 of the License, or
13+
(at your option) any later version.
14+
15+
This program is distributed in the hope that it will be useful,
16+
but WITHOUT ANY WARRANTY; without even the implied warranty of
17+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18+
GNU Affero General Public License for more details.
19+
20+
You should have received a copy of the GNU Affero General Public License
21+
along with this program. If not, see <https://www.gnu.org/licenses/>.
22+
*/
23+
package com.itextpdf.pdfua.checkers.utils.ua2;
24+
25+
import com.itextpdf.kernel.pdf.PdfArray;
26+
import com.itextpdf.kernel.pdf.PdfDictionary;
27+
import com.itextpdf.kernel.pdf.PdfDocument;
28+
import com.itextpdf.kernel.pdf.PdfName;
29+
import com.itextpdf.kernel.pdf.PdfNameTree;
30+
import com.itextpdf.kernel.pdf.PdfObject;
31+
import com.itextpdf.kernel.pdf.PdfPage;
32+
import com.itextpdf.kernel.pdf.PdfString;
33+
import com.itextpdf.kernel.pdf.annot.PdfAnnotation;
34+
import com.itextpdf.kernel.pdf.annot.PdfLinkAnnotation;
35+
import com.itextpdf.kernel.pdf.tagging.IStructureNode;
36+
import com.itextpdf.kernel.pdf.tagging.PdfObjRef;
37+
import com.itextpdf.kernel.pdf.tagging.PdfStructElem;
38+
import com.itextpdf.pdfua.checkers.utils.ContextAwareTagTreeIteratorHandler;
39+
import com.itextpdf.pdfua.checkers.utils.PdfUAValidationContext;
40+
import com.itextpdf.pdfua.exceptions.PdfUAConformanceException;
41+
import com.itextpdf.pdfua.exceptions.PdfUAExceptionMessageConstants;
42+
43+
import java.util.HashMap;
44+
import java.util.HashSet;
45+
import java.util.Map;
46+
import java.util.Set;
47+
48+
/**
49+
* Class that provides methods for checking PDF/UA-2 compliance of link annotations.
50+
*/
51+
public final class PdfUA2LinkChecker {
52+
private final PdfDocument pdfDoc;
53+
private final PdfUAValidationContext context;
54+
55+
private final Map<PdfObject, Set<IStructureNode>> destinationToStructParentsMap = new HashMap<>();
56+
57+
/**
 * Creates a checker bound to the given validation context and document.
 *
 * @param context the validation context used to resolve structure element roles
 * @param pdfDoc the {@link PdfDocument} used to look up named destinations
 */
private PdfUA2LinkChecker(PdfUAValidationContext context, PdfDocument pdfDoc) {
    this.pdfDoc = pdfDoc;
    this.context = context;
}
61+
62+
/**
63+
* Verifies that each link annotation present in the document is tagged.
64+
*
65+
* @param document the {@link PdfDocument} to check links for
66+
*/
67+
public static void checkLinkAnnotations(PdfDocument document) {
68+
int amountOfPages = document.getNumberOfPages();
69+
for (int i = 1; i <= amountOfPages; ++i) {
70+
PdfPage page = document.getPage(i);
71+
for (final PdfAnnotation annot : page.getAnnotations()) {
72+
if (!(annot instanceof PdfLinkAnnotation)) {
73+
continue;
74+
}
75+
if (annot.getStructParentIndex() == -1) {
76+
throw new PdfUAConformanceException(
77+
PdfUAExceptionMessageConstants.LINK_ANNOT_IS_NOT_NESTED_WITHIN_LINK_OR_REFERENCE);
78+
}
79+
}
80+
}
81+
}
82+
83+
/**
84+
* Checks that link annotation is enclosed in either a Link or Reference structure element.
85+
*
86+
* <p>
87+
* Also checks that link annotations that target different locations are in separate Link or Reference structure
88+
* elements, and multiple link annotations targeting the same location are included in a single Link or Reference
89+
* structure element.
90+
*
91+
* @param elem link annotation object reference in the structure tree
92+
*/
93+
private void checkLinkAnnotationStructureParent(IStructureNode elem) {
94+
if (!(elem instanceof PdfObjRef) || ((PdfObjRef) elem).getReferencedObject() == null) {
95+
return;
96+
}
97+
PdfName subtype = ((PdfObjRef) elem).getReferencedObject().getAsName(PdfName.Subtype);
98+
if (!PdfName.Link.equals(subtype)) {
99+
return;
100+
}
101+
102+
IStructureNode linkParent = elem.getParent();
103+
PdfStructElem parentLink = context.getElementIfRoleMatches(PdfName.Link, linkParent);
104+
if (parentLink == null) {
105+
PdfStructElem parentRef = context.getElementIfRoleMatches(PdfName.Reference, linkParent);
106+
if (parentRef == null) {
107+
throw new PdfUAConformanceException(
108+
PdfUAExceptionMessageConstants.LINK_ANNOT_IS_NOT_NESTED_WITHIN_LINK_OR_REFERENCE);
109+
}
110+
}
111+
checkStructDestinationsInLinkAndReference((PdfObjRef) elem);
112+
}
113+
114+
/**
115+
* Checks that link annotations that target different locations (destinations) are in separate Link or Reference
116+
* structure elements, and multiple link annotations targeting the same location are included in a single Link
117+
* or Reference structure element.
118+
*
119+
* @param objRef link annotation object reference in the structure tree
120+
*/
121+
private void checkStructDestinationsInLinkAndReference(PdfObjRef objRef) {
122+
IStructureNode parent = objRef.getParent();
123+
if (parent == null) {
124+
return;
125+
}
126+
PdfObject structDestination = getStructureDestinationObject(objRef.getReferencedObject());
127+
if (structDestination == null) {
128+
return;
129+
}
130+
131+
// In the map, key is a destination object from current link annotation, value is a set of Link or Reference
132+
// structure elements enclosing already checked links annotation with that same destination (actually, value
133+
// always contains either 0 or 1 parent, it's just more convenient to use set during checks).
134+
Set<IStructureNode> destinationStructParents = destinationToStructParentsMap.computeIfAbsent(structDestination,
135+
k -> new HashSet<>());
136+
137+
// Go through all parents: Link or Reference structure elements enclosing links with current destination.
138+
// It shall be the same single parent if present. Otherwise, exception will be thrown.
139+
for (IStructureNode parentNode : destinationStructParents) {
140+
if (!parent.equals(parentNode)) {
141+
throw new PdfUAConformanceException(
142+
PdfUAExceptionMessageConstants.SAME_LINKS_IN_DIFFERENT_STRUCT_ELEMS);
143+
}
144+
}
145+
// Go through all other already checked destinations. They shall have separate Link or Reference structure
146+
// elements, so no other parent should be equal to the current one. Otherwise, exception will be thrown.
147+
for (Map.Entry<PdfObject, Set<IStructureNode>> entry : destinationToStructParentsMap.entrySet()) {
148+
if (structDestination.equals(entry.getKey())) {
149+
// Skip current destination.
150+
continue;
151+
}
152+
for (IStructureNode parentNode : entry.getValue()) {
153+
if (parent.equals(parentNode)) {
154+
throw new PdfUAConformanceException(
155+
PdfUAExceptionMessageConstants.DIFFERENT_LINKS_IN_SINGLE_STRUCT_ELEM);
156+
}
157+
}
158+
}
159+
// Add current parent to the map.
160+
destinationStructParents.add(parent);
161+
}
162+
163+
private PdfObject getStructureDestinationObject(PdfDictionary annotObj) {
164+
PdfLinkAnnotation linkAnnotation = (PdfLinkAnnotation) PdfAnnotation.makeAnnotation(annotObj);
165+
PdfObject destination = null;
166+
PdfDictionary action = linkAnnotation.getAction();
167+
if (action != null) {
168+
if (PdfName.GoTo.equals(action.getAsName(PdfName.S))) {
169+
destination = action.get(PdfName.SD);
170+
if (destination == null) {
171+
destination = action.get(PdfName.D);
172+
}
173+
}
174+
} else {
175+
destination = linkAnnotation.getDestinationObject();
176+
}
177+
if (destination == null) {
178+
return null;
179+
}
180+
PdfArray dest = getDestination(destination);
181+
if (dest == null || dest.isEmpty()) {
182+
return null;
183+
} else {
184+
return dest.get(0);
185+
}
186+
}
187+
188+
private PdfArray getDestination(PdfObject destination) {
189+
return getDestination(destination, new HashSet<>());
190+
}
191+
192+
private PdfArray getDestination(PdfObject destination, Set<PdfObject> checkedDestinations) {
193+
if (destination == null || checkedDestinations.contains(destination)) {
194+
return null;
195+
}
196+
checkedDestinations.add(destination);
197+
switch (destination.getType()) {
198+
case PdfObject.STRING:
199+
PdfNameTree destinations = pdfDoc.getCatalog().getNameTree(PdfName.Dests);
200+
destination = getDestination(destinations.getEntry((PdfString) destination), checkedDestinations);
201+
break;
202+
case PdfObject.NAME:
203+
PdfDictionary dests = pdfDoc.getCatalog().getPdfObject().getAsDictionary(PdfName.Dests);
204+
if (dests != null) {
205+
destination = getDestination(dests.get((PdfName) destination), checkedDestinations);
206+
}
207+
break;
208+
case PdfObject.ARRAY:
209+
break;
210+
case PdfObject.DICTIONARY:
211+
PdfObject actualDestinationObject = getDestination(((PdfDictionary) destination).get(PdfName.SD),
212+
checkedDestinations);
213+
if (actualDestinationObject == null) {
214+
destination = getDestination(((PdfDictionary) destination).get(PdfName.D), checkedDestinations);
215+
} else {
216+
destination = actualDestinationObject;
217+
}
218+
break;
219+
default:
220+
return null;
221+
}
222+
if (destination instanceof PdfArray) {
223+
return (PdfArray) destination;
224+
}
225+
return null;
226+
}
227+
228+
/**
229+
* Helper class that checks the conformance of link annotations while iterating the tag tree structure.
230+
*/
231+
public static class PdfUA2LinkAnnotationHandler extends ContextAwareTagTreeIteratorHandler {
232+
private final PdfUA2LinkChecker checker;
233+
234+
/**
235+
* Creates a new instance of the {@link PdfUA2LinkAnnotationHandler}.
236+
*
237+
* @param context the validation context
238+
* @param document the {@link PdfDocument} to check link annotations for
239+
*/
240+
public PdfUA2LinkAnnotationHandler(PdfUAValidationContext context, PdfDocument document) {
241+
super(context);
242+
this.checker = new PdfUA2LinkChecker(context, document);
243+
}
244+
245+
@Override
246+
public boolean accept(IStructureNode node) {
247+
return node != null;
248+
}
249+
250+
@Override
251+
public void processElement(IStructureNode elem) {
252+
this.checker.checkLinkAnnotationStructureParent(elem);
253+
}
254+
}
255+
}

pdfua/src/main/java/com/itextpdf/pdfua/exceptions/PdfUAExceptionMessageConstants.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ public final class PdfUAExceptionMessageConstants {
4848
"clip data dictionary.";
4949
public static final String DESTINATION_NOT_STRUCTURE_DESTINATION =
5050
"All destinations whose target lies within the same document shall be structure destinations.";
51+
public static final String DIFFERENT_LINKS_IN_SINGLE_STRUCT_ELEM = "Link annotations that target different " +
52+
"locations shall be in separate Link or Reference structure elements instead of a single one.";
5153
public static final String DOCUMENT_SHALL_CONTAIN_VALID_LANG_ENTRY = "Document does not contain valid lang entry.";
5254
public static final String DOCUMENT_SHALL_CONTAIN_XMP_METADATA_STREAM
5355
= "Document shall contain a XMP metadata stream.";
@@ -85,6 +87,8 @@ public final class PdfUAExceptionMessageConstants {
8587
"shall contain an alternate description via their Contents key.";
8688
public static final String LINK_ANNOT_IS_NOT_NESTED_WITHIN_LINK =
8789
"A link annotation is not nested within a <Link> tag.";
90+
public static final String LINK_ANNOT_IS_NOT_NESTED_WITHIN_LINK_OR_REFERENCE =
91+
"A link annotation is either not tagged or not nested within a <Link> or <Reference> tag.";
8892
public static final String LIST_ITEM_CONTENT_HAS_INVALID_TAG = "Any real content within an LI structure element " +
8993
"that is not enclosed in a Lbl structure element shall be enclosed in an LBody structure element.";
9094
public static final String LIST_NUMBERING_IS_NOT_SPECIFIED = "If Lbl structure elements are present, the " +
@@ -125,6 +129,8 @@ public final class PdfUAExceptionMessageConstants {
125129
"Content marked as content may not reside in Artifact content.";
126130
public static final String REAL_CONTENT_INSIDE_ARTIFACT_OR_VICE_VERSA =
127131
"Tagged content is present inside content marked as Artifact or vice versa.";
132+
public static final String SAME_LINKS_IN_DIFFERENT_STRUCT_ELEMS = "Multiple link annotations targeting the same " +
133+
"location shall be included in a single Link or Reference structure element instead of separate ones.";
128134
public static final String STRUCTURE_TYPE_IS_ROLE_MAPPED_TO_OTHER_STRUCTURE_TYPE_IN_THE_SAME_NAMESPACE =
129135
"Structure type {0}:{1} is role mapped to other structure type in the same namespace.";
130136
public static final String SUSPECTS_ENTRY_IN_MARK_INFO_DICTIONARY_SHALL_NOT_HAVE_A_VALUE_OF_TRUE =

0 commit comments

Comments
 (0)