Skip to content

Commit 4ea7189

Browse files
Implement advanced tag structure editing, refactor and document tagging tool, improve tests
DEVSIX-502
1 parent dedc5b0 commit 4ea7189

File tree

67 files changed

+1404
-480
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+1404
-480
lines changed

forms/src/main/java/com/itextpdf/forms/PdfAcroForm.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
import com.itextpdf.kernel.pdf.PdfStream;
1717
import com.itextpdf.kernel.pdf.PdfString;
1818
import com.itextpdf.kernel.pdf.annot.PdfAnnotation;
19-
import com.itextpdf.kernel.pdf.tagutils.PdfTagReference;
19+
import com.itextpdf.kernel.pdf.tagutils.TagReference;
2020
import com.itextpdf.kernel.pdf.tagutils.TagTreePointer;
2121
import com.itextpdf.kernel.pdf.xobject.PdfFormXObject;
2222
import com.itextpdf.forms.fields.PdfFormField;
@@ -617,8 +617,8 @@ public void flattenFields() {
617617
}
618618

619619
if (tagPointer != null) {
620-
tagPointer.setPage(page);
621-
PdfTagReference tagRef = tagPointer.getTagReference();
620+
tagPointer.setPageForTagging(page);
621+
TagReference tagRef = tagPointer.getTagReference();
622622
canvas.openTag(tagRef);
623623
}
624624
canvas.addXObject(xObject, box.getX(), box.getY());

kernel/src/main/java/com/itextpdf/kernel/PdfException.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,9 @@ public class PdfException extends RuntimeException {
157157
public static final String SignatureWithName1IsNotTheLastItDoesntCoverWholeDocument = "signature.with.name.1.is.not.the.last.it.doesnt.cover.whole.document";
158158
public static final String StdcfNotFoundEncryption = "stdcf.not.found.encryption";
159159
public static final String StructureElementShallContainParentObject = "structure.element.shall.contain.parent.object";
160+
public static final String TagCannotBeMovedToTheAnotherDocumentsTagStructure = "tag.cannot.be.moved.to.the.another.documents.tag.structure";
161+
public static final String TagTreePointerIsInInvalidStateItPointsAtFlushedElementUseMoveToRoot = "tagtreepointer.is.in.invalid.state.it.points.at.flushed.element.use.movetoroot";
162+
public static final String TagTreePointerIsInInvalidStateItPointsAtRemovedElementUseMoveToRoot = "tagtreepointer.is.in.invalid.state.it.points.at.removed.element.use.movetoroot";
160163
public static final String TextCannotBeNull = "text.cannot.be.null";
161164
public static final String TextIsTooBig = "text.is.too.big";
162165
public static final String TextMustBeEven = "the.text.length.must.be.even";

kernel/src/main/java/com/itextpdf/kernel/pdf/PdfDocument.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -806,6 +806,11 @@ public Integer getNextStructParentIndex() {
806806
return structParentIndex++;
807807
}
808808

809+
/**
810+
* Gets document {@code TagStructureContext}.
811+
* The document must be tagged, otherwise an exception will be thrown.
812+
* @return document {@code TagStructureContext}.
813+
*/
809814
public TagStructureContext getTagStructureContext() {
810815
checkClosingStatus();
811816
if (tagStructureContext != null) {
@@ -945,13 +950,15 @@ public List<PdfPage> copyPagesTo(Set<Integer> pagesToCopy, PdfDocument toDocumen
945950
// It's important to copy tag structure after link annotations were copied, because object content items in tag
946951
// structure are not copied if their OBJ key is an annotation that doesn't contain a /P entry.
947952
if (toDocument.isTagged()) {
953+
if (tagStructureContext != null) {
954+
tagStructureContext.removeAllConnectionsToTags();
955+
}
948956
if (insertBeforePage > toDocument.getNumberOfPages()) {
949957
getStructTreeRoot().copyTo(toDocument, page2page);
950-
toDocument.getTagStructureContext().reinitialize();
951958
} else {
952959
getStructTreeRoot().copyTo(toDocument, insertBeforePage, page2page);
953-
toDocument.getTagStructureContext().reinitialize();
954960
}
961+
toDocument.getTagStructureContext().reinitialize();
955962
}
956963
if (catalog.isOutlineMode()) {
957964
copyOutlines(outlinesToCopy, toDocument, page2Outlines);

kernel/src/main/java/com/itextpdf/kernel/pdf/PdfObject.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,8 @@ public PdfObject clone() {
232232

233233
/**
234234
* Copies object to a specified document.
235-
* Works only for objects that are read from existing document, otherwise an exception is thrown.
235+
* <br/><br/>
236+
* NOTE: Works only for objects that are read from a document opened in reading mode, otherwise an exception is thrown.
236237
*
237238
* @param document document to copy object to.
238239
* @return copied object.
@@ -243,7 +244,8 @@ public <T extends PdfObject> T copyTo(PdfDocument document) {
243244

244245
/**
245246
* Copies object to a specified document.
246-
* Works only for objects that are read from existing document, otherwise an exception is thrown.
247+
* <br/><br/>
248+
* NOTE: Works only for objects that are read from a document opened in reading mode, otherwise an exception is thrown.
247249
*
248250
* @param document document to copy object to.
249251
* @param allowDuplicating indicates if to allow copy objects which already have been copied.

kernel/src/main/java/com/itextpdf/kernel/pdf/PdfPage.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,8 @@ public PdfStream getXmpMetadata() throws XMPException {
221221

222222
/**
223223
* Copies page to the specified document.
224+
* <br/><br/>
225+
* NOTE: Works only for pages from the document opened in reading mode, otherwise an exception is thrown.
224226
*
225227
* @param toDocument a document to copy page to.
226228
* @return copied page.
@@ -231,6 +233,8 @@ public PdfPage copyTo(PdfDocument toDocument) {
231233

232234
/**
233235
* Copies page to the specified document.
236+
* <br/><br/>
237+
* NOTE: Works only for pages from the document opened in reading mode, otherwise an exception is thrown.
234238
*
235239
* @param toDocument a document to copy page to.
236240
* @param copier a copier which bears a specific copy logic. May be NULL
@@ -504,10 +508,10 @@ public PdfPage addAnnotation(PdfAnnotation annotation) {
504508
public PdfPage addAnnotation(int index, PdfAnnotation annotation, boolean tagAnnotation) {
505509
if (getDocument().isTagged() && tagAnnotation) {
506510
TagTreePointer tagPointer = getDocument().getTagStructureContext().getAutoTaggingPointer();
507-
PdfPage prevPage = tagPointer.getCurrentPage();
508-
tagPointer.setPage(this).addAnnotationTag(annotation);
511+
PdfPage prevPage = tagPointer.getCurrentPage(); // TODO what about if current tagging stream is set
512+
tagPointer.setPageForTagging(this).addAnnotationTag(annotation);
509513
if (prevPage != null) {
510-
tagPointer.setPage(prevPage);
514+
tagPointer.setPageForTagging(prevPage);
511515
}
512516
}
513517

@@ -550,7 +554,7 @@ public PdfPage removeAnnotation(PdfAnnotation annotation) {
550554
if (tagPointer != null) {
551555
boolean standardAnnotTagRole = tagPointer.getRole().equals(PdfName.Annot)
552556
|| tagPointer.getRole().equals(PdfName.Form);
553-
if (tagPointer.getListOfKidsRoles().isEmpty() && standardAnnotTagRole) {
557+
if (tagPointer.getKidsRoles().isEmpty() && standardAnnotTagRole) {
554558
tagPointer.removeTag();
555559
}
556560
}

kernel/src/main/java/com/itextpdf/kernel/pdf/canvas/PdfCanvas.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
import com.itextpdf.kernel.pdf.layer.PdfLayer;
3636
import com.itextpdf.kernel.pdf.layer.PdfLayerMembership;
3737
import com.itextpdf.kernel.pdf.layer.PdfOCG;
38-
import com.itextpdf.kernel.pdf.tagutils.PdfTagReference;
38+
import com.itextpdf.kernel.pdf.tagutils.TagReference;
3939
import com.itextpdf.kernel.pdf.xobject.PdfFormXObject;
4040
import com.itextpdf.kernel.pdf.xobject.PdfImageXObject;
4141
import com.itextpdf.kernel.pdf.xobject.PdfXObject;
@@ -2026,7 +2026,7 @@ public PdfCanvas openTag(CanvasTag tag) {
20262026
* @param tagReference reference to the tag from the document logical structure
20272027
* @return current canvas
20282028
*/
2029-
public PdfCanvas openTag(PdfTagReference tagReference) {
2029+
public PdfCanvas openTag(TagReference tagReference) {
20302030
if (tagReference.getRole() == null)
20312031
return this;
20322032
CanvasTag tag = new CanvasTag(tagReference.getRole(), tagReference.createNextMcid());

kernel/src/main/java/com/itextpdf/kernel/pdf/tagging/PdfStructElem.java

Lines changed: 55 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import com.itextpdf.kernel.pdf.PdfArray;
55
import com.itextpdf.kernel.pdf.PdfDictionary;
66
import com.itextpdf.kernel.pdf.PdfDocument;
7+
import com.itextpdf.kernel.pdf.PdfIndirectReference;
78
import com.itextpdf.kernel.pdf.PdfName;
89
import com.itextpdf.kernel.pdf.PdfNumber;
910
import com.itextpdf.kernel.pdf.PdfObject;
@@ -126,9 +127,12 @@ public PdfStructElem(PdfDocument document, final PdfName role) {
126127
}}).makeIndirect(document));
127128
}
128129

130+
/**
131+
* Method to distinguish struct elements from other elements of the logical tree (like mcr or struct tree root).
132+
*/
129133
static public boolean isStructElem(PdfDictionary dictionary) {
130134
return (PdfName.StructElem.equals(dictionary.getAsName(PdfName.Type)) ||
131-
(dictionary.containsKey(PdfName.K) && dictionary.containsKey(PdfName.S)));
135+
dictionary.containsKey(PdfName.S)); // required key of the struct elem
132136
}
133137

134138
/**
@@ -191,6 +195,10 @@ public PdfName getRole() {
191195
return getPdfObject().getAsName(PdfName.S);
192196
}
193197

198+
public void setRole(PdfName role) {
199+
getPdfObject().put(PdfName.S, role);
200+
}
201+
194202
public PdfStructElem addKid(PdfStructElem kid) {
195203
return addKid(-1, kid);
196204
}
@@ -213,6 +221,25 @@ public PdfMcr addKid(int index, PdfMcr kid) {
213221
return kid;
214222
}
215223

224+
public IPdfStructElem removeKid(int index) {
225+
PdfObject k = getK();
226+
if (k == null || !k.isArray() && index != 0) {
227+
throw new IndexOutOfBoundsException();
228+
}
229+
230+
if (k.isArray()) {
231+
PdfArray kidsArray = (PdfArray) k;
232+
k = kidsArray.remove(index);
233+
if (kidsArray.isEmpty()) {
234+
remove(PdfName.K);
235+
}
236+
} else {
237+
remove(PdfName.K);
238+
}
239+
240+
return convertPdfObjectToIPdfStructElem(k);
241+
}
242+
216243
/**
217244
* @return parent of the current structure element. If parent is already flushed it returns null.
218245
*/
@@ -295,26 +322,7 @@ private void addKidObjectToStructElemList(PdfObject k, List<IPdfStructElem> list
295322
return;
296323
}
297324

298-
switch (k.getType()) {
299-
case PdfObject.Dictionary:
300-
PdfDictionary d = (PdfDictionary) k;
301-
if (isStructElem(d))
302-
list.add(new PdfStructElem(d));
303-
else if (PdfName.MCR.equals(d.getAsName(PdfName.Type)))
304-
list.add(new PdfMcrDictionary(d, this));
305-
else if (PdfName.OBJR.equals(d.getAsName(PdfName.Type)))
306-
list.add(new PdfObjRef(d, this));
307-
break;
308-
case PdfObject.Number:
309-
list.add(new PdfMcrNumber((PdfNumber) k, this));
310-
break;
311-
default:
312-
break;
313-
}
314-
}
315-
316-
private void addKidObject(PdfObject kid) {
317-
addKidObject(-1, kid);
325+
list.add(convertPdfObjectToIPdfStructElem(k));
318326
}
319327

320328
private void addKidObject(int index, PdfObject kid) {
@@ -345,4 +353,30 @@ private void addKidObject(int index, PdfObject kid) {
345353
if (kid instanceof PdfDictionary && isStructElem((PdfDictionary) kid))
346354
((PdfDictionary) kid).put(PdfName.P, pdfObject);
347355
}
356+
357+
private IPdfStructElem convertPdfObjectToIPdfStructElem(PdfObject obj) {
358+
if (obj.isIndirectReference()) {
359+
obj = ((PdfIndirectReference)obj).getRefersTo();
360+
}
361+
362+
IPdfStructElem elem = null;
363+
switch (obj.getType()) {
364+
case PdfObject.Dictionary:
365+
PdfDictionary d = (PdfDictionary) obj;
366+
if (isStructElem(d))
367+
elem = new PdfStructElem(d);
368+
else if (PdfName.MCR.equals(d.getAsName(PdfName.Type)))
369+
elem = new PdfMcrDictionary(d, this);
370+
else if (PdfName.OBJR.equals(d.getAsName(PdfName.Type)))
371+
elem = new PdfObjRef(d, this);
372+
break;
373+
case PdfObject.Number:
374+
elem = new PdfMcrNumber((PdfNumber) obj, this);
375+
break;
376+
default:
377+
break;
378+
}
379+
380+
return elem;
381+
}
348382
}

kernel/src/main/java/com/itextpdf/kernel/pdf/tagging/PdfStructTreeRoot.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,9 @@ public void flushStructElement(PdfStructElem structElem) {
239239

240240
/**
241241
* Copies structure to a {@code destDocument}.
242+
* <br/><br/>
243+
* NOTE: Works only for {@code PdfStructTreeRoot} that is read from the document opened in reading mode,
244+
* otherwise an exception is thrown.
242245
*
243246
* @param destDocument document to copy structure to. Shall not be current document.
244247
* @param page2page association between original page and copied page.
@@ -253,7 +256,10 @@ public void copyTo(PdfDocument destDocument, Map<PdfPage, PdfPage> page2page) {
253256
}
254257

255258
/**
256-
* Copies structure to a {@code destDocument} and insert it in a specified position in the document..
259+
* Copies structure to a {@code destDocument} and inserts it at a specified position in the document.
260+
* <br/><br/>
261+
* NOTE: Works only for {@code PdfStructTreeRoot} that is read from the document opened in reading mode,
262+
* otherwise an exception is thrown.
257263
*
258264
* @param destDocument document to copy structure to.
259265
* @param insertBeforePage indicates where the structure to be inserted.

kernel/src/main/java/com/itextpdf/kernel/pdf/tagutils/AccessibleElementProperties.java renamed to kernel/src/main/java/com/itextpdf/kernel/pdf/tagutils/AccessibilityProperties.java

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import java.util.ArrayList;
1111
import java.util.List;
1212

13-
public class AccessibleElementProperties {
13+
public class AccessibilityProperties {
1414
protected String language;
1515
protected String actualText;
1616
protected String alternateDescription;
@@ -21,7 +21,7 @@ public String getLanguage() {
2121
return language;
2222
}
2323

24-
public AccessibleElementProperties setLanguage(String language) {
24+
public AccessibilityProperties setLanguage(String language) {
2525
this.language = language;
2626
return this;
2727
}
@@ -30,7 +30,7 @@ public String getActualText() {
3030
return actualText;
3131
}
3232

33-
public AccessibleElementProperties setActualText(String actualText) {
33+
public AccessibilityProperties setActualText(String actualText) {
3434
this.actualText = actualText;
3535
return this;
3636
}
@@ -39,7 +39,7 @@ public String getAlternateDescription() {
3939
return alternateDescription;
4040
}
4141

42-
public AccessibleElementProperties setAlternateDescription(String alternateDescription) {
42+
public AccessibilityProperties setAlternateDescription(String alternateDescription) {
4343
this.alternateDescription = alternateDescription;
4444
return this;
4545
}
@@ -48,18 +48,18 @@ public String getExpansion() {
4848
return expansion;
4949
}
5050

51-
public AccessibleElementProperties setExpansion(String expansion) {
51+
public AccessibilityProperties setExpansion(String expansion) {
5252
this.expansion = expansion;
5353
return this;
5454
}
5555

56-
public AccessibleElementProperties addAttributes(PdfDictionary attributes) {
56+
public AccessibilityProperties addAttributes(PdfDictionary attributes) {
5757
attributesList.add(attributes);
5858

5959
return this;
6060
}
6161

62-
public AccessibleElementProperties clearAttributes() {
62+
public AccessibilityProperties clearAttributes() {
6363
attributesList.clear();
6464

6565
return this;
@@ -92,7 +92,7 @@ void setToStructElem(PdfStructElem elem) {
9292
}
9393
}
9494

95-
private PdfObject combineAttributesList(PdfObject attributesObject, List<PdfDictionary> newAttributesList, PdfNumber revision) {
95+
protected PdfObject combineAttributesList(PdfObject attributesObject, List<PdfDictionary> newAttributesList, PdfNumber revision) {
9696
PdfObject combinedAttributes;
9797

9898
if (attributesObject instanceof PdfDictionary) {
@@ -116,7 +116,7 @@ private PdfObject combineAttributesList(PdfObject attributesObject, List<PdfDict
116116
return combinedAttributes;
117117
}
118118

119-
private void addNewAttributesToAttributesArray(List<PdfDictionary> newAttributesList, PdfNumber revision, PdfArray attributesArray) {
119+
protected void addNewAttributesToAttributesArray(List<PdfDictionary> newAttributesList, PdfNumber revision, PdfArray attributesArray) {
120120
if (revision != null) {
121121
for (PdfDictionary attributes : newAttributesList) {
122122
attributesArray.add(attributes);

0 commit comments

Comments
 (0)