Skip to content

Commit 690db0b

Browse files
Introduce initial separation of PdfStructTreeRoot logic
The copying logic is moved to a separate internal class; the management of marked content references is moved to a separate class as well.
1 parent 9babf5a commit 690db0b

File tree

9 files changed

+525
-438
lines changed

9 files changed

+525
-438
lines changed

kernel/src/main/java/com/itextpdf/kernel/pdf/PdfDocument.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1269,7 +1269,7 @@ protected void open(PdfVersion newPdfVersion) {
12691269
PdfDictionary str = catalog.getPdfObject().getAsDictionary(PdfName.StructTreeRoot);
12701270
if (str != null) {
12711271
structTreeRoot = new PdfStructTreeRoot(str);
1272-
structParentIndex = getStructTreeRoot().getStructParentIndex() + 1;
1272+
structParentIndex = getStructTreeRoot().getMcrManager().getStructParentIndex() + 1; // TODO review it again
12731273
}
12741274
if (appendMode && (reader.hasRebuiltXref() || reader.hasFixedXref()))
12751275
throw new PdfException(PdfException.AppendModeRequiresADocumentWithoutErrorsEvenIfRecoveryWasPossible);

kernel/src/main/java/com/itextpdf/kernel/pdf/PdfPage.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@ public void flush(boolean flushXObjects) {
330330
}
331331
if (getDocument().isTagged() && !getDocument().getStructTreeRoot().isFlushed()) {
332332
getDocument().getTagStructureContext().flushPageTags(this);
333-
getDocument().getStructTreeRoot().createParentTreeEntryForPage(this);
333+
getDocument().getStructTreeRoot().getMcrManager().createParentTreeEntryForPage(this);
334334
}
335335
getDocument().dispatchEvent(new PdfDocumentEvent(PdfDocumentEvent.END_PAGE, this));
336336
if (flushXObjects) {
@@ -457,7 +457,7 @@ public int getNextMcid() {
457457
}
458458
if (mcid == null) {
459459
PdfStructTreeRoot structTreeRoot = getDocument().getStructTreeRoot();
460-
List<PdfMcr> mcrs = structTreeRoot.getPageMarkedContentReferences(this);
460+
List<PdfMcr> mcrs = structTreeRoot.getMcrManager().getPageMarkedContentReferences(this);
461461
mcid = getMcid(mcrs);
462462
}
463463
return mcid++;
Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
package com.itextpdf.kernel.pdf.tagging;
2+
3+
import com.itextpdf.kernel.PdfException;
4+
import com.itextpdf.kernel.pdf.PdfArray;
5+
import com.itextpdf.kernel.pdf.PdfDictionary;
6+
import com.itextpdf.kernel.pdf.PdfName;
7+
import com.itextpdf.kernel.pdf.PdfNull;
8+
import com.itextpdf.kernel.pdf.PdfNumTree;
9+
import com.itextpdf.kernel.pdf.PdfNumber;
10+
import com.itextpdf.kernel.pdf.PdfObject;
11+
import com.itextpdf.kernel.pdf.PdfPage;
12+
import java.util.ArrayList;
13+
import java.util.Collections;
14+
import java.util.Comparator;
15+
import java.util.HashMap;
16+
import java.util.List;
17+
import java.util.Map;
18+
import org.slf4j.LoggerFactory;
19+
20+
public class MarkedContentReferencesManager {
21+
22+
private PdfStructTreeRoot structTreeRoot;
23+
24+
private Map<PdfDictionary, Integer> objRefs = new HashMap<>();
25+
26+
/**
27+
* Represents parentTree in structTreeRoot. It contains only those entries that belong to the already flushed pages.
28+
*/
29+
private PdfNumTree parentTree;
30+
31+
/**
32+
* Contains marked content references for every page.
33+
* <p>
34+
* If new mcrs are added to the tag structure after this field is initialized, these new mcrs are also added to this map.
35+
* The idea that this field is initialized only once, therefore the struct tree would be traversed only once.
36+
* </p>
37+
* <p>
38+
* On this field initializing the whole tag structure is traversed.
39+
* This field is initialized:
40+
* <ul>
41+
* <li> when some structure element is flushed;</li>
42+
* <li> when {@code getPageMarkedContentReferences} method is called;</li>
43+
* </ul>
44+
* </p>
45+
*
46+
* <p>
47+
* If document structure tree was modified on low (PdfObjects) level information of this field could become incorrect.
48+
* In this case field shall be recalculated. This could be done by calling {@link #unregisterAllMcrs()} and then when
49+
* it is accessed it will be calculated again.
50+
* </p>
51+
*/
52+
private Map<PdfDictionary, List<PdfMcr>> pageToPageMcrs;
53+
54+
MarkedContentReferencesManager(PdfStructTreeRoot structTreeRoot) {
55+
this.structTreeRoot = structTreeRoot;
56+
parentTree = new PdfNumTree(structTreeRoot.getDocument().getCatalog(), PdfName.ParentTree);
57+
58+
}
59+
60+
/**
61+
* Creates and flushes parent tree entry for the page.
62+
* Effectively this means that new content mustn't be added to the page.
63+
* @param page {@link PdfPage} for which to create parent tree entry. Typically this page is flushed after this call.
64+
*/
65+
public void createParentTreeEntryForPage(PdfPage page) {
66+
List<PdfMcr> mcrs = getPageMarkedContentReferences(page);
67+
if (mcrs == null) {
68+
return;
69+
}
70+
pageToPageMcrs.remove(page.getPdfObject());
71+
updateStructParentTreeEntries(page.getStructParentIndex(), mcrs);
72+
}
73+
74+
/**
75+
* Gets a list of marked content references on page.
76+
*/
77+
public List<PdfMcr> getPageMarkedContentReferences(PdfPage page) {
78+
registerAllMcrsIfNotRegistered();
79+
return pageToPageMcrs.get(page.getPdfObject());
80+
}
81+
82+
// TODO THIS
83+
public int getStructParentIndex() {
84+
PdfArray nums = null;
85+
PdfArray kids = structTreeRoot.getParentTreeObject().getAsArray(PdfName.Kids);
86+
if (kids != null) {
87+
nums = new PdfArray();
88+
for (int i = 0; i < kids.size(); i++) {
89+
PdfObject o = kids.get(i);
90+
if (o instanceof PdfDictionary) {
91+
PdfArray numsLocal = ((PdfDictionary) o).getAsArray(PdfName.Nums);
92+
if (numsLocal != null) {
93+
nums.addAll(numsLocal);
94+
}
95+
} else {
96+
LoggerFactory.getLogger(this.getClass()).warn("Suspicious nums element in StructParentTree", o);
97+
}
98+
}
99+
}
100+
101+
int maxStructParentIndex = 0;
102+
if (nums == null)
103+
nums = structTreeRoot.getParentTreeObject().getAsArray(PdfName.Nums);
104+
if (nums != null) {
105+
for (int i = 0; i < nums.size(); i++) {
106+
PdfNumber n = nums.getAsNumber(i);
107+
if (n != null && n.getIntValue() > maxStructParentIndex)
108+
maxStructParentIndex = n.getIntValue();
109+
}
110+
}
111+
112+
return maxStructParentIndex;
113+
114+
}
115+
116+
public void registerMcr(PdfMcr mcr) {
117+
if (pageToPageMcrs == null) {
118+
return;
119+
}
120+
121+
List<PdfMcr> pageMcrs = pageToPageMcrs.get(mcr.getPageObject());
122+
if (pageMcrs == null) {
123+
pageMcrs = new ArrayList<>();
124+
pageToPageMcrs.put(mcr.getPageObject(), pageMcrs);
125+
}
126+
pageMcrs.add(mcr);
127+
if (mcr instanceof PdfObjRef) {
128+
registerObjRef((PdfObjRef) mcr);
129+
}
130+
}
131+
132+
public void unregisterMcr(PdfMcr mcrToUnregister) {
133+
if (pageToPageMcrs == null) {
134+
return;
135+
}
136+
137+
List<PdfMcr> pageMcrs = pageToPageMcrs.get(mcrToUnregister.getPageObject());
138+
if (pageMcrs != null) {
139+
PdfMcr mcrObjectToRemove = null;
140+
for (PdfMcr mcr : pageMcrs) {
141+
if (mcr.getPdfObject() == mcrToUnregister.getPdfObject()) {
142+
mcrObjectToRemove = mcr;
143+
break;
144+
}
145+
}
146+
pageMcrs.remove(mcrObjectToRemove);
147+
148+
if (mcrToUnregister instanceof PdfObjRef) {
149+
objRefs.remove(mcrToUnregister.getPdfObject());
150+
}
151+
}
152+
}
153+
154+
PdfDictionary buildParentTree() {
155+
return parentTree.buildTree().makeIndirect(structTreeRoot.getDocument());
156+
}
157+
158+
/**
159+
* It should be called when tag structure of document was modified on low level (on PdfObjects level).
160+
* E.g. this happens when we copy new pages into document.
161+
*/
162+
void unregisterAllMcrs() {
163+
if (structTreeRoot.isStructTreeIsPartialFlushed()) {
164+
throw new PdfException(PdfException.CannotModifyTagStructureWhenItWasPartlyFlushed);
165+
}
166+
pageToPageMcrs = null;
167+
parentTree = new PdfNumTree(structTreeRoot.getDocument().getCatalog(), PdfName.ParentTree);
168+
}
169+
170+
void registerAllMcrsIfNotRegistered() {
171+
if (pageToPageMcrs == null) {
172+
pageToPageMcrs = new HashMap<>();
173+
registerAllMcrs(structTreeRoot);
174+
175+
Comparator<PdfMcr> mcrComparator = new Comparator<PdfMcr>() {
176+
@Override
177+
public int compare(PdfMcr o1, PdfMcr o2) {
178+
Integer mcid1 = o1.getMcid();
179+
Integer mcid2 = o2.getMcid();
180+
181+
if (mcid1 == null && mcid2 == null) {
182+
return 0;
183+
}
184+
if (mcid1 == null) {
185+
return -1;
186+
}
187+
if (mcid2 == null) {
188+
return 1;
189+
}
190+
191+
return Integer.compare(mcid1, mcid2);
192+
}
193+
};
194+
for (List<PdfMcr> pdfMcrs : pageToPageMcrs.values()) {
195+
Collections.sort(pdfMcrs, mcrComparator);
196+
}
197+
}
198+
}
199+
200+
private void registerAllMcrs(IPdfStructElem element) {
201+
if (element == null) return;
202+
if (element instanceof PdfMcr) {
203+
registerMcr((PdfMcr)element);
204+
} else {
205+
for (IPdfStructElem kid : element.getKids()) {
206+
registerAllMcrs(kid);
207+
}
208+
}
209+
}
210+
/**
211+
* Number and dictionary references in list shall be ordered by mcid ascending.
212+
* Number and dictionary references in list shall belong to the same page.
213+
* @param pageStructParentIndex structParent index of the page to which mcrs belong.
214+
* @param mcrs list of the marked content references, that belong to the page with given structParent index.
215+
*/
216+
private void updateStructParentTreeEntries(Integer pageStructParentIndex, List<PdfMcr> mcrs) {
217+
// element indexes in parentsOfPageMcrs shall be the same as mcid of one of their kids.
218+
// See "Finding Structure Elements from Content Items" in pdf spec.
219+
PdfArray parentsOfPageMcrs = new PdfArray();
220+
int currentMcid = 0;
221+
for (PdfMcr mcr : mcrs) {
222+
if (mcr instanceof PdfObjRef) {
223+
Integer structParent = this.objRefs.get(mcr.getPdfObject());
224+
if (structParent != null) {
225+
parentTree.addEntry(structParent, ((PdfStructElem) mcr.getParent()).getPdfObject());
226+
}
227+
} else {
228+
// if for some reason some mcr where not registered or don't exist, we ensure that the rest
229+
// of the parent objects were placed at correct index
230+
while (currentMcid++ < mcr.getMcid()) {
231+
parentsOfPageMcrs.add(PdfNull.PdfNull);
232+
}
233+
parentsOfPageMcrs.add(((PdfStructElem)mcr.getParent()).getPdfObject());
234+
}
235+
}
236+
237+
238+
if (!parentsOfPageMcrs.isEmpty()) {
239+
parentsOfPageMcrs.makeIndirect(structTreeRoot.getDocument());
240+
parentTree.addEntry(pageStructParentIndex, parentsOfPageMcrs);
241+
parentsOfPageMcrs.flush();
242+
}
243+
}
244+
245+
private void registerObjRef(PdfObjRef objRef) {
246+
if (objRef == null)
247+
return;
248+
PdfDictionary o = ((PdfDictionary) objRef.getPdfObject()).getAsDictionary(PdfName.Obj);
249+
if (o != null) {
250+
PdfNumber n = o.getAsNumber(PdfName.StructParent);
251+
if (n != null)
252+
objRefs.put((PdfDictionary) objRef.getPdfObject(), n.getIntValue());
253+
}
254+
}
255+
}

kernel/src/main/java/com/itextpdf/kernel/pdf/tagging/PdfStructElem.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ public PdfMcr addKid(PdfMcr kid) {
216216
}
217217

218218
public PdfMcr addKid(int index, PdfMcr kid) {
219-
getDocument().getStructTreeRoot().registerMcr(kid);
219+
getDocument().getStructTreeRoot().getMcrManager().registerMcr(kid);
220220
addKidObject(index, kid.getPdfObject());
221221
return kid;
222222
}

0 commit comments

Comments
 (0)