Skip to content

Commit 14612e2

Browse files
Support removing redundant tags after page removing
DEVSIX-452
1 parent 9632cab commit 14612e2

File tree

15 files changed

+345
-167
lines changed

15 files changed

+345
-167
lines changed

kernel/src/main/java/com/itextpdf/kernel/pdf/PdfCatalog.java

Lines changed: 29 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -77,21 +77,6 @@ public int getPageNumber(PdfDictionary pageDictionary) {
7777
return pageTree.getPageNumber(pageDictionary);
7878
}
7979

80-
public boolean removePage(PdfPage page) {
81-
//TODO log removing flushed page
82-
removeOutlines(page);
83-
removeUnusedWidgetsFromFields(page);
84-
return pageTree.removePage(page);
85-
}
86-
87-
public PdfPage removePage(int pageNum) {
88-
//TODO log removing flushed page
89-
PdfPage page = getPage(pageNum);
90-
removeOutlines(page);
91-
removeUnusedWidgetsFromFields(page);
92-
return pageTree.removePage(pageNum);
93-
}
94-
9580
/**
9681
* Use this method to get the <B>Optional Content Properties Dictionary</B>.
9782
* Note that if you call this method, then the PdfDictionary with OCProperties will be
@@ -311,6 +296,9 @@ protected boolean isOCPropertiesMayHaveChanged() {
311296
return ocProperties != null;
312297
}
313298

299+
PdfPage removePage(int pageNum) {
300+
return pageTree.removePage(pageNum);
301+
}
314302
/**
315303
* This method returns a map containing all pages of the document with their associated outlines.
316304
*
@@ -373,6 +361,24 @@ PdfOutline getOutlines(boolean updateOutlines) {
373361
return outlines;
374362
}
375363

364+
/**
365+
* This method removes all outlines associated with a given page
366+
*
367+
* @param page
368+
* @throws PdfException
369+
*/
370+
void removeOutlines(PdfPage page) {
371+
if (getDocument().getWriter() == null) {
372+
return;
373+
}
374+
getOutlines(false);
375+
if (!pagesWithOutlines.isEmpty()) {
376+
for (PdfOutline outline : pagesWithOutlines.get(page.getPdfObject())) {
377+
outline.removeOutline();
378+
}
379+
}
380+
}
381+
376382
/**
377383
* This method sets the root outline element in the catalog.
378384
*
@@ -419,48 +425,16 @@ PdfDestination copyDestination(PdfObject dest, Map<PdfPage, PdfPage> page2page,
419425
return d;
420426
}
421427

422-
/**
423-
* This method removes all annotation entries from form fields associated with a given page.
424-
* @param page
425-
*/
426-
private void removeUnusedWidgetsFromFields(PdfPage page){
427-
if (page.isFlushed()) {
428-
return;
429-
}
430-
List<PdfAnnotation> annots = page.getAnnotations();
431-
for (PdfAnnotation annot : annots) {
432-
if (annot.getSubtype().equals(PdfName.Widget)) {
433-
((PdfWidgetAnnotation)annot).releaseFormFieldFromWidgetAnnotation();
434-
}
435-
}
436-
}
437-
438-
/**
439-
* This method removes all outlines associated with a given page
440-
*
441-
* @param page
442-
* @throws PdfException
443-
*/
444-
private void removeOutlines(PdfPage page) {
445-
if (getDocument().getWriter() == null) {
446-
return;
447-
}
448-
getOutlines(false);
449-
if (!pagesWithOutlines.isEmpty()) {
450-
for (PdfOutline outline : pagesWithOutlines.get(page.getPdfObject().getIndirectReference())) {
451-
outline.removeOutline();
452-
}
453-
}
454-
}
455-
456428
private void addOutlineToPage(PdfOutline outline, Map<String, PdfObject> names) {
457-
PdfObject obj = outline.getDestination().getDestinationPage(names);
458-
List<PdfOutline> outs = pagesWithOutlines.get(obj);
459-
if (outs == null) {
460-
outs = new ArrayList<>();
461-
pagesWithOutlines.put(obj, outs);
429+
PdfObject pageObj = outline.getDestination().getDestinationPage(names);
430+
if (pageObj != null) {
431+
List<PdfOutline> outs = pagesWithOutlines.get(pageObj);
432+
if (outs == null) {
433+
outs = new ArrayList<>();
434+
pagesWithOutlines.put(pageObj, outs);
435+
}
436+
outs.add(outline);
462437
}
463-
outs.add(outline);
464438
}
465439

466440
private void getNextItem(PdfDictionary item, PdfOutline parent, Map<String, PdfObject> names) {

kernel/src/main/java/com/itextpdf/kernel/pdf/PdfDocument.java

Lines changed: 46 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import com.itextpdf.kernel.numbering.EnglishAlphabetNumbering;
1414
import com.itextpdf.kernel.pdf.annot.PdfAnnotation;
1515
import com.itextpdf.kernel.pdf.annot.PdfLinkAnnotation;
16+
import com.itextpdf.kernel.pdf.annot.PdfWidgetAnnotation;
1617
import com.itextpdf.kernel.pdf.filespec.PdfFileSpec;
1718
import com.itextpdf.kernel.pdf.navigation.PdfDestination;
1819
import com.itextpdf.kernel.pdf.navigation.PdfExplicitDestination;
@@ -458,25 +459,47 @@ public int getPageNumber(PdfPage page) {
458459
}
459460

460461
/**
461-
* Removes page from the document.
462+
* Removes the first occurrence of the specified page from this document,
463+
* if it is present. Returns <tt>true</tt> if this document
464+
* contained the specified element (or equivalently, if this document
465+
* changed as a result of the call).
462466
*
463-
* @param page a page to remove.
467+
* @param page page to be removed from this document, if present
468+
* @return <tt>true</tt> if this document contained the specified page
464469
*/
465470
public boolean removePage(PdfPage page) {
466471
checkClosingStatus();
467-
boolean result = catalog.removePage(page);
468-
dispatchEvent(new PdfDocumentEvent(PdfDocumentEvent.REMOVE_PAGE, page));
469-
return result;
472+
int pageNum = getPageNumber(page);
473+
if (pageNum < 1)
474+
return false;
475+
return removePage(pageNum) != null;
470476
}
471477

472478
/**
473479
* Removes page from the document by page number.
474480
*
475-
* @param pageNum a number of page to remove.
481+
* @param pageNum the one-based index of the PdfPage to be removed
482+
* @return the page that was removed from the document
476483
*/
477484
public PdfPage removePage(int pageNum) {
478485
checkClosingStatus();
479-
return catalog.removePage(pageNum);
486+
PdfPage removedPage = catalog.removePage(pageNum);
487+
488+
if (removedPage != null) {
489+
catalog.removeOutlines(removedPage);
490+
removeUnusedWidgetsFromFields(removedPage);
491+
if (isTagged()) {
492+
getTagStructure().removePageTags(removedPage);
493+
}
494+
495+
if (!removedPage.getPdfObject().isFlushed()) {
496+
removedPage.getPdfObject().remove(PdfName.Parent);
497+
}
498+
removedPage.getPdfObject().getIndirectReference().setFree();
499+
500+
dispatchEvent(new PdfDocumentEvent(PdfDocumentEvent.REMOVE_PAGE, removedPage));
501+
}
502+
return removedPage;
480503
}
481504

482505
/**
@@ -1326,6 +1349,22 @@ protected void checkClosingStatus() {
13261349
}
13271350
}
13281351

1352+
/**
1353+
* This method removes all annotation entries from form fields associated with a given page.
1354+
* @param page
1355+
*/
1356+
private void removeUnusedWidgetsFromFields(PdfPage page){
1357+
if (page.isFlushed()) {
1358+
return;
1359+
}
1360+
List<PdfAnnotation> annots = page.getAnnotations();
1361+
for (PdfAnnotation annot : annots) {
1362+
if (annot.getSubtype().equals(PdfName.Widget)) {
1363+
((PdfWidgetAnnotation)annot).releaseFormFieldFromWidgetAnnotation();
1364+
}
1365+
}
1366+
}
1367+
13291368
private void copyLinkAnnotations(PdfDocument toDocument, Map<PdfPage, PdfPage> page2page) {
13301369
List<PdfName> excludedKeys = new ArrayList<>();
13311370
excludedKeys.add(PdfName.Dest);

kernel/src/main/java/com/itextpdf/kernel/pdf/PdfPage.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -572,7 +572,7 @@ public int getAnnotsSize() {
572572
*/
573573
public List<PdfOutline> getOutlines(boolean updateOutlines) {
574574
getDocument().getOutlines(updateOutlines);
575-
return getDocument().getCatalog().getPagesWithOutlines().get(getPdfObject().getIndirectReference());
575+
return getDocument().getCatalog().getPagesWithOutlines().get(getPdfObject());
576576
}
577577

578578
/**

kernel/src/main/java/com/itextpdf/kernel/pdf/PdfPagesTree.java

Lines changed: 1 addition & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -182,38 +182,14 @@ public void addPage(int index, PdfPage pdfPage) {
182182
*/
183183
public PdfPage removePage(int pageNum) {
184184
PdfPage pdfPage = getPage(pageNum);
185+
//TODO log removing flushed page
185186
if (internalRemovePage(--pageNum)) {
186-
if (!pdfPage.getPdfObject().isFlushed()) {
187-
pdfPage.getPdfObject().remove(PdfName.Parent);
188-
}
189-
pdfPage.getPdfObject().getIndirectReference().setFree();
190187
return pdfPage;
191188
} else {
192189
return null;
193190
}
194191
}
195192

196-
/**
197-
* Removes the first occurrence of the specified page from this list,
198-
* if it is present. Returns <tt>true</tt> if this list
199-
* contained the specified element (or equivalently, if this list
200-
* changed as a result of the call).
201-
*
202-
* @param pdfPage page to be removed from this list, if present
203-
* @return <tt>true</tt> if this list contained the specified page
204-
*/
205-
public boolean removePage(PdfPage pdfPage) {
206-
int pageNum = getPageNumber(pdfPage) - 1;
207-
if (pageNum < 0)
208-
return false;
209-
if (!pdfPage.getPdfObject().isFlushed()) {
210-
pdfPage.getPdfObject().remove(PdfName.Parent);
211-
}
212-
pdfPage.getPdfObject().getIndirectReference().setFree();
213-
internalRemovePage(pageNum);
214-
return true;
215-
}
216-
217193
/**
218194
* Generate PdfPages tree.
219195
*

kernel/src/main/java/com/itextpdf/kernel/pdf/navigation/PdfExplicitDestination.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ public PdfExplicitDestination(PdfArray pdfObject) {
2020

2121
@Override
2222
public PdfObject getDestinationPage(Map<String, PdfObject> names) {
23-
return getPdfObject().get(0, false);
23+
return getPdfObject().get(0);
2424
}
2525

2626
@Override

kernel/src/main/java/com/itextpdf/kernel/pdf/navigation/PdfNamedDestination.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ public PdfNamedDestination(PdfName pdfObject) {
2020
public PdfObject getDestinationPage(final Map<String, PdfObject> names) {
2121
PdfArray array = (PdfArray) names.get(getPdfObject().getValue());
2222

23-
return array != null ? array.get(0, false) : null;
23+
return array != null ? array.get(0) : null;
2424
}
2525

2626
@Override

kernel/src/main/java/com/itextpdf/kernel/pdf/navigation/PdfStringDestination.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ public PdfStringDestination(PdfString pdfObject) {
2020
public PdfObject getDestinationPage(Map<String, PdfObject> names) {
2121
PdfArray array = (PdfArray) names.get(getPdfObject().toUnicodeString());
2222

23-
return array != null ? array.get(0, false) : null;
23+
return array != null ? array.get(0) : null;
2424
}
2525

2626
@Override

kernel/src/main/java/com/itextpdf/kernel/pdf/tagging/PdfStructTreeRoot.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,16 @@ public class PdfStructTreeRoot extends PdfObjectWrapper<PdfDictionary> implement
2424

2525
protected Map<PdfDictionary, Integer> objRefs = new HashMap<>();
2626

27+
/**
28+
* Represents parentTree in structTreeRoot. It contains only those entries that belong to the already flushed pages.
29+
*/
2730
private PdfNumTree parentTree;
2831

2932
/**
3033
* Contains marked content references lists of all pages.
3134
* <p>
3235
* When this field is initialized all new mcrs added to the tag structure are also added to this map.
33-
* The idea that this field is initialized only once, therefore the tree would be traversed only once.
36+
* The idea is that this field is initialized only once, so the struct tree is traversed only once.
3437
* </p>
3538
* <p>
3639
* When this field is initialized, the whole tag structure is traversed; this is needed, for example, in stamping mode.
@@ -376,15 +379,14 @@ public PdfStructElem removeAnnotationObjectReference(PdfDictionary annotDic) {
376379

377380
if (parentElem != null && objRef != null) {
378381
PdfObject k = parentElem.getK();
382+
// TODO improve removal: handle the case where it was the last element in the array, and the case where the array holds an indirect reference instead of the object itself
379383
if (k.isArray()) {
380384
((PdfArray) k).remove(objRef.getPdfObject());
381385
} else {
382386
parentElem.getPdfObject().remove(PdfName.K);
383387
}
384388

385-
if (pageToPageMcrs != null) {
386-
unregisterMcr(objRef);
387-
}
389+
unregisterMcr(objRef);
388390

389391
// We don't remove the parent tree entry with given struct parent index here,
390392
// because parent tree is fully rebuilt at document closing.

0 commit comments

Comments
 (0)