Skip to content

Commit 2a10b6c

Browse files
EliotJonesBobLd
authored and committed
make link copying more tolerant when adding page
In UglyToad#1082 and other issues relating to annotations, we're running into the constraints of the current model for building a PDF document. Currently we skip all link-type annotations; I think we can support copying links whose destination is outside the current document. However, the more I look at this code, the more I think we need a radical redesign of how document building is done, because it has been pushed far beyond its current capabilities. I'll explain my thinking in more detail in the related PR.
1 parent 85fc63d commit 2a10b6c

File tree

2 files changed

+148
-64
lines changed

2 files changed

+148
-64
lines changed

src/UglyToad.PdfPig.Tokens/NameToken.Constants.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,7 @@ public partial class NameToken
310310
public static readonly NameToken Last = new NameToken("Last");
311311
public static readonly NameToken LastChar = new NameToken("LastChar");
312312
public static readonly NameToken LastModified = new NameToken("LastModified");
313+
public static readonly NameToken Launch = new NameToken("Launch");
313314
public static readonly NameToken Lc = new NameToken("LC");
314315
public static readonly NameToken Le = new NameToken("LE");
315316
public static readonly NameToken Leading = new NameToken("Leading");

src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs

Lines changed: 147 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -458,72 +458,14 @@ public PdfPageBuilder AddPage(PdfDocument document, int pageNumber, AddPageOptio
458458
{
459459
continue;
460460
}
461-
462-
var val = kvp.Value;
463-
if (kvp.Value is IndirectReferenceToken ir)
464-
{
465-
ObjectToken tk = document.Structure.TokenScanner.Get(ir.Data);
466-
if (tk is null)
467-
{
468-
// malformed
469-
continue;
470-
}
471-
val = tk.Data;
472-
}
473-
474-
if (!(val is ArrayToken arr))
475-
{
476-
// should be array... ignore and remove bad dict
477-
continue;
478-
}
479-
480-
// if copyLink is unset, ignore links to resolve issues with refencing non-existing pages
481-
var toAdd = new List<IToken>();
482-
foreach (var annot in arr.Data)
483-
{
484-
DictionaryToken? tk = GetRemoteDict(annot);
485-
if (tk is null)
486-
{
487-
// malformed
488-
continue;
489-
}
490461

491-
if (tk.TryGet(NameToken.Subtype, out var st) && st is NameToken nm && nm == NameToken.Link)
492-
{
493-
if (options.CopyLinkFunc is null)
494-
{
495-
// ignore link if don't know how to copy
496-
continue;
497-
}
498-
499-
var link = page.annotationProvider.GetAction(tk);
500-
if (link is null)
501-
{
502-
// ignore unknown link actions
503-
continue;
504-
}
505-
506-
var copiedLink = options.CopyLinkFunc(link);
507-
if (copiedLink is null)
508-
{
509-
// ignore if caller wants to skip the link
510-
continue;
511-
}
512-
513-
if (copiedLink != link)
514-
{
515-
// defer to write links when all pages are added
516-
var copiedToken = (DictionaryToken)WriterUtil.CopyToken(context, tk, document.Structure.TokenScanner, refs);
517-
links.Add((copiedToken, copiedLink));
518-
continue;
519-
}
462+
var copiedTokens = CopyAnnotationsFromPageSource(
463+
kvp.Value,
464+
document.Structure.TokenScanner,
465+
refs,
466+
options.CopyLinkFunc);
520467

521-
// copy as is if caller returns the same link
522-
}
523-
toAdd.Add(WriterUtil.CopyToken(context, tk, document.Structure.TokenScanner, refs));
524-
}
525-
// copy rest
526-
copiedPageDict[NameToken.Annots] = new ArrayToken(toAdd);
468+
copiedPageDict[NameToken.Annots] = new ArrayToken(copiedTokens);
527469
continue;
528470
}
529471

@@ -625,6 +567,147 @@ void CopyResourceDict(IToken token, Dictionary<NameToken, IToken> destinationDic
625567
}
626568
}
627569

570+
/// <summary>
/// Copies the annotations of a source page into this builder's context, leaving out
/// link annotations whose target may lie inside the source document.
/// </summary>
/// <remarks>
/// The <c>P</c> and <c>StructParent</c> entries are stripped from every copied annotation:
/// the page reference is not re-pointed at the new page and the structure tree is not copied.
/// Removing <c>P</c> will corrupt screen annotations associated with rendition actions,
/// which require that entry.
/// </remarks>
/// <param name="val">The page's annotations value; either the array itself or something resolvable to it.</param>
/// <param name="sourceScanner">Scanner used to resolve objects in the source document.</param>
/// <param name="refs">Reference map handed through to <c>WriterUtil.CopyToken</c>.</param>
/// <param name="linkCopyFunc">
/// Caller supplied link copy override. TODO: this is not applied yet, link copying is decided
/// solely by the rules in this method.
/// </param>
/// <returns>
/// The copied annotation tokens, or an empty list when <paramref name="val"/> does not resolve to an array.
/// </returns>
private IReadOnlyList<IToken> CopyAnnotationsFromPageSource(
    IToken val,
    IPdfTokenScanner sourceScanner,
    IDictionary<IndirectReference, IndirectReferenceToken> refs,
    Func<PdfAction, PdfAction?>? linkCopyFunc)
{
    // Action types a copied link is allowed to carry.
    var externalActionTypes = new HashSet<NameToken>
    {
        // A web URI.
        NameToken.Uri,
        // A page in a different non-embedded document.
        NameToken.GoToR,
        // Launch an external application.
        NameToken.Launch,
    };

    if (!DirectObjectFinder.TryGet(val, sourceScanner, out ArrayToken? sourceAnnotations))
    {
        return [];
    }

    var result = new List<IToken>();

    foreach (var entry in sourceAnnotations.Data)
    {
        if (!DirectObjectFinder.TryGet(entry, sourceScanner, out DictionaryToken? annotation))
        {
            continue;
        }

        var keysToDrop = new List<NameToken>();

        // An indirect reference to the page object with which this annotation is associated.
        // It should be updated when this page is written; for now it is removed.
        if (annotation.TryGet(NameToken.P, out _))
        {
            keysToDrop.Add(NameToken.P);
        }

        // We don't copy the struct tree so drop the link into it for now.
        if (annotation.TryGet(NameToken.StructParent, out _))
        {
            keysToDrop.Add(NameToken.StructParent);
        }

        if (!CanCopy(annotation))
        {
            continue;
        }

        var copied = WriterUtil.CopyToken(
            context,
            CopyWithSkippedKeys(annotation, keysToDrop),
            sourceScanner,
            refs);

        result.Add(copied);
    }

    return result;

    // Decides whether an annotation can be carried over without risking a dangling internal target.
    bool CanCopy(DictionaryToken candidate)
    {
        // We treat non-link annotations as ok for now, we should revisit this.
        if (!candidate.TryGet(NameToken.Subtype, sourceScanner, out NameToken? subtype)
            || subtype != NameToken.Link)
        {
            return true;
        }

        // A link with an action may point elsewhere in the source document, maybe not to a page we copied,
        // so only actions with a known, permitted type are kept.
        if (candidate.TryGet(NameToken.A, sourceScanner, out DictionaryToken? action))
        {
            return action.TryGet(NameToken.S, sourceScanner, out NameToken? actionType)
                && externalActionTypes.Contains(actionType);
        }

        // A dest can point elsewhere in the source document, maybe not to a page we copied: skip for now.
        // If neither /A nor /Dest are present it is unclear what the link does, so it should be safe to copy.
        return !candidate.TryGet(NameToken.Dest, out _);
    }
}
678+
679+
/// <summary>
/// Builds a new <see cref="DictionaryToken"/> holding every entry of <paramref name="source"/>
/// except those whose key matches one of the names in <paramref name="skipped"/>.
/// </summary>
/// <param name="source">The dictionary whose entries are copied; it is only read.</param>
/// <param name="skipped">Names of the entries to leave out of the result.</param>
/// <returns>A new dictionary token with the retained entries; values are reused as-is, not cloned.</returns>
private static DictionaryToken CopyWithSkippedKeys(
    DictionaryToken source,
    IReadOnlyList<NameToken> skipped)
{
    var retained = new Dictionary<NameToken, IToken>();

    foreach (var entry in source.Data)
    {
        var key = NameToken.Create(entry.Key);

        // Stop scanning the skip list as soon as a match is found.
        var isSkipped = false;
        for (var i = 0; i < skipped.Count && !isSkipped; i++)
        {
            isSkipped = skipped[i] == key;
        }

        if (!isSkipped)
        {
            retained[key] = entry.Value;
        }
    }

    return new DictionaryToken(retained);
}
710+
628711
private void CompleteDocument()
629712
{
630713
// write fonts to reserved object numbers

0 commit comments

Comments
 (0)