3030
3131from typing import TYPE_CHECKING , List , Optional , Tuple , Union , cast
3232
33- from . import ArrayObject , DictionaryObject , IndirectObject , PdfObject , TextStringObject
33+ from . import ArrayObject , DictionaryObject , IndirectObject , NullObject , PdfObject , TextStringObject
3434
3535if TYPE_CHECKING :
3636 from .._page import PageObject
4141class NamedReferenceLink :
4242 """Named reference link being preserved until we can resolve it correctly."""
4343
def __init__(self, reference: TextStringObject, page: "PageObject") -> None:
    """
    Record a named reference and resolve what it points at.

    Args:
        reference: TextStringObject with named reference.
        page: Page that carries the link. ``page.pdf`` is searched for the
            name first, then the ``pdf`` of each entry in
            ``page._merged_in_pages``, in order.

    The first element of the matching destination's ``dest_array`` is kept
    in ``self._referenced_page`` (presumably the target page -- confirm
    against the destination type). It is ``None`` when no document defines
    the name or when that first element is a ``NullObject``.
    """
    self._reference = reference

    # The name may be defined by the PDF this page comes from, or by some
    # other PDF whose page was merged into this one, so probe each in turn.
    destination = self._find_page_in(page.pdf)

    if not destination:
        for src_page in page._merged_in_pages:
            destination = self._find_page_in(src_page.pdf)
            if destination:
                # Stop at the first document that defines the name. Breaking
                # unconditionally would skip every merged-in page after the
                # first one.
                break

    if destination and not isinstance(destination.dest_array[0], NullObject):
        self._referenced_page = destination.dest_array[0]
    else:
        self._referenced_page = None
64+
def _find_page_in(self, pdf: "Optional[PdfReader]"):
    """
    Look up this link's name in ``pdf.named_destinations``.

    Returns ``None`` when ``pdf`` is falsy or does not define the name;
    otherwise returns whatever ``named_destinations.get`` yields.
    """
    if pdf:
        destinations = pdf.named_destinations
        return destinations.get(str(self._reference))
    return None
4869
def find_referenced_page(self) -> Union[IndirectObject, None]:
    """
    Return the ``indirect_reference`` of the stored target.

    Returns ``None`` when ``self._referenced_page`` is ``None`` or
    otherwise falsy.
    """
    target = self._referenced_page
    if not target:
        return None
    return target.indirect_reference
5273
5374 def patch_reference (self , target_pdf : "PdfWriter" , new_page : IndirectObject ) -> None :
5475 """target_pdf: PdfWriter which the new link went into"""
@@ -90,7 +111,6 @@ def extract_links(new_page: "PageObject", old_page: "PageObject") -> List[Tuple[
90111
91112
92113def _build_link (indirect_object : IndirectObject , page : "PageObject" ) -> Optional [ReferenceLink ]:
93- src = cast ("PdfReader" , page .pdf )
94114 link = cast (DictionaryObject , indirect_object .get_object ())
95115 if (not isinstance (link , DictionaryObject )) or link .get ("/Subtype" ) != "/Link" :
96116 return None
@@ -100,17 +120,17 @@ def _build_link(indirect_object: IndirectObject, page: "PageObject") -> Optional
100120 if action .get ("/S" ) != "/GoTo" :
101121 return None
102122
103- return _create_link (action ["/D" ], src )
123+ return _create_link (action ["/D" ], page )
104124
105125 if "/Dest" in link :
106- return _create_link (link ["/Dest" ], src )
126+ return _create_link (link ["/Dest" ], page )
107127
108128 return None # Nothing to do here
109129
110130
def _create_link(reference: PdfObject, page: "PageObject") -> Optional[ReferenceLink]:
    """
    Wrap a link destination in the matching link object.

    A ``TextStringObject`` becomes a ``NamedReferenceLink`` built from the
    reference and ``page``; an ``ArrayObject`` becomes a
    ``DirectReferenceLink``; anything else yields ``None``.
    """
    link: Optional[ReferenceLink] = None
    if isinstance(reference, TextStringObject):
        link = NamedReferenceLink(reference, page)
    elif isinstance(reference, ArrayObject):
        link = DirectReferenceLink(reference)
    return link
0 commit comments