3030
3131from typing import TYPE_CHECKING , List , Optional , Tuple , Union , cast
3232
33- from . import ArrayObject , DictionaryObject , IndirectObject , PdfObject , TextStringObject
33+ from . import ArrayObject , DictionaryObject , IndirectObject , NullObject , PdfObject , TextStringObject
3434
3535if TYPE_CHECKING :
3636 from .._page import PageObject
37+ from .._protocols import PdfCommonDocProtocol
3738 from .._reader import PdfReader
3839 from .._writer import PdfWriter
40+ from ..generic import Destination
3941
4042
4143class NamedReferenceLink :
4244 """Named reference link being preserved until we can resolve it correctly."""
4345
def __init__(self, reference: TextStringObject, page: "PageObject") -> None:
    """Resolve and remember the page that a named reference points to.

    reference: TextStringObject with named reference
    page: page holding the link; ``page.pdf`` is searched first, then the
        ``pdf`` of each entry in ``page._merged_in_pages``.
    """
    self._reference = reference

    # To work out where the reference points we need to find the source
    # PDF which defines it. That *can* be the PDF the page containing the
    # link comes from, but it may also be some other PDF merged into this
    # page, so we fall back to searching the merged-in pages.
    destination = self._find_page_in(page.pdf)

    if not destination:
        for src_page in page._merged_in_pages:
            destination = self._find_page_in(src_page.pdf)
            # Keep searching until a match turns up; stopping after the
            # first merged-in page would miss destinations that are only
            # defined by a later one.
            if destination:
                break

    # A destination whose first array entry is a NullObject has no usable
    # page, so it is treated the same as "not found".
    if destination and not isinstance(destination.dest_array[0], NullObject):
        self._referenced_page = destination.dest_array[0]
    else:
        self._referenced_page = None
66+
67+ def _find_page_in (self , pdf : "Optional[PdfCommonDocProtocol]" ) -> "Optional[Destination]" :
68+ if not pdf or not hasattr (pdf , "named_destinations" ):
69+ return None
70+ reader : PdfReader = cast ("PdfReader" , pdf )
71+ return reader .named_destinations .get (str (self ._reference ))
4872
def find_referenced_page(self) -> Union[IndirectObject, None]:
    """Return the indirect reference of the page resolved at construction.

    Yields None when no referenced page was stored (or the stored value
    is falsy).
    """
    target = self._referenced_page
    return target.indirect_reference if target else None
5277
5378 def patch_reference (self , target_pdf : "PdfWriter" , new_page : IndirectObject ) -> None :
5479 """target_pdf: PdfWriter which the new link went into"""
@@ -90,7 +115,6 @@ def extract_links(new_page: "PageObject", old_page: "PageObject") -> List[Tuple[
90115
91116
92117def _build_link (indirect_object : IndirectObject , page : "PageObject" ) -> Optional [ReferenceLink ]:
93- src = cast ("PdfReader" , page .pdf )
94118 link = cast (DictionaryObject , indirect_object .get_object ())
95119 if (not isinstance (link , DictionaryObject )) or link .get ("/Subtype" ) != "/Link" :
96120 return None
@@ -100,17 +124,17 @@ def _build_link(indirect_object: IndirectObject, page: "PageObject") -> Optional
100124 if action .get ("/S" ) != "/GoTo" :
101125 return None
102126
103- return _create_link (action ["/D" ], src )
127+ return _create_link (action ["/D" ], page )
104128
105129 if "/Dest" in link :
106- return _create_link (link ["/Dest" ], src )
130+ return _create_link (link ["/Dest" ], page )
107131
108132 return None # Nothing to do here
109133
110134
def _create_link(reference: PdfObject, page: "PageObject") -> Optional[ReferenceLink]:
    """Wrap a link target in the matching reference-link class.

    A TextStringObject becomes a NamedReferenceLink (built with *page*),
    an ArrayObject becomes a DirectReferenceLink, and any other kind of
    object yields None.
    """
    link: Optional[ReferenceLink] = None
    if isinstance(reference, TextStringObject):
        link = NamedReferenceLink(reference, page)
    elif isinstance(reference, ArrayObject):
        link = DirectReferenceLink(reference)
    return link
0 commit comments