3030
3131from typing import TYPE_CHECKING , List , Optional , Tuple , Union , cast
3232
33- from . import ArrayObject , DictionaryObject , IndirectObject , PdfObject , TextStringObject
33+ from . import ArrayObject , DictionaryObject , IndirectObject , NullObject , PdfObject , TextStringObject
3434
3535if TYPE_CHECKING :
3636 from .._page import PageObject
37+ from .._protocols import PdfCommonDocProtocol
3738 from .._reader import PdfReader
3839 from .._writer import PdfWriter
40+ from ..generic import Destination
3941
4042
4143class NamedReferenceLink :
4244 """Named reference link being preserved until we can resolve it correctly."""
4345
def __init__(self, reference: TextStringObject, page: "PageObject") -> None:
    """
    Resolve a named reference to the page object it points at.

    The name is looked up first in the PDF that ``page`` belongs to and,
    if it is not found there, in the PDF of each page that was merged
    into ``page``, in order, stopping at the first match.

    Args:
        reference: TextStringObject with named reference
        page: page that contains the link; its ``pdf`` and
            ``_merged_in_pages`` attributes are read to locate the
            document that defines the named destination.
    """
    self._reference = reference

    # To work out where the reference points we need to find the source
    # PDF that defines it. This *can* be the PDF the page containing the
    # link comes from, but it may also be some other PDF merged into this
    # page, so we need to do a little search.
    destination = self._find_page_in(page.pdf)

    if destination is None:
        for src_page in page._merged_in_pages:
            destination = self._find_page_in(src_page.pdf)
            # Keep searching until a merged-in PDF actually knows the
            # name; stopping after the first candidate regardless of the
            # result would ignore every other merged-in page.
            if destination is not None:
                break

    # Read the destination array once; its first entry is the target page.
    target = destination.dest_array[0] if destination is not None else None

    # A destination whose page entry is a NullObject points nowhere, so it
    # is recorded the same way as an unresolved name.
    self._referenced_page = None if isinstance(target, NullObject) else target
66+
67+ def _find_page_in (self , pdf : "Optional[PdfCommonDocProtocol]" ) -> "Optional[Destination]" :
68+ if not pdf or not hasattr (pdf , "named_destinations" ):
69+ return None
70+ reader : PdfReader = cast ("PdfReader" , pdf )
71+ return reader .named_destinations .get (str (self ._reference ))
4872
4973 def find_referenced_page (self ) -> Union [IndirectObject , None ]:
50- destination = self ._source_pdf . named_destinations . get ( str ( self . _reference ))
51- return destination . page if destination else None
74+ if self ._referenced_page :
75+ return self . _referenced_page . indirect_reference
5276
5377 def patch_reference (self , target_pdf : "PdfWriter" , new_page : IndirectObject ) -> None :
5478 """target_pdf: PdfWriter which the new link went into"""
@@ -90,7 +114,6 @@ def extract_links(new_page: "PageObject", old_page: "PageObject") -> List[Tuple[
90114
91115
92116def _build_link (indirect_object : IndirectObject , page : "PageObject" ) -> Optional [ReferenceLink ]:
93- src = cast ("PdfReader" , page .pdf )
94117 link = cast (DictionaryObject , indirect_object .get_object ())
95118 if (not isinstance (link , DictionaryObject )) or link .get ("/Subtype" ) != "/Link" :
96119 return None
@@ -100,17 +123,17 @@ def _build_link(indirect_object: IndirectObject, page: "PageObject") -> Optional
100123 if action .get ("/S" ) != "/GoTo" :
101124 return None
102125
103- return _create_link (action ["/D" ], src )
126+ return _create_link (action ["/D" ], page )
104127
105128 if "/Dest" in link :
106- return _create_link (link ["/Dest" ], src )
129+ return _create_link (link ["/Dest" ], page )
107130
108131 return None # Nothing to do here
109132
110133
def _create_link(reference: PdfObject, page: "PageObject") -> Optional[ReferenceLink]:
    """
    Wrap a link target in the matching reference-link helper.

    Args:
        reference: the target taken from a link annotation or its action.
        page: page that contains the link; forwarded to
            NamedReferenceLink for named targets.

    Returns:
        A NamedReferenceLink for a TextStringObject target, a
        DirectReferenceLink for an ArrayObject target, otherwise None.
    """
    link: Optional[ReferenceLink] = None
    if isinstance(reference, TextStringObject):
        link = NamedReferenceLink(reference, page)
    elif isinstance(reference, ArrayObject):
        link = DirectReferenceLink(reference)
    return link
0 commit comments