3030
3131from typing import TYPE_CHECKING , List , Optional , Tuple , Union , cast
3232
33- from . import ArrayObject , DictionaryObject , IndirectObject , PdfObject , TextStringObject
33+ from . import ArrayObject , DictionaryObject , IndirectObject , NullObject , PdfObject , TextStringObject
3434
3535if TYPE_CHECKING :
3636 from .._page import PageObject
3737 from .._reader import PdfReader
3838 from .._writer import PdfWriter
39+ from ..generic import Destination
3940
4041
4142class NamedReferenceLink :
4243 """Named reference link being preserved until we can resolve it correctly."""
4344
def __init__(self, reference: TextStringObject, page: "PageObject") -> None:
    """
    Resolve a named reference relative to the page that holds the link.

    Args:
        reference: TextStringObject with named reference.
        page: Page the link was read from. Its own ``pdf`` is searched
            first, then the ``pdf`` of each page in ``page._merged_in_pages``.

    The first entry of the matching destination's ``dest_array`` is kept in
    ``self._referenced_page``; it is ``None`` when no destination is found
    or when that entry is a ``NullObject``.
    """
    self._reference = reference

    # To work out where the reference points we need to find the source
    # PDF that defines the name. This *can* be the PDF the page containing
    # the link comes from, but it may also be some other PDF merged into
    # this page, so we search them in order.
    destination = self._find_page_in(page.pdf)

    if not destination:
        for src_page in page._merged_in_pages:
            destination = self._find_page_in(src_page.pdf)
            # Stop only once a source actually defines the name. Breaking
            # unconditionally would inspect just the first merged page and
            # miss names defined in later ones.
            if destination:
                break

    if destination and not isinstance(destination.dest_array[0], NullObject):
        self._referenced_page = destination.dest_array[0]
    else:
        self._referenced_page = None
def _find_page_in(self, pdf: "Optional[PdfReader]") -> "Optional[Destination]":
    """
    Look up this link's name in the named destinations of ``pdf``.

    Returns ``None`` when ``pdf`` is falsy; otherwise returns whatever
    ``pdf.named_destinations.get`` yields for ``str(self._reference)``.
    """
    if pdf:
        name = str(self._reference)
        return pdf.named_destinations.get(name)
    return None
4870
def find_referenced_page(self) -> Union[IndirectObject, None]:
    """
    Return the indirect reference of the stored referenced page.

    Returns ``None`` when ``self._referenced_page`` is unset or falsy.
    """
    target = self._referenced_page
    if not target:
        return None
    return target.indirect_reference
5274
5375 def patch_reference (self , target_pdf : "PdfWriter" , new_page : IndirectObject ) -> None :
5476 """target_pdf: PdfWriter which the new link went into"""
@@ -90,7 +112,6 @@ def extract_links(new_page: "PageObject", old_page: "PageObject") -> List[Tuple[
90112
91113
92114def _build_link (indirect_object : IndirectObject , page : "PageObject" ) -> Optional [ReferenceLink ]:
93- src = cast ("PdfReader" , page .pdf )
94115 link = cast (DictionaryObject , indirect_object .get_object ())
95116 if (not isinstance (link , DictionaryObject )) or link .get ("/Subtype" ) != "/Link" :
96117 return None
@@ -100,17 +121,17 @@ def _build_link(indirect_object: IndirectObject, page: "PageObject") -> Optional
100121 if action .get ("/S" ) != "/GoTo" :
101122 return None
102123
103- return _create_link (action ["/D" ], src )
124+ return _create_link (action ["/D" ], page )
104125
105126 if "/Dest" in link :
106- return _create_link (link ["/Dest" ], src )
127+ return _create_link (link ["/Dest" ], page )
107128
108129 return None # Nothing to do here
109130
110131
def _create_link(reference: PdfObject, page: "PageObject") -> Optional[ReferenceLink]:
    """
    Wrap a link target in the matching link class.

    A ``TextStringObject`` becomes a ``NamedReferenceLink`` built from the
    reference and ``page``; an ``ArrayObject`` becomes a
    ``DirectReferenceLink``. Any other kind of object yields ``None``.
    """
    link: Optional[ReferenceLink] = None
    if isinstance(reference, TextStringObject):
        link = NamedReferenceLink(reference, page)
    elif isinstance(reference, ArrayObject):
        link = DirectReferenceLink(reference)
    return link
0 commit comments