Skip to content

Commit b04e8b2

Browse files
src/__init__.py tests/: address #4004 - avoid segv when trying to get page from annot.
The fix requires MuPDF >= 1.25, specifically this MuPDF commit: When annotation is deleted from page, remove link from annotation to page.
1 parent f6a853e commit b04e8b2

File tree

4 files changed

+94
-13
lines changed

4 files changed

+94
-13
lines changed

src/__init__.py

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,21 @@ def _as_pdf_page(page, required=True):
514514
assert 0, f'Unrecognised {type(page)=}'
515515

516516

517+
def _pdf_annot_page(annot):
    '''
    Return the mupdf.PdfPage that <annot> belongs to.

    Thin wrapper for `mupdf.pdf_annot_page()`. That function returns a
    mupdf.PdfPage with `.m_internal=None` when <annot> is not bound to a
    page; this wrapper raises an exception in that situation instead.

    [Some other MuPDF functions, such as `pdf_update_annot()`, already raise
    a similar exception if a pdf_annot's .page field is null.]

    Raises:
        RuntimeError: if <annot> is not bound to a page.
    '''
    owner = mupdf.pdf_annot_page(annot)
    if owner.m_internal:
        return owner
    raise RuntimeError('Annot is not bound to a page')
530+
531+
517532
# Fixme: we don't support JM_MEMORY=1.
518533
JM_MEMORY = 0
519534

@@ -599,7 +614,7 @@ def _setAP(self, buffer_, rect=0):
599614
try:
600615
annot = self.this
601616
annot_obj = mupdf.pdf_annot_obj( annot)
602-
page = mupdf.pdf_annot_page( annot)
617+
page = _pdf_annot_page(annot)
603618
apobj = mupdf.pdf_dict_getl( annot_obj, PDF_NAME('AP'), PDF_NAME('N'))
604619
if not apobj.m_internal:
605620
raise RuntimeError( MSG_BAD_APN)
@@ -619,7 +634,7 @@ def _update_appearance(self, opacity=-1, blend_mode=None, fill_color=None, rotat
619634
annot = self.this
620635
assert annot.m_internal
621636
annot_obj = mupdf.pdf_annot_obj( annot)
622-
page = mupdf.pdf_annot_page( annot)
637+
page = _pdf_annot_page(annot)
623638
pdf = page.doc()
624639
type_ = mupdf.pdf_annot_type( annot)
625640
nfcol, fcol = JM_color_FromSequence(fill_color)
@@ -830,7 +845,7 @@ def delete_responses(self):
830845
CheckParent(self)
831846
annot = self.this
832847
annot_obj = mupdf.pdf_annot_obj(annot)
833-
page = mupdf.pdf_annot_page(annot)
848+
page = _pdf_annot_page(annot)
834849
while 1:
835850
irt_annot = JM_find_annot_irt(annot)
836851
if not irt_annot.m_internal:
@@ -942,7 +957,7 @@ def get_parent(self):
942957
try:
943958
ret = getattr( self, 'parent')
944959
except AttributeError:
945-
page = mupdf.pdf_annot_page(self.this)
960+
page = _pdf_annot_page(self.this)
946961
assert isinstance( page, mupdf.PdfPage)
947962
document = Document( page.doc()) if page.m_internal else None
948963
ret = Page(page, document)
@@ -1374,7 +1389,7 @@ def set_irt_xref(self, xref):
13741389
'''
13751390
annot = self.this
13761391
annot_obj = mupdf.pdf_annot_obj( annot)
1377-
page = mupdf.pdf_annot_page( annot)
1392+
page = _pdf_annot_page(annot)
13781393
if xref < 1 or xref >= mupdf.pdf_xref_len( page.doc()):
13791394
raise ValueError( MSG_BAD_XREF)
13801395
irt = mupdf.pdf_new_indirect( page.doc(), xref, 0)
@@ -1429,7 +1444,7 @@ def set_opacity(self, opacity):
14291444
return
14301445
mupdf.pdf_set_annot_opacity(annot, opacity)
14311446
if opacity < 1.0:
1432-
page = mupdf.pdf_annot_page(annot)
1447+
page = _pdf_annot_page(annot)
14331448
page.transparency = 1
14341449

14351450
def set_open(self, is_open):
@@ -1444,7 +1459,7 @@ def set_popup(self, rect):
14441459
'''
14451460
CheckParent(self)
14461461
annot = self.this
1447-
pdfpage = mupdf.pdf_annot_page( annot)
1462+
pdfpage = _pdf_annot_page(annot)
14481463
rot = JM_rotate_page_matrix(pdfpage)
14491464
r = mupdf.fz_transform_rect(JM_rect_from_py(rect), rot)
14501465
mupdf.pdf_set_annot_popup(annot, r)
@@ -1454,7 +1469,7 @@ def set_rect(self, rect):
14541469
CheckParent(self)
14551470
annot = self.this
14561471

1457-
pdfpage = mupdf.pdf_annot_page(annot)
1472+
pdfpage = _pdf_annot_page(annot)
14581473
rot = JM_rotate_page_matrix(pdfpage)
14591474
r = mupdf.fz_transform_rect(JM_rect_from_py(rect), rot)
14601475
if mupdf.fz_is_empty_rect(r) or mupdf.fz_is_infinite_rect(r):
@@ -1850,7 +1865,7 @@ def vertices(self):
18501865
annot = self.this
18511866
assert isinstance(annot, mupdf.PdfAnnot)
18521867
annot_obj = mupdf.pdf_annot_obj(annot)
1853-
page = mupdf.pdf_annot_page(annot)
1868+
page = _pdf_annot_page(annot)
18541869
page_ctm = mupdf.FzMatrix() # page transformation matrix
18551870
dummy = mupdf.FzRect() # Out-param for mupdf.pdf_page_transform().
18561871
mupdf.pdf_page_transform(page, dummy, page_ctm)
@@ -14470,7 +14485,7 @@ def JM_add_annot_id(annot, stem):
1447014485
Append a number to 'stem' such that the result is a unique name.
1447114486
'''
1447214487
assert isinstance(annot, mupdf.PdfAnnot)
14473-
page = mupdf.pdf_annot_page( annot)
14488+
page = _pdf_annot_page(annot)
1447414489
annot_obj = mupdf.pdf_annot_obj( annot)
1447514490
names = JM_get_annot_id_list(page)
1447614491
i = 0
@@ -15315,7 +15330,7 @@ def JM_find_annot_irt(annot):
1531515330
annot_obj = mupdf.pdf_annot_obj(annot)
1531615331
found = 0
1531715332
# loop thru MuPDF's internal annots array
15318-
page = mupdf.pdf_annot_page(annot)
15333+
page = _pdf_annot_page(annot)
1531915334
irt_annot = mupdf.pdf_first_annot(page)
1532015335
while 1:
1532115336
assert isinstance(irt_annot, mupdf.PdfAnnot)
@@ -15781,7 +15796,7 @@ def JM_get_widget_properties(annot, Widget):
1578115796
#log( '{type(annot)=}')
1578215797
annot_obj = mupdf.pdf_annot_obj(annot.this)
1578315798
#log( 'Have called mupdf.pdf_annot_obj()')
15784-
page = mupdf.pdf_annot_page(annot.this)
15799+
page = _pdf_annot_page(annot.this)
1578515800
pdf = page.doc()
1578615801
tw = annot
1578715802

@@ -17596,7 +17611,8 @@ def JM_set_widget_properties(annot, Widget):
1759617611
if isinstance( annot, Annot):
1759717612
annot = annot.this
1759817613
assert isinstance( annot, mupdf.PdfAnnot), f'{type(annot)=} {type=}'
17599-
page = mupdf.pdf_annot_page(annot)
17614+
page = _pdf_annot_page(annot)
17615+
assert page.m_internal, 'Annot is not bound to a page'
1760017616
annot_obj = mupdf.pdf_annot_obj(annot)
1760117617
pdf = page.doc()
1760217618
def GETATTR(name):

tests/resources/test_4004.pdf

6.28 KB
Binary file not shown.

tests/test_annots.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -439,3 +439,22 @@ def test_3758():
439439
page.apply_redactions()
440440
wt = pymupdf.TOOLS.mupdf_warnings()
441441
assert wt
442+
443+
444+
def test_parent():
    """Using an annotation after its page variable is re-assigned must raise."""
    doc = pymupdf.open()
    page = doc.new_page()
    # Put a highlight annotation on page 0.
    annot = page.add_highlight_annot(page.rect)
    # Re-assign `page` to a new page; this should orphan the annotation.
    page = doc.new_page()
    try:
        print(annot)  # Expected to raise.
    except Exception as e:
        # The expected exception type and text depend on the MuPDF version.
        if pymupdf.mupdf_version_tuple < (1, 25):
            expected_type = ReferenceError
            expected_text = 'weakly-referenced object no longer exists'
        else:
            expected_type = pymupdf.mupdf.FzErrorArgument
            expected_text = 'code=4: annotation not bound to any page'
        assert isinstance(e, expected_type)
        assert str(e) == expected_text
    else:
        assert 0, 'Failed to get expected exception.'

tests/test_widgets.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,3 +333,49 @@ def test_3950():
333333
'{{ policy_period_end_date }}',
334334
'{{ insurance_line }}',
335335
]
336+
337+
338+
def test_4004():
    """
    Regression test for #4004 (getting the page from an annot).

    Collects every widget of tests/resources/test_4004.pdf, prints them, then
    calls update() on the first "Text1" widget. If update() raises, the
    message must be 'Annot is not bound to a page'; not raising is accepted
    too. Does nothing on MuPDF older than 1.25.
    """
    if pymupdf.mupdf_version_tuple < (1, 25):
        print('test_4004(): not running because requires MuPDF >= 1.25.')
        return

    import collections

    def get_widgets_by_name(doc):
        """
        Return a defaultdict mapping each field name to a list of
        {"page_num": ..., "widget": ...} dicts, one per widget in <doc>.
        """
        by_name = collections.defaultdict(list)
        # Pages are loaded by index and `page` is re-bound on every pass.
        # NOTE(review): the returned widgets outlive the page objects they
        # came from, which is presumably what #4004 exercises - keep this
        # loop shape unless that is confirmed not to matter.
        for page_num in range(len(doc)):
            page = doc.load_page(page_num)
            for field in page.widgets():
                record = {"page_num": page_num, "widget": field}
                by_name[field.field_name].append(record)
        return by_name

    # Open the document and gather its widgets.
    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4004.pdf')
    doc = pymupdf.open(path)
    widgets_by_name = get_widgets_by_name(doc)

    # Print one line per widget, grouped by field name.
    for name, records in widgets_by_name.items():
        print(f"Widget Name: {name}")
        for record in records:
            widget = record["widget"]
            page_num = record["page_num"]
            print(f" Page: {page_num + 1}, Type: {widget.field_type}, Value: {widget.field_value}, Rect: {widget.rect}")

    # Attempt to update the first "Text1" widget.
    # NOTE(review): this assigns `.value`, while the print above reads
    # `.field_value` - confirm which attribute is intended.
    first_text1 = widgets_by_name["Text1"][0]
    field = first_text1['widget']
    field.value = "1234567890"
    try:
        field.update()
    except Exception as e:
        assert str(e) == 'Annot is not bound to a page'

    doc.close()

0 commit comments

Comments
 (0)