Skip to content

Commit 0981155

Browse files
src/ tests/ docs/: fix Annot.get_textpage() <clip> arg.
The <clip> arg was previously ignored. Also added documentation for Annot.get_textpage().
1 parent 05b0cfa commit 0981155

File tree

3 files changed

+32
-2
lines changed

3 files changed

+32
-2
lines changed

docs/annot.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ There is a parent-child relationship between an annotation and its page. If the
2222
:meth:`Annot.get_sound` get the sound of an audio annotation
2323
:meth:`Annot.get_text` extract annotation text
2424
:meth:`Annot.get_textbox` extract annotation text within a rectangle
25+
:meth:`Annot.get_textpage` create a TextPage for the annotation
2526
:meth:`Annot.set_border` set annotation's border properties
2627
:meth:`Annot.set_blendmode` set annotation's blend mode
2728
:meth:`Annot.set_colors` set annotation's colors
@@ -134,6 +135,22 @@ There is a parent-child relationship between an annotation and its page. If the
134135
:arg rect-like rect: the area to consider, defaults to :attr:`Annot.rect`.
135136

136137

138+
.. method:: get_textpage(clip=None, flags=0)
139+
140+
Create a :ref:`TextPage` for the annotation.
141+
142+
:arg int flags: indicator bits controlling the content available for subsequent text extractions and searches -- see the same parameter of :meth:`Annot.get_text`.
143+
144+
:arg rect-like clip: restrict extracted text to this area.
145+
146+
:returns: :ref:`TextPage`
147+
148+
|history_begin|
149+
150+
* v1.25.5: fixed the `clip` arg, which was previously ignored.
151+
152+
|history_end|
153+
137154
.. method:: set_info(info=None, content=None, title=None, creationDate=None, modDate=None, subject=None)
138155

139156
* Changed in version 1.16.10

src/__init__.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,8 +1022,11 @@ def get_sound(self):
10221022
def get_textpage(self, clip=None, flags=0):
10231023
"""Make annotation TextPage."""
10241024
CheckParent(self)
1025-
options = mupdf.FzStextOptions()
1026-
options.flags = flags
1025+
options = mupdf.FzStextOptions(flags)
1026+
if clip:
1027+
clip2 = JM_rect_from_py(clip)
1028+
options.clip = clip2.internal()
1029+
options.flags |= mupdf.FZ_STEXT_CLIP_RECT
10271030
annot = self.this
10281031
stextpage = mupdf.FzStextPage(annot, options)
10291032
ret = TextPage(stextpage)

tests/test_annots.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,7 @@ def test_2270():
276276
for page_number, page in enumerate(document):
277277
for textBox in page.annots(types=(pymupdf.PDF_ANNOT_FREE_TEXT,pymupdf.PDF_ANNOT_TEXT)):
278278
print("textBox.type :", textBox.type)
279+
print(f"{textBox.rect=}")
279280
print("textBox.get_text('words') : ", textBox.get_text('words'))
280281
print("textBox.get_text('text') : ", textBox.get_text('text'))
281282
print("textBox.get_textbox(textBox.rect) : ", textBox.get_textbox(textBox.rect))
@@ -296,6 +297,15 @@ def test_2270():
296297
text = page.get_text(textpage=textpage)
297298
print(f'{text=}')
298299
print(f'{getattr(textpage, "parent")=}')
300+
301+
# Check Annot.get_textpage()'s <clip> arg.
302+
clip = textBox.rect
303+
clip.x1 = clip.x0 + (clip.x1 - clip.x0) / 3
304+
textpage2 = textBox.get_textpage(clip=clip)
305+
text = textpage2.extractText()
306+
print(f'With {clip=}: {text=}')
307+
assert text == 'ab\n'
308+
299309

300310
def test_2934_add_redact_annot():
301311
'''

0 commit comments

Comments
 (0)