Skip to content

Commit 0473467

Browse files
src/__init__.py, tests/: fix pymupdf.get_text() with method='single'.
We were ignoring the `pages` argument, so every page of the document was processed. Fixes #4524.
1 parent 0d414ec commit 0473467

File tree

2 files changed

+15
-1
lines changed

2 files changed

+15
-1
lines changed

src/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20635,7 +20635,10 @@ def apply_pages(
2063520635
initfn(*initfn_args, **initfn_kwargs)
2063620636
ret = list()
2063720637
document = Document(path)
20638-
for page in document:
20638+
if pages is None:
20639+
pages = range(len(document))
20640+
for pno in pages:
20641+
page = document[pno]
2063920642
r = pagefn(page, *pagefn_args, **initfn_kwargs)
2064020643
ret.append(r)
2064120644

tests/test_textextract.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,17 @@ def llen(texts):
309309
pymupdf._log_items_clear()
310310

311311

312+
def test_4524():
313+
path = os.path.abspath(f'{__file__}/../../tests/resources/mupdf_explored.pdf')
314+
print('')
315+
document = pymupdf.Document(path)
316+
texts_single = pymupdf.get_text(path, method='single', pages=[1, 3, 5])
317+
texts_mp = pymupdf.get_text(path, method='mp', pages=[1, 3, 5])
318+
print(f'{len(texts_single)=}')
319+
print(f'{len(texts_mp)=}')
320+
assert texts_mp == texts_single
321+
322+
312323
def test_3594():
313324
verbose = 0
314325
print()

0 commit comments

Comments
 (0)