Skip to content

Commit 2c0b56a

Browse files
tests/: added test_4457(), detect differences before/after Document.subset_fonts().
Downloads input files from github. Allow a small number of remaining differences.
1 parent 2115c87 commit 2c0b56a

File tree

1 file changed

+102
-1
lines changed

1 file changed

+102
-1
lines changed

tests/test_font.py

Lines changed: 102 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,14 @@
11
"""
22
Tests for the Font class.
33
"""
4-
import pymupdf
54
import os
5+
import platform
6+
import pymupdf
7+
import subprocess
8+
import textwrap
9+
10+
import util
11+
612

713
def test_font1():
814
text = "PyMuPDF"
@@ -218,3 +224,98 @@ def test_3887():
218224
print(f'Have saved to: {path_pixmap=}')
219225
assert set(output)==set(text)
220226

227+
228+
def test_4457():
    """
    Detect differences in page 0 before/after Document.subset_fonts().

    For each input file we:

    * Render page 0 and extract its text from the downloaded file.
    * Save the document with ez_save() before and after calling
      subset_fonts(), then re-open both saved files and render/extract
      page 0 again.
    * Require that the 'before' copy is identical to the input (same text,
      zero rms as reported by gentle_compare.pixmaps_rms()).
    * Require that the 'after' copy differs from the input by less than the
      per-file limit <rms_after_max>.

    Pixmaps, a diff image and the extracted text are also written to files
    to help diagnose failures.
    """
    import gentle_compare

    print()
    # Each item is (url, name, size, rms_after_max); the first three are
    # passed to util.download().
    files = (
        ('https://arxiv.org/pdf/2504.13180', 'test_4457_a.pdf', None, 4),
        ('https://arxiv.org/pdf/2504.13181', 'test_4457_b.pdf', None, 9),
    )
    for url, name, size, rms_after_max in files:
        path = util.download(url, name, size)

        with pymupdf.open(path) as document:
            page = document[0]

            pixmap = page.get_pixmap()
            path_pixmap = f'{path}.png'
            pixmap.save(path_pixmap)
            print(f'Have created: {path_pixmap=}')

            text = page.get_text()
            path_before = f'{path}.before.pdf'
            path_after = f'{path}.after.pdf'
            document.ez_save(path_before, garbage=4)
            print(f'Have created {path_before=}')

            document.subset_fonts()
            document.ez_save(path_after, garbage=4)
            print(f'Have created {path_after=}')

        with pymupdf.open(path_before) as document:
            text_before = document[0].get_text()
            pixmap_before = document[0].get_pixmap()
            path_pixmap_before = f'{path_before}.png'
            pixmap_before.save(path_pixmap_before)
            print(f'Have created: {path_pixmap_before=}')

        with pymupdf.open(path_after) as document:
            text_after = document[0].get_text()
            pixmap_after = document[0].get_pixmap()
            path_pixmap_after = f'{path_after}.png'
            pixmap_after.save(path_pixmap_after)
            print(f'Have created: {path_pixmap_after=}')

        rms_before = gentle_compare.pixmaps_rms(pixmap, pixmap_before)
        rms_after = gentle_compare.pixmaps_rms(pixmap, pixmap_after)
        print(f'{rms_before=}')
        print(f'{rms_after=}')

        # Create .png file showing differences between <path> and <path_after>.
        path_pixmap_after_diff = f'{path_after}.diff.png'
        pixmap_after_diff = gentle_compare.pixmaps_diff(pixmap, pixmap_after)
        pixmap_after_diff.save(path_pixmap_after_diff)
        print(f'Have created: {path_pixmap_after_diff}')

        # Extract text from <path>, <path_before> and <path_after> and write to
        # files so we can show differences with `diff`.
        #
        # These paths do not depend on the input file, so each iteration
        # overwrites the text files written by the previous one.
        path_text = os.path.normpath(f'{__file__}/../../tests/test_4457.txt')
        path_text_before = f'{path_text}.before.txt'
        path_text_after = f'{path_text}.after.txt'
        with open(path_text, 'w', encoding='utf8') as f:
            f.write(text)
        with open(path_text_before, 'w', encoding='utf8') as f:
            f.write(text_before)
        with open(path_text_after, 'w', encoding='utf8') as f:
            f.write(text_after)

        # Can't write text to stdout on Windows because of encoding errors.
        if platform.system() != 'Windows':
            print(f'text:\n{textwrap.indent(text, " ")}')
            print(f'text_before:\n{textwrap.indent(text_before, " ")}')
            print(f'text_after:\n{textwrap.indent(text_after, " ")}')
        print(f'{path_text=}')
        print(f'{path_text_before=}')
        print(f'{path_text_after=}')

        # Show text differences. This is diagnostic output only, so we ignore
        # diff's exit code and carry on if `diff` is not installed. We pass
        # an argument list without a shell so that paths containing spaces or
        # shell metacharacters are handled correctly.
        for path_text_other in path_text_before, path_text_after:
            command = ['diff', '-u', path_text, path_text_other]
            print(f'Running: {" ".join(command)}', flush=True)
            try:
                subprocess.run(command)
            except FileNotFoundError as e:
                print(f'Unable to run diff: {e}', flush=True)

        assert text_before == text
        assert rms_before == 0

        # As of 2025-05-20 there are some differences in some characters, e.g.
        # the non-ascii characters in `Philipp Krahenbuhl`.
        # See <path_pixmap> and <path_pixmap_after>.
        assert rms_after < rms_after_max

        # Avoid test failure caused by mupdf warnings.
        #
        # NOTE(review): this check is done once per input file; confirm that
        # this matches the intended scope (per file rather than once after
        # all files have been processed).
        wt = pymupdf.TOOLS.mupdf_warnings()
        print(f'{wt=}')
        assert wt == 'bogus font ascent/descent values (0 / 0)\n... repeated 5 times...'

0 commit comments

Comments
 (0)