|
1 | 1 | """ |
2 | 2 | Tests for the Font class. |
3 | 3 | """ |
4 | | -import pymupdf |
5 | 4 | import os |
| 5 | +import platform |
| 6 | +import pymupdf |
| 7 | +import subprocess |
| 8 | +import textwrap |
| 9 | + |
| 10 | +import util |
| 11 | + |
6 | 12 |
|
7 | 13 | def test_font1(): |
8 | 14 | text = "PyMuPDF" |
@@ -218,3 +224,98 @@ def test_3887(): |
218 | 224 | print(f'Have saved to: {path_pixmap=}') |
219 | 225 | assert set(output)==set(text) |
220 | 226 |
|
| 227 | + |
def _test_4457_show_diff(path_a, path_b):
    """
    Print a unified diff of two text files to stdout.

    This is purely a debugging aid: the exit status of `diff` is ignored, and
    a missing `diff` executable is reported rather than raised.
    """
    command = ['diff', '-u', path_a, path_b]
    print(f'Running: {" ".join(command)}', flush=True)
    try:
        # Pass an argument list instead of a shell string so that paths
        # containing spaces or shell metacharacters are handled correctly.
        subprocess.run(command)
    except FileNotFoundError:
        # No `diff` executable on this system; the diff is only diagnostic
        # output so carry on with the test.
        print('diff is not available; skipping.', flush=True)


def test_4457():
    """
    Check that `Document.subset_fonts()` leaves page 0 looking the same.

    For each test PDF we:

    * Download the file with `util.download()`.
    * Render page 0 and extract its text.
    * Save the document with `ez_save(garbage=4)` before and after calling
      `subset_fonts()`, then re-open both copies and render/extract page 0
      again.
    * Require that the `before` copy has identical text and a pixmap RMS
      difference of zero, and that the `after` copy's RMS difference is below
      the per-file limit `rms_after_max`.

    Pixmaps, a difference image and the extracted text are written to disk
    (and diffed on stdout) so that failures can be investigated. The text of
    the `after` copy is only written/diffed, not asserted.
    """
    # Local import, as in the original code; hoisted out of the per-file loop.
    import gentle_compare

    print()
    files = (
            ('https://arxiv.org/pdf/2504.13180', 'test_4457_a.pdf', None, 4),
            ('https://arxiv.org/pdf/2504.13181', 'test_4457_b.pdf', None, 9),
            )

    # Extracted text is written to these files so we can show differences
    # with `diff`. The paths do not depend on the file being tested, so each
    # iteration overwrites the previous iteration's output.
    path_text = os.path.normpath(f'{__file__}/../../tests/test_4457.txt')
    path_text_before = f'{path_text}.before.txt'
    path_text_after = f'{path_text}.after.txt'

    for url, name, size, rms_after_max in files:
        path = util.download(url, name, size)
        path_before = f'{path}.before.pdf'
        path_after = f'{path}.after.pdf'

        with pymupdf.open(path) as document:
            page = document[0]

            pixmap = page.get_pixmap()
            path_pixmap = f'{path}.png'
            pixmap.save(path_pixmap)
            print(f'Have created: {path_pixmap=}')

            text = page.get_text()

            # Save once before and once after subsetting, using identical
            # save options, so that any difference is down to subset_fonts().
            document.ez_save(path_before, garbage=4)
            print(f'Have created {path_before=}')

            document.subset_fonts()
            document.ez_save(path_after, garbage=4)
            print(f'Have created {path_after=}')

        with pymupdf.open(path_before) as document:
            text_before = document[0].get_text()
            pixmap_before = document[0].get_pixmap()
            path_pixmap_before = f'{path_before}.png'
            pixmap_before.save(path_pixmap_before)
            print(f'Have created: {path_pixmap_before=}')

        with pymupdf.open(path_after) as document:
            text_after = document[0].get_text()
            pixmap_after = document[0].get_pixmap()
            path_pixmap_after = f'{path_after}.png'
            pixmap_after.save(path_pixmap_after)
            print(f'Have created: {path_pixmap_after=}')

        rms_before = gentle_compare.pixmaps_rms(pixmap, pixmap_before)
        rms_after = gentle_compare.pixmaps_rms(pixmap, pixmap_after)
        print(f'{rms_before=}')
        print(f'{rms_after=}')

        # Create .png file showing differences between <path> and <path_after>.
        path_pixmap_after_diff = f'{path_after}.diff.png'
        pixmap_after_diff = gentle_compare.pixmaps_diff(pixmap, pixmap_after)
        pixmap_after_diff.save(path_pixmap_after_diff)
        print(f'Have created: {path_pixmap_after_diff}')

        # Write text from <path>, <path_before> and <path_after> to files.
        with open(path_text, 'w', encoding='utf8') as f:
            f.write(text)
        with open(path_text_before, 'w', encoding='utf8') as f:
            f.write(text_before)
        with open(path_text_after, 'w', encoding='utf8') as f:
            f.write(text_after)

        # Can't write text to stdout on Windows because of encoding errors.
        if platform.system() != 'Windows':
            print(f'text:\n{textwrap.indent(text, " ")}')
            print(f'text_before:\n{textwrap.indent(text_before, " ")}')
            print(f'text_after:\n{textwrap.indent(text_after, " ")}')
        print(f'{path_text=}')
        print(f'{path_text_before=}')
        print(f'{path_text_after=}')

        _test_4457_show_diff(path_text, path_text_before)
        _test_4457_show_diff(path_text, path_text_after)

        assert text_before == text
        assert rms_before == 0

        # As of 2025-05-20 there are some differences in some characters, e.g.
        # the non-ascii characters in `Philipp Krahenbuhl`.
        # See <path_pixmap> and <path_pixmap_after>.
        assert rms_after < rms_after_max

    # Avoid test failure caused by mupdf warnings.
    #
    # NOTE(review): this function was reconstructed from a rendering that had
    # lost its indentation, so it is not certain whether this check belongs
    # after the loop (as here) or inside it - confirm against the original.
    wt = pymupdf.TOOLS.mupdf_warnings()
    print(f'{wt=}')
    assert wt == 'bogus font ascent/descent values (0 / 0)\n... repeated 5 times...'
0 commit comments