Skip to content

Commit 49b028a

Browse files
tests/test_story.py: added test for #3813.
This is addressed in mupdf master.
1 parent 29ff321 commit 49b028a

File tree

1 file changed

+65
-0
lines changed

1 file changed

+65
-0
lines changed

tests/test_story.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,3 +227,68 @@ def contentfn(positions):
227227
def test_archive_creation():
    """Construct a Story from an Archive instance and from a bare '.' string."""
    # Explicit Archive object wrapping the current directory.
    story = pymupdf.Story(archive=pymupdf.Archive('.'))
    # The same location passed directly as a string.
    story = pymupdf.Story(archive='.')
230+
231+
232+
def test_3813():
    """Regression test for #3813: ordered-list numbering after a nested list.

    Lays out two ordered lists with a Story (the first contains a nested
    <ol>, the second a nested <ul>), writes the result to a PDF, and compares
    the text extracted from the first page with the output expected for the
    MuPDF version in use.
    """
    import pymupdf

    html = """
<p>Count is fine:</p>
<ol>
<li>Lorem
<ol>
<li>Sub Lorem</li>
<li>Sub Lorem</li>
</ol>
</li>
<li>Lorem</li>
<li>Lorem</li>
</ol>

<p>Broken count:</p>
<ol>
<li>Lorem
<ul>
<li>Sub Lorem</li>
<li>Sub Lorem</li>
</ul>
</li>
<li>Lorem</li>
<li>Lorem</li>
</ol>
"""
    page_rect = pymupdf.paper_rect("A4")
    # Layout area: the page rectangle adjusted by (36, 36, -36, -36).
    layout_rect = page_rect + (36, 36, -36, -36)

    story = pymupdf.Story(html=html)
    path = os.path.normpath(f'{__file__}/../../tests/test_3813_out.pdf')
    writer = pymupdf.DocumentWriter(path)

    # Emit at least one page, and keep going for as long as place() reports
    # that more content remains.
    while True:
        device = writer.begin_page(page_rect)
        more, _ = story.place(layout_rect)
        story.draw(device)
        writer.end_page()
        if not more:
            break

    writer.close()

    # Only the first page's text is checked.
    with pymupdf.open(path) as document:
        text = document[0].get_text()
    text_utf8 = text.encode()

    if pymupdf.mupdf_version_tuple >= (1, 25):
        text_expected_utf8 = b'Count is \xef\xac\x81ne:\n1. Lorem\n1. Sub Lorem\n2. Sub Lorem\n2. Lorem\n3. Lorem\nBroken count:\n1. Lorem\n\xe2\x80\xa2 Sub Lorem\n\xe2\x80\xa2 Sub Lorem\n2. Lorem\n3. Lorem\n'
    else:
        # MuPDF before 1.25 gets the numbering wrong: the outer list resumes
        # at 4 after the nested <ul>.
        text_expected_utf8 = b'Count is \xef\xac\x81ne:\n1. Lorem\n1. Sub Lorem\n2. Sub Lorem\n2. Lorem\n3. Lorem\nBroken count:\n1. Lorem\n\xe2\x80\xa2 Sub Lorem\n\xe2\x80\xa2 Sub Lorem\n4. Lorem\n5. Lorem\n'
    expected_text = text_expected_utf8.decode()

    # Diagnostics, printed before the assertion.
    print(f'text_utf8:\n {text_utf8!r}')
    print(f'text_expected_utf8:\n {text_expected_utf8!r}')
    print(f'text:\n {textwrap.indent(text, " ")}')
    print(f'text_expected:\n {textwrap.indent(expected_text, " ")}')

    assert text == expected_text

0 commit comments

Comments
 (0)