@@ -514,3 +514,96 @@ def test_4182():
514514 rms = gentle_compare .pixmaps_rms (path_expected , pixmap )
515515 print (f'{ rms = } ' )
516516 assert rms < 0.01
517+
518+
def test_4179():
    """Check that TEXT_ACCURATE_BBOXES yields the expected character boxes.

    The square-root character in test_4179.pdf is located three ways:
    a default search, a search with TEXT_ACCURATE_BBOXES, and a rawdict
    iteration with TEXT_ACCURATE_BBOXES. The two accurate results must
    agree with each other and differ from the default search. Each box is
    drawn on the page and the rendering is compared with a reference image.

    The test is skipped when PYMUPDF_USE_EXTRA=0, where it is known to fail.
    """
    if os.environ.get('PYMUPDF_USE_EXTRA') == '0':
        # Looks like Python code doesn't behave same as C++, probably because
        # of the code not being correct for Python's native unicode strings.
        print('test_4179(): Not running with PYMUPDF_USE_EXTRA=0 because known to fail.')
        return

    # We check that using TEXT_ACCURATE_BBOXES gives the correct boxes. But
    # this also requires that we disable PyMuPDF quad corrections.
    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4179.pdf')

    # Remember the global settings we are about to change so that they can
    # be restored afterwards.
    aa = pymupdf.mupdf.fz_aa_level()
    uqc = pymupdf._globals.skip_quad_corrections
    try:
        # The global state is changed inside the `try` so that the `finally`
        # clause restores it even if the sanity check below fails.
        #
        # Disable anti-aliasing to avoid our drawing of multiple identical
        # bboxes (from normal/accurate bboxes) giving slightly different
        # results.
        pymupdf.TOOLS.set_aa_level(0)
        pymupdf.TOOLS.unset_quad_corrections(True)
        assert pymupdf._globals.skip_quad_corrections

        with pymupdf.open(path) as document:
            page = document[0]

            char_sqrt = '\u221a'  # SQUARE ROOT, UTF-8 bytes e2 88 9a.

            # Search with defaults.
            bboxes_search = page.search_for(char_sqrt)
            assert len(bboxes_search) == 1
            print(f'bboxes_search[0]:\n    {bboxes_search[0]!r}')
            page.draw_rect(bboxes_search[0], color=(1, 0, 0))
            rms = gentle_compare.rms(
                bboxes_search[0],
                (250.0489959716797, 91.93604278564453, 258.34783935546875, 101.34073638916016),
            )
            assert rms < 0.01

            # Search with TEXT_ACCURATE_BBOXES.
            bboxes_search_accurate = page.search_for(
                char_sqrt,
                flags=(
                    0
                    | pymupdf.TEXT_DEHYPHENATE
                    | pymupdf.TEXT_PRESERVE_WHITESPACE
                    | pymupdf.TEXT_PRESERVE_LIGATURES
                    | pymupdf.TEXT_MEDIABOX_CLIP
                    | pymupdf.TEXT_ACCURATE_BBOXES
                ),
            )
            assert len(bboxes_search_accurate) == 1
            print(f'bboxes_search_accurate[0]\n    {bboxes_search_accurate[0]!r}')
            page.draw_rect(bboxes_search_accurate[0], color=(0, 1, 0))
            rms = gentle_compare.rms(
                bboxes_search_accurate[0],
                (250.0489959716797, 99.00948333740234, 258.34783935546875, 108.97208404541016),
            )
            assert rms < 0.01

            # Iterate with TEXT_ACCURATE_BBOXES.
            bboxes_iterate_accurate = []
            dict_ = page.get_text(
                'rawdict',
                flags=pymupdf.TEXT_ACCURATE_BBOXES,
            )
            for block in dict_['blocks']:
                if block['type'] != 0:
                    continue
                # A block without a 'lines' entry contributes nothing.
                for line in block.get('lines', ()):
                    for span in line['spans']:
                        for ch in span['chars']:
                            if ch['c'] != char_sqrt:
                                continue
                            bbox_iterate_accurate = ch['bbox']
                            bboxes_iterate_accurate.append(bbox_iterate_accurate)
                            print(f'bbox_iterate_accurate:\n    {bbox_iterate_accurate!r}')
                            page.draw_rect(bbox_iterate_accurate, color=(0, 0, 1))

            # The accurate bbox must differ from the default one, and both
            # ways of obtaining the accurate bbox must agree.
            assert bboxes_search_accurate != bboxes_search
            assert bboxes_iterate_accurate == bboxes_search_accurate
            pixmap = page.get_pixmap()

            # Save the rendering and its diff against the reference image
            # next to the test resources, to help diagnose failures.
            path_out = os.path.normpath(f'{__file__}/../../tests/resources/test_4179_out.png')
            pixmap.save(path_out)
            path_expected = os.path.normpath(f'{__file__}/../../tests/resources/test_4179_expected.png')
            rms = gentle_compare.pixmaps_rms(path_expected, pixmap)
            pixmap_diff = gentle_compare.pixmaps_diff(path_expected, pixmap)
            path_out_diff = os.path.normpath(f'{__file__}/../../tests/resources/test_4179_diff.png')
            pixmap_diff.save(path_out_diff)
            print(f'Have saved to: {path_out_diff=}')
            print(f'{rms=}')
            if pymupdf.mupdf_version_tuple < (1, 26):
                # Prior to fix for mupdf bug 708274, our rects are rendered
                # slightly incorrectly.
                assert 3.5 < rms < 4.5
            else:
                assert rms < 0.01
    finally:
        pymupdf.TOOLS.set_aa_level(aa)
        pymupdf.TOOLS.unset_quad_corrections(uqc)
0 commit comments