@@ -850,3 +850,55 @@ def test_4546():
850850 assert text == expected_mupdf_1_26_1
851851 else :
852852 print (f'No expected output for { pymupdf .mupdf_version_tuple = } ' )
853+
854+
def test_4503():
    # Check detection of strikeout text. Behaviour is improved with
    # mupdf>=1.26.2, but not perfect.
    #
    # Page 0 of test_4503.pdf is extracted as a 'rawdict'; every span is
    # printed for diagnostics, and the last span whose text contains `marker`
    # is checked for the FZ_STEXT_STRIKEOUT bit and for its exact text.
    #
    marker = 'the right to request the state to review'
    path = os.path.normpath(f'{__file__}/../../tests/resources/test_4503.pdf')
    span_0 = None
    text_0 = None
    print()
    print(f'{pymupdf.mupdf_version_tuple=}')
    with pymupdf.open(path) as document:
        page = document[0]
        # Specify TEXT_COLLECT_STYLES so we collect char_flags, which contains
        # FZ_STEXT_STRIKEOUT etc.
        #
        flags = pymupdf.TEXTFLAGS_RAWDICT | pymupdf.TEXT_COLLECT_STYLES
        rawdict = page.get_text('rawdict', flags=flags)
        for i, block in enumerate(rawdict['blocks']):
            print(f'block {i}:')
            for j, line in enumerate(block['lines']):
                print(f'    line {j}:')
                for k, span in enumerate(line['spans']):
                    # A rawdict span carries individual characters rather
                    # than a ready-made string, so assemble the text here.
                    span_text = ''.join(char['c'] for char in span['chars'])
                    print(f'        span {k}: {span["flags"]=:#x} {span["char_flags"]=:#x}: {span_text!r}')
                    if marker in span_text:
                        # No early exit: if several spans match, the last
                        # one wins.
                        span_0 = span
                        text_0 = span_text

    assert span_0 is not None, f'No span containing {marker!r} found in {path}.'
    #print(f'{span_0=}')
    print(f'{span_0["flags"]=:#x}')
    print(f'{span_0["char_flags"]=:#x}')
    print(f'{text_0=}')
    strikeout = span_0['char_flags'] & pymupdf.mupdf.FZ_STEXT_STRIKEOUT
    print(f'{strikeout=}')

    if pymupdf.mupdf_version_tuple >= (1, 26, 2):
        # 2025-06-09: This is still incorrect - the span should include the
        # following text 'and, if appropriate,'. It looks like following spans
        # are:
        #   strikeout=0: 'and, '
        #   strikeout=1: 'if '
        #   strikeout=0: 'appropri'
        #   strikeout=1: 'ate,'
        #
        assert strikeout, f'Expected bit 0 (FZ_STEXT_STRIKEOUT) to be set in {span_0["char_flags"]=:#x}.'
        assert text_0 == 'the right to request the state to review '
    else:
        # Expecting the bug.
        assert not strikeout, f'Expected bit 0 (FZ_STEXT_STRIKEOUT) to be unset in {span_0["char_flags"]=:#x}.'
        assert text_0 == 'notice the right to request the state to review and, if appropriate,'
0 commit comments