@@ -468,6 +468,40 @@ def test_calrgb():
468468 reader .pages [0 ].images [0 ]
469469
470470
@pytest.mark.enable_socket()
def test_index_lookup():
    """
    Check extracted images when the lookup is supplied as str and as bytes.

    The last image of the first page covers the TextStringObject lookup and
    the last image of the last page covers the ByteStringObject lookup. Each
    one must be in RGB mode and match its downloaded reference picture.
    """
    url = "https://github.com/py-pdf/pypdf/files/12090523/2023.USDC_Circle.Examination.Report.May.2023.pdf"
    name = "2023USDC.pdf"
    reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))

    # (page index, reference image URL, local cache name)
    cases = (
        # TextStringObject lookup
        (
            0,
            "https://github.com/py-pdf/pypdf/files/12144094/im1.png.txt",
            "iss1982_im1.png",
        ),
        # ByteStringObject lookup
        (
            -1,
            "https://github.com/py-pdf/pypdf/files/12144093/im2.png.txt",
            "iss1982_im2.png",
        ),
    )

    for page_index, url_png, name_png in cases:
        # The download helper is named for PDFs, but it is used here to
        # fetch the reference PNG as well.
        reference_bytes = get_pdf_from_url(url_png, name=name_png)
        refimg = Image.open(BytesIO(reference_bytes))

        extracted = reader.pages[page_index].images[-1]
        assert extracted.image.mode == "RGB"

        # Root of the summed squared channel differences, divided by the
        # number of pixels in the difference image.
        delta = ImageChops.difference(extracted.image, refimg)
        width, height = delta.size
        squared_total = sum(
            red * red + green * green + blue * blue
            for red, green, blue in delta.getdata()
        )
        distance = sqrt(squared_total) / (width * height)
        assert distance < 0.001
503+
504+
471505@pytest .mark .enable_socket ()
472506def test_2bits_image ():
473507 """From #1954, test with 2bits image. TODO: 4bits also"""
0 commit comments