Skip to content

Commit 890c93a

Browse files
authored
BUG: Process lookup decoded as TextStringObjects (#2008)
Closes #1982
1 parent 74f8175 commit 890c93a

File tree

2 files changed: 38 additions and 2 deletions.

pypdf/filters.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -764,9 +764,11 @@ def bits2byte(data: bytes, size: Tuple[int, int], bits: int) -> bytes:
764764
data = bits2byte(data, size, 4)
765765
img = Image.frombytes(mode, size, data)
766766
if color_space == "/Indexed":
767-
from .generic import ByteStringObject
767+
from .generic import TextStringObject
768768

769-
if isinstance(lookup, ByteStringObject):
769+
if isinstance(lookup, TextStringObject):
770+
lookup = lookup.original_bytes
771+
if isinstance(lookup, bytes):
770772
try:
771773
nb, conv, mode = { # type: ignore
772774
"1": (0, "", ""),

tests/test_filters.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -468,6 +468,40 @@ def test_calrgb():
468468
reader.pages[0].images[0]
469469

470470

471+
@pytest.mark.enable_socket()
def test_index_lookup():
    """Extracted images match their references when the lookup is a str or bytes."""
    url = "https://github.com/py-pdf/pypdf/files/12090523/2023.USDC_Circle.Examination.Report.May.2023.pdf"
    name = "2023USDC.pdf"
    reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))

    # (page index, reference image URL, cached reference name)
    cases = (
        # TextStringObject lookup
        (
            0,
            "https://github.com/py-pdf/pypdf/files/12144094/im1.png.txt",
            "iss1982_im1.png",
        ),
        # ByteStringObject lookup
        (
            -1,
            "https://github.com/py-pdf/pypdf/files/12144093/im2.png.txt",
            "iss1982_im2.png",
        ),
    )

    for page_index, url_png, name_png in cases:
        # get_pdf_from_url is reused for the PNG reference: not a pdf but it works.
        reference = Image.open(BytesIO(get_pdf_from_url(url_png, name=name_png)))
        extracted = reader.pages[page_index].images[-1]
        assert extracted.image.mode == "RGB"

        # Root of the summed squared per-channel differences, divided by the
        # pixel count; the extracted image must be (nearly) identical.
        delta = ImageChops.difference(extracted.image, reference)
        squared_error = sum(r * r + g * g + b * b for r, g, b in delta.getdata())
        distance = sqrt(squared_error) / (delta.size[0] * delta.size[1])
        assert distance < 0.001
503+
504+
471505
@pytest.mark.enable_socket()
472506
def test_2bits_image():
473507
"""From #1954, test with 2bits image. TODO: 4bits also"""

0 commit comments