Skip to content

Commit 04b0a38

Browse files
authored
ROB: Resolve UnboundLocalError for xobjs in _get_image (#3684)
The xobjs variable was used outside the try-except block that defined it. If a KeyError was caught and the id started and ended with '~' (inline images), the code would continue but xobjs would remain undefined, causing an UnboundLocalError when trying to access non-inline images. Initialize xobjs to None and check before using it.

---------

Co-authored-by: Yuki9814 <Yuki9814@users.noreply.github.com>
Co-authored-by: Yuki9814 <222397878+Yuki9814@users.noreply.github.com>
1 parent 0e5157c commit 04b0a38

File tree

2 files changed

+36
-3
lines changed

2 files changed

+36
-3
lines changed

pypdf/_page.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -637,21 +637,25 @@ def _get_image(
637637
id = list(id)
638638
if isinstance(id, list) and len(id) == 1:
639639
id = id[0]
640+
xobjs: Optional[DictionaryObject] = None
640641
try:
641642
xobjs = cast(
642643
DictionaryObject, cast(DictionaryObject, obj[PG.RESOURCES])[RES.XOBJECT]
643644
)
644-
except KeyError:
645+
except KeyError as exc:
645646
if not (id[0] == "~" and id[-1] == "~"):
646-
raise
647+
raise KeyError(
648+
f"Cannot access image object {id} without XObject resources"
649+
) from exc
647650
if isinstance(id, str):
648651
if id[0] == "~" and id[-1] == "~":
649652
if self.inline_images is None:
650653
self.inline_images = self._get_inline_images()
651-
if self.inline_images is None: # pragma: no cover
654+
if self.inline_images is None:
652655
raise KeyError("No inline image can be found")
653656
return self.inline_images[id]
654657

658+
assert xobjs is not None
655659
from .generic._image_xobject import _xobj_to_image # noqa: PLC0415
656660
imgd = _xobj_to_image(cast(DictionaryObject, xobjs[id]))
657661
extension, byte_stream = imgd[:2]
@@ -662,6 +666,7 @@ def _get_image(
662666
indirect_reference=xobjs[id].indirect_reference,
663667
)
664668
# in a subobject
669+
assert xobjs is not None
665670
ids = id[1:]
666671
return self._get_image(ids, cast(DictionaryObject, xobjs[id[0]]))
667672

tests/test_images.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,34 @@ def test_image_extraction(src, page_index, image_key, expected):
227227
assert image_similarity(BytesIO(actual_image.data), expected) >= 0.99
228228

229229

230+
def test_get_inline_image_without_xobject_resources():
231+
page = PageObject(None, None)
232+
inline_image = object()
233+
234+
with mock.patch.object(page, "_get_inline_images", return_value={"~0~": inline_image}):
235+
assert page._get_image("~0~") is inline_image
236+
237+
238+
def test_get_inline_image_without_xobject_resources_raises_when_missing():
239+
page = PageObject(None, None)
240+
241+
with (
242+
mock.patch.object(page, "_get_inline_images", return_value=None),
243+
pytest.raises(KeyError, match="No inline image can be found"),
244+
):
245+
page._get_image("~0~")
246+
247+
248+
def test_get_xobject_image_without_xobject_resources_raises():
249+
page = PageObject(None, None)
250+
251+
with pytest.raises(
252+
KeyError,
253+
match="Cannot access image object /Im0 without XObject resources",
254+
):
255+
page._get_image("/Im0")
256+
257+
230258
@pytest.mark.enable_socket
231259
@pytest.mark.timeout(30)
232260
def test_loop_in_image_keys():

0 commit comments

Comments
 (0)