Skip to content

Commit a947440

Browse files
fix: visualization of document pages without items (#271)
* fix: visualization of document pages without items
  Signed-off-by: Peter Staar <[email protected]>
* reformatted the code
  Signed-off-by: Peter Staar <[email protected]>
* fixed MyPy
  Signed-off-by: Peter Staar <[email protected]>
* fixed MyPy (2)
  Signed-off-by: Peter Staar <[email protected]>
* fixed MyPy (3)
  Signed-off-by: Peter Staar <[email protected]>
* fixed flake
  Signed-off-by: Peter Staar <[email protected]>
* fixed flake (2)
  Signed-off-by: Peter Staar <[email protected]>
---------
Signed-off-by: Peter Staar <[email protected]>
1 parent d9709d0 commit a947440

File tree

1 file changed

+20
-9
lines changed

1 file changed

+20
-9
lines changed

docling_core/transforms/visualizer/layout_visualizer.py

Lines changed: 20 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -123,7 +123,21 @@ def _draw_doc_layout(
123123
):
124124
"""Draw the document clusters and optionally the reading order."""
125125
clusters = []
126-
my_images = images or {}
126+
my_images: dict[Optional[int], Image] = {}
127+
128+
if images is not None:
129+
my_images = images
130+
131+
# Initialise `my_images` beforehand: sometimes, you have the
132+
# page-images but no DocItems!
133+
for page_nr, page in doc.pages.items():
134+
page_image = doc.pages[page_nr].image
135+
if page_image is None or (pil_img := page_image.pil_image) is None:
136+
raise RuntimeError("Cannot visualize document without images")
137+
elif page_nr not in my_images:
138+
image = deepcopy(pil_img)
139+
my_images[page_nr] = image
140+
127141
prev_image = None
128142
prev_page_nr = None
129143
for idx, (elem, _) in enumerate(
@@ -137,7 +151,11 @@ def _draw_doc_layout(
137151
continue # Skip elements without provenances
138152
prov = elem.prov[0]
139153
page_nr = prov.page_no
140-
image = my_images.get(page_nr)
154+
155+
if page_nr in my_images:
156+
image = my_images[page_nr]
157+
else:
158+
raise RuntimeError(f"Cannot visualize page-image for {page_nr}")
141159

142160
if prev_page_nr is None or page_nr > prev_page_nr: # new page begins
143161
# complete previous drawing
@@ -150,13 +168,6 @@ def _draw_doc_layout(
150168
)
151169
clusters = []
152170

153-
if image is None:
154-
page_image = doc.pages[page_nr].image
155-
if page_image is None or (pil_img := page_image.pil_image) is None:
156-
raise RuntimeError("Cannot visualize document without images")
157-
else:
158-
image = deepcopy(pil_img)
159-
my_images[page_nr] = image
160171
tlo_bbox = prov.bbox.to_top_left_origin(
161172
page_height=doc.pages[prov.page_no].size.height
162173
)

0 commit comments

Comments
 (0)