Skip to content

Commit 78b7962

Browse files
authored
feat(viz): add reading order branch numbering, fix cross-page lists (#334)
Signed-off-by: Panos Vagenas <[email protected]>
1 parent 2e14a74 commit 78b7962

File tree

7 files changed

+1238
-8
lines changed

7 files changed

+1238
-8
lines changed

docling_core/transforms/visualizer/layout_visualizer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,8 +163,8 @@ def _draw_doc_layout(
163163
else:
164164
raise RuntimeError(f"Cannot visualize page-image for {page_nr}")
165165

166-
if prev_page_nr is None or page_nr > prev_page_nr: # new page begins
167-
# complete previous drawing
166+
if prev_page_nr is None or page_nr != prev_page_nr: # changing page
167+
# dump previous drawing
168168
if prev_page_nr is not None and prev_image and clusters:
169169
self._draw_clusters(
170170
image=prev_image,

docling_core/transforms/visualizer/reading_order_visualizer.py

Lines changed: 66 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,31 @@
11
"""Define classes for reading order visualization."""
22

33
from copy import deepcopy
4-
from typing import Optional
4+
from typing import Optional, Union
55

6-
from PIL import ImageDraw
6+
from PIL import ImageDraw, ImageFont
77
from PIL.Image import Image
8+
from PIL.ImageFont import FreeTypeFont
89
from pydantic import BaseModel
910
from typing_extensions import override
1011

1112
from docling_core.transforms.visualizer.base import BaseVisualizer
1213
from docling_core.types.doc.document import ContentLayer, DocItem, DoclingDocument
1314

1415

16+
class _NumberDrawingData(BaseModel):
17+
xy: tuple[float, float]
18+
text: str
19+
20+
1521
class ReadingOrderVisualizer(BaseVisualizer):
1622
"""Reading order visualizer."""
1723

1824
class Params(BaseModel):
1925
"""Layout visualization parameters."""
2026

2127
show_label: bool = True
28+
show_branch_numbering: bool = False
2229
content_layers: set[ContentLayer] = {
2330
cl for cl in ContentLayer if cl != ContentLayer.BACKGROUND
2431
}
@@ -76,10 +83,17 @@ def _draw_doc_reading_order(
7683
images: Optional[dict[Optional[int], Image]] = None,
7784
):
7885
"""Draw the reading order."""
79-
# draw = ImageDraw.Draw(image)
86+
font: Union[ImageFont.ImageFont, FreeTypeFont]
87+
try:
88+
font = ImageFont.truetype("arial.ttf", 12)
89+
except OSError:
90+
# Fallback to default font if arial is not available
91+
font = ImageFont.load_default()
8092
x0, y0 = None, None
93+
number_data_to_draw: dict[Optional[int], list[_NumberDrawingData]] = {}
8194
my_images: dict[Optional[int], Image] = images or {}
8295
prev_page = None
96+
i = 0
8397
for elem, _ in doc.iterate_items(
8498
included_content_layers=self.params.content_layers,
8599
):
@@ -92,7 +106,10 @@ def _draw_doc_reading_order(
92106
page_no = prov.page_no
93107
image = my_images.get(page_no)
94108

95-
if image is None or prev_page is None or page_no > prev_page:
109+
if page_no not in number_data_to_draw:
110+
number_data_to_draw[page_no] = []
111+
112+
if image is None or prev_page is None or page_no != prev_page:
96113
# changing page (may also go back to an earlier page, e.g. for cross-page lists)
97114
prev_page = page_no
98115
x0 = y0 = None
@@ -109,7 +126,7 @@ def _draw_doc_reading_order(
109126
else:
110127
image = deepcopy(pil_img)
111128
my_images[page_no] = image
112-
draw = ImageDraw.Draw(image)
129+
draw = ImageDraw.Draw(image, "RGBA")
113130

114131
tlo_bbox = prov.bbox.to_top_left_origin(
115132
page_height=doc.pages[prov.page_no].size.height
@@ -124,9 +141,20 @@ def _draw_doc_reading_order(
124141
ro_bbox.b, ro_bbox.t = ro_bbox.t, ro_bbox.b
125142

126143
if x0 is None and y0 is None:
144+
# is_root = True
127145
x0 = (ro_bbox.l + ro_bbox.r) / 2.0
128146
y0 = (ro_bbox.b + ro_bbox.t) / 2.0
147+
148+
number_data_to_draw[page_no].append(
149+
_NumberDrawingData(
150+
xy=(x0, y0),
151+
text=f"{i}",
152+
)
153+
)
154+
i += 1
155+
129156
else:
157+
# is_root = False
130158
assert x0 is not None
131159
assert y0 is not None
132160

@@ -139,7 +167,40 @@ def _draw_doc_reading_order(
139167
line_width=2,
140168
color="red",
141169
)
170+
142171
x0, y0 = x1, y1
172+
173+
if self.params.show_branch_numbering:
174+
# post-drawing the numbers to ensure they are rendered on top-layer
175+
for page in number_data_to_draw:
176+
if (image := my_images.get(page)) is None:
177+
continue
178+
draw = ImageDraw.Draw(image, "RGBA")
179+
180+
for num_item in number_data_to_draw[page]:
181+
182+
text_bbox = draw.textbbox(num_item.xy, num_item.text, font)
183+
text_bg_padding = 5
184+
draw.ellipse(
185+
[
186+
(
187+
text_bbox[0] - text_bg_padding,
188+
text_bbox[1] - text_bg_padding,
189+
),
190+
(
191+
text_bbox[2] + text_bg_padding,
192+
text_bbox[3] + text_bg_padding,
193+
),
194+
],
195+
fill="orange",
196+
)
197+
draw.text(
198+
num_item.xy,
199+
text=num_item.text,
200+
fill="black",
201+
font=font,
202+
)
203+
143204
return my_images
144205

145206
@override

docling_core/types/doc/document.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4353,7 +4353,9 @@ def add_page(
43534353
return pitem
43544354

43554355
def get_visualization(
4356-
self, show_label: bool = True
4356+
self,
4357+
show_label: bool = True,
4358+
show_branch_numbering: bool = False,
43574359
) -> dict[Optional[int], PILImage.Image]:
43584360
"""Get visualization of the document as images by page."""
43594361
from docling_core.transforms.visualizer.layout_visualizer import (
@@ -4369,6 +4371,9 @@ def get_visualization(
43694371
show_label=show_label,
43704372
),
43714373
),
4374+
params=ReadingOrderVisualizer.Params(
4375+
show_branch_numbering=show_branch_numbering,
4376+
),
43724377
)
43734378
images = visualizer.get_visualization(doc=self)
43744379

0 commit comments

Comments
 (0)