11"""Define classes for reading order visualization."""
22
33from copy import deepcopy
4- from typing import Optional
4+ from typing import Optional , Union
55
6- from PIL import ImageDraw
6+ from PIL import ImageDraw , ImageFont
77from PIL .Image import Image
8+ from PIL .ImageFont import FreeTypeFont
89from pydantic import BaseModel
910from typing_extensions import override
1011
1112from docling_core .transforms .visualizer .base import BaseVisualizer
1213from docling_core .types .doc .document import ContentLayer , DocItem , DoclingDocument
1314
1415
class _NumberDrawingData(BaseModel):
    """Deferred drawing instruction for one number label on a page image.

    Instances are collected per page while the reading-order arrows are
    drawn and rendered afterwards, so the labels end up on top of the arrows.
    """

    # Anchor point passed to ``draw.textbbox`` / ``draw.text``; the visible
    # caller sets it to the centre of the bounding box that starts a branch.
    xy: tuple[float, float]
    # Label string to render; the visible caller fills it with a running
    # counter formatted via an f-string.
    text: str
19+
20+
1521class ReadingOrderVisualizer (BaseVisualizer ):
1622 """Reading order visualizer."""
1723
1824 class Params (BaseModel ):
1925 """Layout visualization parameters."""
2026
2127 show_label : bool = True
28+ show_branch_numbering : bool = False
2229 content_layers : set [ContentLayer ] = {
2330 cl for cl in ContentLayer if cl != ContentLayer .BACKGROUND
2431 }
@@ -76,10 +83,17 @@ def _draw_doc_reading_order(
7683 images : Optional [dict [Optional [int ], Image ]] = None ,
7784 ):
7885 """Draw the reading order."""
79- # draw = ImageDraw.Draw(image)
86+ font : Union [ImageFont .ImageFont , FreeTypeFont ]
87+ try :
88+ font = ImageFont .truetype ("arial.ttf" , 12 )
89+ except OSError :
90+ # Fallback to default font if arial is not available
91+ font = ImageFont .load_default ()
8092 x0 , y0 = None , None
93+ number_data_to_draw : dict [Optional [int ], list [_NumberDrawingData ]] = {}
8194 my_images : dict [Optional [int ], Image ] = images or {}
8295 prev_page = None
96+ i = 0
8397 for elem , _ in doc .iterate_items (
8498 included_content_layers = self .params .content_layers ,
8599 ):
@@ -92,7 +106,10 @@ def _draw_doc_reading_order(
92106 page_no = prov .page_no
93107 image = my_images .get (page_no )
94108
95- if image is None or prev_page is None or page_no > prev_page :
109+ if page_no not in number_data_to_draw :
110+ number_data_to_draw [page_no ] = []
111+
112+ if image is None or prev_page is None or page_no != prev_page :
96113 # new page begins
97114 prev_page = page_no
98115 x0 = y0 = None
@@ -109,7 +126,7 @@ def _draw_doc_reading_order(
109126 else :
110127 image = deepcopy (pil_img )
111128 my_images [page_no ] = image
112- draw = ImageDraw .Draw (image )
129+ draw = ImageDraw .Draw (image , "RGBA" )
113130
114131 tlo_bbox = prov .bbox .to_top_left_origin (
115132 page_height = doc .pages [prov .page_no ].size .height
@@ -124,9 +141,20 @@ def _draw_doc_reading_order(
124141 ro_bbox .b , ro_bbox .t = ro_bbox .t , ro_bbox .b
125142
126143 if x0 is None and y0 is None :
144+ # is_root= True
127145 x0 = (ro_bbox .l + ro_bbox .r ) / 2.0
128146 y0 = (ro_bbox .b + ro_bbox .t ) / 2.0
147+
148+ number_data_to_draw [page_no ].append (
149+ _NumberDrawingData (
150+ xy = (x0 , y0 ),
151+ text = f"{ i } " ,
152+ )
153+ )
154+ i += 1
155+
129156 else :
157+ # is_root = False
130158 assert x0 is not None
131159 assert y0 is not None
132160
@@ -139,7 +167,40 @@ def _draw_doc_reading_order(
139167 line_width = 2 ,
140168 color = "red" ,
141169 )
170+
142171 x0 , y0 = x1 , y1
172+
173+ if self .params .show_branch_numbering :
174+ # post-drawing the numbers to ensure they are rendered on top-layer
175+ for page in number_data_to_draw :
176+ if (image := my_images .get (page )) is None :
177+ continue
178+ draw = ImageDraw .Draw (image , "RGBA" )
179+
180+ for num_item in number_data_to_draw [page ]:
181+
182+ text_bbox = draw .textbbox (num_item .xy , num_item .text , font )
183+ text_bg_padding = 5
184+ draw .ellipse (
185+ [
186+ (
187+ text_bbox [0 ] - text_bg_padding ,
188+ text_bbox [1 ] - text_bg_padding ,
189+ ),
190+ (
191+ text_bbox [2 ] + text_bg_padding ,
192+ text_bbox [3 ] + text_bg_padding ,
193+ ),
194+ ],
195+ fill = "orange" ,
196+ )
197+ draw .text (
198+ num_item .xy ,
199+ text = num_item .text ,
200+ fill = "black" ,
201+ font = font ,
202+ )
203+
143204 return my_images
144205
145206 @override
0 commit comments