1717from docling .backend .docling_parse_v4_backend import DoclingParseV4DocumentBackend
1818from docling .datamodel .base_models import InputFormat , Page
1919from docling .datamodel .document import InputDocument
20- from docling_core .types .doc .base import BoundingBox , Size
20+ from docling_core .types .doc .base import BoundingBox , CoordOrigin , Size
2121from docling_core .types .doc .document import (
2222 DoclingDocument ,
2323 GraphData ,
@@ -177,6 +177,20 @@ def yield_cells_from_html_table(
177177 text_cell = text_cells [text_cell_id ]
178178 text = "" .join (text_cell ["tokens" ])
179179
180+ bbox = None
181+ if (
182+ text_cells is not None
183+ and text_cell_id < len (text_cells )
184+ and "bbox" in text_cells [text_cell_id ]
185+ ):
186+ bbox = BoundingBox (
187+ l = text_cells [text_cell_id ]["bbox" ][0 ],
188+ b = text_cells [text_cell_id ]["bbox" ][1 ],
189+ r = text_cells [text_cell_id ]["bbox" ][2 ],
190+ t = text_cells [text_cell_id ]["bbox" ][3 ],
191+ coord_origin = CoordOrigin .BOTTOMLEFT ,
192+ )
193+
180194 rowspan = int (cell .get ("rowspan" , 1 ))
181195 colspan = int (cell .get ("colspan" , 1 ))
182196
@@ -186,7 +200,7 @@ def yield_cells_from_html_table(
186200 grid [row_idx + r ][col_idx + c ] = text
187201
188202 # print(f"Row: {row_idx + 1}, Col: {col_idx + 1}, Text: {text}")
189- yield row_idx , col_idx , rowspan , colspan , text
203+ yield row_idx , col_idx , rowspan , colspan , text , bbox
190204
191205 col_idx += colspan # Move to next column after colspan
192206
@@ -202,9 +216,14 @@ def convert_html_table_into_docling_tabledata(
202216 cells = []
203217
204218 try :
205- for row_idx , col_idx , rowspan , colspan , text in yield_cells_from_html_table (
206- table_html = table_html , text_cells = text_cells
207- ):
219+ for (
220+ row_idx ,
221+ col_idx ,
222+ rowspan ,
223+ colspan ,
224+ text ,
225+ bbox ,
226+ ) in yield_cells_from_html_table (table_html = table_html , text_cells = text_cells ):
208227 cell = TableCell (
209228 row_span = rowspan ,
210229 col_span = colspan ,
@@ -213,6 +232,7 @@ def convert_html_table_into_docling_tabledata(
213232 start_col_offset_idx = col_idx ,
214233 end_col_offset_idx = col_idx + colspan ,
215234 text = text ,
235+ bbox = bbox ,
216236 )
217237 cells .append (cell )
218238
0 commit comments