88 ContentLayer ,
99 CoordOrigin ,
1010 DocItem ,
11+ DocItemLabel ,
1112 DoclingDocument ,
1213 DocumentOrigin ,
1314 GroupLabel ,
@@ -275,9 +276,10 @@ def _find_tables_in_sheet(
275276
276277 if self .workbook is not None :
277278 content_layer = self ._get_sheet_content_layer (sheet )
278- tables = self ._find_data_tables (sheet )
279+ # Returns list of (ExcelTable, optional_title_text) tuples
280+ tables_with_titles = self ._find_data_tables (sheet )
279281
280- for excel_table in tables :
282+ for excel_table , title_text in tables_with_titles :
281283 origin_col = excel_table .anchor [0 ]
282284 origin_row = excel_table .anchor [1 ]
283285 num_rows = excel_table .num_rows
@@ -303,9 +305,19 @@ def _find_tables_in_sheet(
303305 )
304306 table_data .table_cells .append (cell )
305307
308+ # Create caption if a title was found
309+ caption = None
310+ if title_text :
311+ caption = doc .add_text (
312+ text = title_text ,
313+ parent = self .parents [0 ],
314+ label = DocItemLabel .CAPTION ,
315+ )
316+
306317 page_no = self .workbook .index (sheet ) + 1
307318 doc .add_table (
308319 data = table_data ,
320+ caption = caption ,
309321 parent = self .parents [0 ],
310322 prov = ProvenanceItem (
311323 page_no = page_no ,
@@ -367,14 +379,18 @@ def _find_true_data_bounds(self, sheet: Worksheet) -> DataRegion:
367379
368380 return DataRegion (min_row , max_row , min_col , max_col )
369381
370- def _find_data_tables (self , sheet : Worksheet ) -> list [ExcelTable ]:
382+ def _find_data_tables (self , sheet : Worksheet ) -> list [tuple [ ExcelTable , Optional [ str ]] ]:
371383 """Find all compact rectangular data tables in an Excel worksheet.
372384
385+ Treats a 1x1 table as the title of the next detected table when that table starts
386+ below it with at least one row in between; the title is then not returned separately.
387+
373388 Args:
374389 sheet: The Excel worksheet to be parsed.
375390
376391 Returns:
377- A list of ExcelTable objects representing the data tables.
392+ A list of tuples (ExcelTable, title_text) where title_text is None
393+ if no title was found, or a string if a 1x1 table above serves as title.
378394 """
379395 bounds : DataRegion = self ._find_true_data_bounds (
380396 sheet
@@ -405,7 +421,30 @@ def _find_data_tables(self, sheet: Worksheet) -> list[ExcelTable]:
405421 visited .update (visited_cells ) # Mark these cells as visited
406422 tables .append (table_bounds )
407423
408- return tables
424+ # detect titles (1x1 tables above larger tables)
425+ tables_with_titles : list [tuple [ExcelTable , str | None ]] = []
426+ skip_next = False
427+
428+ for i , table in enumerate (tables ):
429+ if skip_next :
430+ skip_next = False
431+ continue
432+
433+ # Check if this is a 1x1 table that could be a title
434+ if table .num_rows == 1 and table .num_cols == 1 and i + 1 < len (tables ):
435+ next_table = tables [i + 1 ]
436+ title_end_row = table .anchor [1 ] + table .num_rows
437+ next_table_start_row = next_table .anchor [1 ]
438+
439+ if next_table_start_row > title_end_row :
440+ title_text = table .data [0 ].text if table .data else ""
441+ tables_with_titles .append ((next_table , title_text ))
442+ skip_next = True
443+ continue
444+
445+ tables_with_titles .append ((table , None ))
446+
447+ return tables_with_titles
409448
410449 def _find_table_bounds (
411450 self ,
0 commit comments