88 ContentLayer ,
99 CoordOrigin ,
1010 DocItem ,
11+ DocItemLabel ,
1112 DoclingDocument ,
1213 DocumentOrigin ,
1314 GroupLabel ,
@@ -275,9 +276,10 @@ def _find_tables_in_sheet(
275276
276277 if self .workbook is not None :
277278 content_layer = self ._get_sheet_content_layer (sheet )
278- tables = self ._find_data_tables (sheet )
279+ # Returns list of (ExcelTable, optional_title_text) tuples
280+ tables_with_titles = self ._find_data_tables (sheet )
279281
280- for excel_table in tables :
282+ for excel_table , title_text in tables_with_titles :
281283 origin_col = excel_table .anchor [0 ]
282284 origin_row = excel_table .anchor [1 ]
283285 num_rows = excel_table .num_rows
@@ -303,9 +305,19 @@ def _find_tables_in_sheet(
303305 )
304306 table_data .table_cells .append (cell )
305307
308+ # Create caption if a title was found
309+ caption = None
310+ if title_text :
311+ caption = doc .add_text (
312+ text = title_text ,
313+ parent = self .parents [0 ],
314+ label = DocItemLabel .CAPTION ,
315+ )
316+
306317 page_no = self .workbook .index (sheet ) + 1
307318 doc .add_table (
308319 data = table_data ,
320+ caption = caption ,
309321 parent = self .parents [0 ],
310322 prov = ProvenanceItem (
311323 page_no = page_no ,
@@ -367,14 +379,20 @@ def _find_true_data_bounds(self, sheet: Worksheet) -> DataRegion:
367379
368380 return DataRegion (min_row , max_row , min_col , max_col )
369381
370- def _find_data_tables (self , sheet : Worksheet ) -> list [ExcelTable ]:
382+ def _find_data_tables (
383+ self , sheet : Worksheet
384+ ) -> list [tuple [ExcelTable , Optional [str ]]]:
371385 """Find all compact rectangular data tables in an Excel worksheet.
372386
387+ Detects 1x1 tables positioned above larger tables (with at least one empty row
388+ between them) as titles, and associates them as captions.
389+
373390 Args:
374391 sheet: The Excel worksheet to be parsed.
375392
376393 Returns:
377- A list of ExcelTable objects representing the data tables.
394+ A list of tuples (ExcelTable, title_text) where title_text is None
395+ if no title was found, or a string if a 1x1 table above serves as title.
378396 """
379397 bounds : DataRegion = self ._find_true_data_bounds (
380398 sheet
@@ -405,7 +423,30 @@ def _find_data_tables(self, sheet: Worksheet) -> list[ExcelTable]:
405423 visited .update (visited_cells ) # Mark these cells as visited
406424 tables .append (table_bounds )
407425
408- return tables
426+ # detect titles (1x1 tables above larger tables)
427+ tables_with_titles : list [tuple [ExcelTable , str | None ]] = []
428+ skip_next = False
429+
430+ for i , table in enumerate (tables ):
431+ if skip_next :
432+ skip_next = False
433+ continue
434+
435+ # Check if this is a 1x1 table that could be a title
436+ if table .num_rows == 1 and table .num_cols == 1 and i + 1 < len (tables ):
437+ next_table = tables [i + 1 ]
438+ title_end_row = table .anchor [1 ] + table .num_rows
439+ next_table_start_row = next_table .anchor [1 ]
440+
441+ if next_table_start_row > title_end_row :
442+ title_text = table .data [0 ].text if table .data else ""
443+ tables_with_titles .append ((next_table , title_text ))
444+ skip_next = True
445+ continue
446+
447+ tables_with_titles .append ((table , None ))
448+
449+ return tables_with_titles
409450
410451 def _find_table_bounds (
411452 self ,
0 commit comments