Skip to content

Commit 9df7208

Browse files
authored
fix(markdown): fix single-row table serialization (#385)
Signed-off-by: Panos Vagenas <[email protected]>
1 parent cf73d25 commit 9df7208

File tree

3 files changed

+33
-2
lines changed

3 files changed

+33
-2
lines changed

docling_core/transforms/serializer/markdown.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,7 @@ def serialize(
332332
]
333333
for row in item.data.grid
334334
]
335-
if len(rows) > 1 and len(rows[0]) > 0:
335+
if len(rows) > 0:
336336
try:
337337
table_text = tabulate(rows[1:], headers=rows[0], tablefmt="github")
338338
except ValueError:
test/data/doc/single_row_table.gt.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
| foo   | bar   |
2+
|-------|-------|

test/test_serialization.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,13 @@
2525
)
2626
from docling_core.transforms.visualizer.layout_visualizer import LayoutVisualizer
2727
from docling_core.types.doc.base import ImageRefMode
28-
from docling_core.types.doc.document import DoclingDocument, MiscAnnotation, TableItem
28+
from docling_core.types.doc.document import (
29+
DoclingDocument,
30+
MiscAnnotation,
31+
TableCell,
32+
TableData,
33+
TableItem,
34+
)
2935
from docling_core.types.doc.labels import DocItemLabel
3036

3137
from .test_data_gen_flag import GEN_TEST_DATA
@@ -317,6 +323,29 @@ def test_md_rich_table():
317323
verify(exp_file=exp_file, actual=actual)
318324

319325

326+
def test_md_single_row_table():
327+
exp_file = Path("./test/data/doc/single_row_table.gt.md")
328+
words = ["foo", "bar"]
329+
doc = DoclingDocument(name="")
330+
row_idx = 0
331+
table = doc.add_table(data=TableData(num_rows=1, num_cols=len(words)))
332+
for col_idx, word in enumerate(words):
333+
doc.add_table_cell(
334+
table_item=table,
335+
cell=TableCell(
336+
start_row_offset_idx=row_idx,
337+
end_row_offset_idx=row_idx + 1,
338+
start_col_offset_idx=col_idx,
339+
end_col_offset_idx=col_idx + 1,
340+
text=word,
341+
),
342+
)
343+
344+
ser = MarkdownDocSerializer(doc=doc)
345+
actual = ser.serialize().text
346+
verify(exp_file=exp_file, actual=actual)
347+
348+
320349
# ===============================
321350
# HTML tests
322351
# ===============================

0 commit comments

Comments
 (0)