Skip to content

Commit a66b0bb

Browse files
authored
fix: add REFERENCE to exported labels and remove CAPTION (#106)
Signed-off-by: Michele Dolfi <[email protected]>
1 parent 4f653c2 commit a66b0bb

File tree

1 file changed

+1
-17
lines changed

1 file changed

+1
-17
lines changed

docling_core/types/doc/document.py

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@
49 49
DocItemLabel.DOCUMENT_INDEX,
50 50
DocItemLabel.SECTION_HEADER,
51 51
DocItemLabel.PARAGRAPH,
52 -
DocItemLabel.CAPTION,
53 52
DocItemLabel.TABLE,
54 53
DocItemLabel.PICTURE,
55 54
DocItemLabel.FORMULA,
@@ -58,6 +57,7 @@
58 57
DocItemLabel.TEXT,
59 58
DocItemLabel.LIST_ITEM,
60 59
DocItemLabel.CODE,
60 +
DocItemLabel.REFERENCE,
61 61
}
62 62

63 63

@@ -2055,10 +2055,6 @@ def export_to_markdown( # noqa: C901
2055 2055
text = f"```\n{item.text}\n```\n"
2056 2056
mdtexts.append(text)
2057 2057

2058 -
elif isinstance(item, TextItem) and item.label in [DocItemLabel.CAPTION]:
2059 -
# captions are printed in picture and table ... skipping for now
2060 -
continue
2061 -
2062 2058
elif isinstance(item, ListItem) and item.label in [DocItemLabel.LIST_ITEM]:
2063 2059
in_list = True
2064 2060
# Calculate indent based on list_nesting_level
@@ -2350,10 +2346,6 @@ def close_lists(
2350 2346
text = f"<pre>{item.text}</pre>"
2351 2347
html_texts.append(text)
2352 2348

2353 -
elif isinstance(item, TextItem) and item.label in [DocItemLabel.CAPTION]:
2354 -
# captions are printed in picture and table ... skipping for now
2355 -
continue
2356 -
2357 2349
elif isinstance(item, ListItem):
2358 2350

2359 2351
text = f"<li>{item.text}</li>"
@@ -2555,10 +2547,6 @@ def close_lists(
2555 2547
result += f"<unordered_list>{delim}"
2556 2548
in_ordered_list.append(False)
2557 2549

2558 -
elif isinstance(item, TextItem) and item.label in [DocItemLabel.CAPTION]:
2559 -
# captions are printed in picture and table ... skipping for now
2560 -
continue
2561 -
2562 2550
elif isinstance(item, SectionHeaderItem):
2563 2551

2564 2552
result += item.export_to_document_tokens(
@@ -2664,10 +2652,6 @@ def get_text(text: str, max_text_len: int):
2664 2652
indent * level + f"item-{i} at level {level}: {item.label}: {text}"
2665 2653
)
2666 2654

2667 -
elif isinstance(item, TextItem) and item.label in [DocItemLabel.CAPTION]:
2668 -
# captions are printed in picture and table ... skipping for now
2669 -
continue
2670 -
2671 2655
elif isinstance(item, ListItem) and item.label in [DocItemLabel.LIST_ITEM]:
2672 2656
text = get_text(text=item.text, max_text_len=max_text_len)
2673 2657

0 commit comments

Comments
 (0)