Skip to content

Commit 6116d6a

Browse files
author
Maksym Lysak
committed
Support inline groups when paragraph-like container tags (p, address, summary) are mixed with inline formatting tags such as strong.
Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
1 parent 6a21c09 commit 6116d6a

File tree

1 file changed

+16
-1
lines changed

1 file changed

+16
-1
lines changed

docling/backend/html_backend.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1794,12 +1794,27 @@ def _should_create_inline_group(
17941794
) -> bool:
17951795
if len(annotated_text_list) <= 1:
17961796
return False
1797+
# In non-render mode there are no source tag ids. Still keep mixed
1798+
# inline formatting (e.g. <p>...<strong>...</strong>...</p>) as one flow.
1799+
if all(
1800+
annotated_text.source_tag_id is None
1801+
for annotated_text in annotated_text_list
1802+
):
1803+
return True
1804+
# Allow paragraph-like block containers to contribute inline segments
1805+
# when mixed with formatting tags (e.g., <p>text <strong>bold</strong></p>).
1806+
inline_group_container_tags = {"p", "address", "summary"}
17971807
for annotated_text in annotated_text_list:
17981808
source_tag_id = annotated_text.source_tag_id
17991809
if source_tag_id is None:
18001810
return False
18011811
tag_name = self._get_tag_name_for_docling_id(source_tag_id)
1802-
if tag_name is None or tag_name not in _INLINE_HTML_TAGS:
1812+
if tag_name is None:
1813+
return False
1814+
if (
1815+
tag_name not in _INLINE_HTML_TAGS
1816+
and tag_name not in inline_group_container_tags
1817+
):
18031818
return False
18041819
return True
18051820

0 commit comments

Comments
 (0)