Skip to content

Commit 337ff74

Browse files
maxmnemonic and Maksym Lysak authored
fix: DocTags support of furniture (#209)
* During DocTags conversion, process page headers and footers as furniture

  Signed-off-by: Maksym Lysak <[email protected]>

* Cleaner code

  Signed-off-by: Maksym Lysak <[email protected]>

---------

Signed-off-by: Maksym Lysak <[email protected]>
Co-authored-by: Maksym Lysak <[email protected]>
1 parent 2371c11 commit 337ff74

File tree

1 file changed

+16
-9
lines changed

1 file changed

+16
-9
lines changed

docling_core/types/doc/document.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3494,18 +3494,25 @@ def parse_key_value_item(
34943494
else:
34953495
# For everything else, treat as text
34963496
text_content = extract_inner_text(full_chunk)
3497+
element_prov = (
3498+
ProvenanceItem(
3499+
bbox=bbox.resize_by_scale(pg_width, pg_height),
3500+
charspan=(0, len(text_content)),
3501+
page_no=page_no,
3502+
)
3503+
if bbox
3504+
else None
3505+
)
3506+
3507+
content_layer = ContentLayer.BODY
3508+
if tag_name in [DocItemLabel.PAGE_HEADER, DocItemLabel.PAGE_FOOTER]:
3509+
content_layer = ContentLayer.FURNITURE
3510+
34973511
self.add_text(
34983512
label=doc_label,
34993513
text=text_content,
3500-
prov=(
3501-
ProvenanceItem(
3502-
bbox=bbox.resize_by_scale(pg_width, pg_height),
3503-
charspan=(0, len(text_content)),
3504-
page_no=page_no,
3505-
)
3506-
if bbox
3507-
else None
3508-
),
3514+
prov=element_prov,
3515+
content_layer=content_layer,
35093516
)
35103517
return self
35113518

0 commit comments

Comments
 (0)