Skip to content
Open
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions docling/experimental/pipeline/threaded_layout_vlm_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ def build_prompt(
base_prompt = self.prompt
augmented_prompt = base_prompt

# Only augment the default "Convert this page to docling." base prompt
if base_prompt != "Convert this page to docling.":
return base_prompt

# In this layout-aware pipeline, _internal_page is always provided
if _internal_page is None:
return base_prompt
Expand All @@ -111,6 +115,14 @@ def build_prompt(
label=cluster.label
)

# Replace TABLE with otsl for consistency with doctags
if tag_name == DocumentToken.TABLE:
tag_name = "otsl"

# Remove section level details
if tag_name == "section_header_level_1":
tag_name = "section_header"

# Convert bbox to tuple and get location tokens
bbox_tuple = cluster.bbox.as_tuple()
location_tokens = DocumentToken.get_location(
Expand All @@ -126,11 +138,9 @@ def build_prompt(
if layout_elements:
# Join elements with newlines and wrap in layout tags
layout_xml = (
"<layout>" + "\n".join(layout_elements) + "</layout>"
"<layout>\n" + "\n".join(layout_elements) + "</layout>"
)
layout_injection = f"{layout_xml}"

augmented_prompt = base_prompt + layout_injection
augmented_prompt += f"\n{layout_xml}"

_log.debug(
"Enhanced Prompt with Layout Info: %s\n", augmented_prompt
Expand Down