Skip to content

Commit e38aa0f

Browse files
nikos-livathinos, cau-git, Ubuntu
authored
feat: Heron layout model as new default (#1971)
* feat: Switch default layout model to DOCLING_LAYOUT_HERON. Update the unit test data.
  Signed-off-by: Nikos Livathinos <[email protected]>
* Use default layout model in model_downloader default args
  Signed-off-by: Christoph Auer <[email protected]>
* Use default layout model in model_downloader default args
  Signed-off-by: Christoph Auer <[email protected]>
* Update docling-models tag for TableFormer
  Signed-off-by: Christoph Auer <[email protected]>
* Update test GT
  Signed-off-by: Christoph Auer <[email protected]>
* Update test GT (from linux CPU)
  Signed-off-by: Ubuntu <[email protected]>
* fix: Ensure that the visualisations happen on copies of the page image
  Signed-off-by: Nikos Livathinos <[email protected]>
* chore: Pinpoint docling-ibm-models to the fix branch for the ReadingOrderPredictor
  Signed-off-by: Nikos Livathinos <[email protected]>
* chore: Update uv.lock
  Signed-off-by: Nikos Livathinos <[email protected]>
* chore: Update tests GT to match the Heron layout model and the improved reading order model in Linux
  Signed-off-by: Nikos Livathinos <[email protected]>
* fix: Introduce the verify_doctags optional parameter in conversion tests to control if a doctags comparison should take place. Skip doctags comparisons for certain tests.
  Signed-off-by: Nikos Livathinos <[email protected]>
* chore: Generate tests GT on Mac
  Signed-off-by: Nikos Livathinos <[email protected]>
* chore: Remove the pinning of the docling-ibm-models and use the release 3.9.1
  Signed-off-by: Nikos Livathinos <[email protected]>

---------

Signed-off-by: Nikos Livathinos <[email protected]>
Signed-off-by: Christoph Auer <[email protected]>
Signed-off-by: Ubuntu <[email protected]>
Co-authored-by: Christoph Auer <[email protected]>
Co-authored-by: Ubuntu <[email protected]>
1 parent 293e81b commit e38aa0f

File tree

62 files changed

+45076
-68067
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+45076
-68067
lines changed

docling/datamodel/pipeline_options.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,10 +283,10 @@ class LayoutOptions(BaseModel):
283283
keep_empty_clusters: bool = (
284284
False # Whether to keep clusters that contain no text cells
285285
)
286+
model_spec: LayoutModelConfig = DOCLING_LAYOUT_HERON
286287
skip_cell_assignment: bool = (
287288
False # Skip cell-to-cluster assignment for VLM-only processing
288289
)
289-
model_spec: LayoutModelConfig = DOCLING_LAYOUT_V2
290290

291291

292292
class AsrPipelineOptions(PipelineOptions):

docling/models/layout_model.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def download_models(
9191
local_dir: Optional[Path] = None,
9292
force: bool = False,
9393
progress: bool = False,
94-
layout_model_config: LayoutModelConfig = DOCLING_LAYOUT_V2,
94+
layout_model_config: LayoutModelConfig = LayoutOptions().model_spec, # use default
9595
) -> Path:
9696
return download_hf_model(
9797
repo_id=layout_model_config.repo_id,
@@ -122,8 +122,8 @@ def draw_clusters_and_cells_side_by_side(
122122
left_clusters = [c for c in clusters if c.label not in exclude_labels]
123123
right_clusters = [c for c in clusters if c.label in exclude_labels]
124124
# Create a deep copy of the original image for both sides
125-
left_image = copy.deepcopy(page.image)
126-
right_image = copy.deepcopy(page.image)
125+
left_image = page.image.copy()
126+
right_image = page.image.copy()
127127

128128
# Draw clusters on both images
129129
draw_clusters(left_image, left_clusters, scale_x, scale_y)

docling/models/page_preprocessing_model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ def _parse_page_cells(self, conv_res: ConversionResult, page: Page) -> Page:
9090

9191
# DEBUG code:
9292
def draw_text_boxes(image, cells, show: bool = False):
93-
draw = ImageDraw.Draw(image)
93+
draw = ImageDraw.Draw(image.copy())
9494
for c in cells:
9595
x0, y0, x1, y1 = (
9696
c.to_bounding_box().l,

docling/models/table_structure_model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ def download_models(
9494
) -> Path:
9595
return download_hf_model(
9696
repo_id="ds4sd/docling-models",
97-
revision="v2.2.0",
97+
revision="v2.3.0",
9898
local_dir=local_dir,
9999
force=force,
100100
progress=progress,

docling/utils/model_downloader.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from docling.datamodel.layout_model_specs import DOCLING_LAYOUT_V2
66
from docling.datamodel.pipeline_options import (
7+
LayoutOptions,
78
granite_picture_description,
89
smolvlm_picture_description,
910
)
@@ -47,7 +48,7 @@ def download_models(
4748
if with_layout:
4849
_log.info("Downloading layout model...")
4950
LayoutModel.download_models(
50-
local_dir=output_dir / DOCLING_LAYOUT_V2.model_repo_folder,
51+
local_dir=output_dir / LayoutOptions().model_spec.model_repo_folder,
5152
force=force,
5253
progress=progress,
5354
)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ dependencies = [
4646
'pydantic (>=2.0.0,<3.0.0)',
4747
'docling-core[chunking] (>=2.42.0,<3.0.0)',
4848
'docling-parse (>=4.2.2,<5.0.0)',
49-
"docling-ibm-models>=3.9.0,<4",
49+
"docling-ibm-models>=3.9.1,<4",
5050
'filetype (>=1.2.0,<2.0.0)',
5151
'pypdfium2 (>=4.30.0,!=4.30.1,<5.0.0)',
5252
'pydantic-settings (>=2.3.0,<3.0.0)',

tests/data/groundtruth/docling_v2/2203.01017v2.doctags.txt

Lines changed: 41 additions & 83 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)