
Commit 3e134ae

feat: Add CLI arguments to control the docling layout model (#136)
* feat: Pin docling to cau/layout-model-spec. Introduce the docling_layout_model_spec and docling_layout_create_orphan_clusters CLI arguments
* chore: Update docling to the latest status of cau/layout-model-spec
* chore: Update docling to the latest status of cau/layout-model-spec
* feat: Refactor the CLI arguments of 'evaluate' to accept optional input and output dirs
* feat: Refactor MultiEvaluator.load_multi_evaluation() to search for sub-experiments and, if present, concatenate the experiment with the sub-experiment as a key in the returned Dict
* chore: Clean up code in MultiEvaluator
* chore: Pin docling to its main branch. Add code comments
* fix: In the Consolidator, add the single-value mAP score for the layout evaluations
* feat: Extend the CLI to introduce the --docling-layout-keep-empty-clusters parameter and propagate it to docling LayoutOptions
* chore: Remove the pin to the docling branch, as the docling PR has been merged
* fix: Have the Consolidator sort the produced Excel by Benchmark and Experiment
* fix: Set the --docling-layout-keep-empty-clusters CLI parameter to False by default
* feat: Extend the CLI to accept optional input_dir and output_dir in the visualize command
* chore: Remove the docling pinning. Use the latest docling release
* fix: Fix the create-eval CLI handling of `--docling-layout-keep-empty-clusters`

Signed-off-by: Nikos Livathinos <[email protected]>
1 parent af2c222 commit 3e134ae
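
For orientation, here is a minimal sketch (not part of the commit) of what the new layout knobs end up controlling on the docling side. It only uses symbols that appear in the diffs below (LayoutOptions, DOCLING_LAYOUT_HERON, PdfPipelineOptions); the flag names in the comments follow typer's usual underscore-to-dash mapping, and the noted defaults are the ones declared in create_eval:

from docling.datamodel.layout_model_specs import DOCLING_LAYOUT_HERON
from docling.datamodel.pipeline_options import LayoutOptions, PdfPipelineOptions

# Mirrors what get_prediction_provider() now does when the new CLI values are
# passed through: build a fresh LayoutOptions and attach it to the pipeline
# options instead of mutating the defaults in place.
layout_options = LayoutOptions()
layout_options.model_spec = DOCLING_LAYOUT_HERON  # --docling-layout-model-spec (default "docling_layout_heron")
layout_options.create_orphan_clusters = True      # --docling-layout-create-orphan-clusters (default True)
layout_options.keep_empty_clusters = False        # --docling-layout-keep-empty-clusters (default False)

pipeline_options = PdfPipelineOptions()
pipeline_options.layout_options = layout_options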

File tree

4 files changed: +217 −40 lines


docling_eval/aggregations/consolidator.py

Lines changed: 5 additions & 4 deletions

@@ -198,17 +198,18 @@ def _build_dataframes(
         dfs: Dict[EvaluationModality, DataFrame] = {}
         for modality, m_data in df_data.items():
             df = DataFrame(m_data)
-            df = df.sort_values(by=["Benchmark"], ascending=[True])
+            df = df.sort_values(by=["Benchmark", "Experiment"], ascending=[True, True])
             dfs[modality] = df
 
         return dfs
 
     def _layout_metrics(self, evaluation: DatasetLayoutEvaluation) -> Dict[str, str]:
         r"""Get the metrics for the LayoutEvaluation"""
         metrics = {
-            "mAP": export_value(evaluation.map_stats),
-            "mAP_50": export_value(evaluation.map_50_stats),
-            "mAP_75": export_value(evaluation.map_75_stats),
+            "mAP": export_value(evaluation.mAP),
+            "stat_mAP": export_value(evaluation.map_stats),
+            "stat_mAP_50": export_value(evaluation.map_50_stats),
+            "stat_mAP_75": export_value(evaluation.map_75_stats),
             "weighted_mAP_50": export_value(evaluation.weighted_map_50_stats),
             "weighted_mAP_75": export_value(evaluation.weighted_map_75_stats),
             "weighted_mAP_90": export_value(evaluation.weighted_map_90_stats),

docling_eval/aggregations/multi_evalutor.py

Lines changed: 58 additions & 22 deletions

@@ -385,11 +385,39 @@ def _create_eval(
 
     @staticmethod
     def load_multi_evaluation(multi_evaluation_path: Path) -> MultiEvaluation:
-        r"""Load MultiEvaluation from disk files"""
-        # benchmark -> provider -> modality -> DatasetEvaluation
+        r"""
+        Load MultiEvaluation from disk files
+        """
+
+        def _get_modalities_evaluations(
+            evaluations_root: Path,
+            benchmark: BenchMarkNames,
+        ) -> Dict[EvaluationModality, SingleEvaluation]:
+            r"""
+            Scan the evaluations_root and load the evaluations for each modality
+            """
+            modalities_evaluations: Dict[EvaluationModality, SingleEvaluation] = {}
+            for modality_path in evaluations_root.iterdir():
+                try:
+                    modality = EvaluationModality(modality_path.name)
+                except ValueError:
+                    continue
+
+                # Load the evaluation
+                evaluation = load_evaluation(benchmark, modality, modality_path)
+                if not evaluation:
+                    continue
+
+                modalities_evaluations[modality] = SingleEvaluation(
+                    evaluation=evaluation,
+                    experiment=experiment,
+                )
+            return modalities_evaluations
+
+        # benchmark -> experiment_and_subexperiment -> modality -> SingleEvaluation
         evaluations: Dict[
             BenchMarkNames,
-            Dict[Path, Dict[EvaluationModality, DatasetEvaluationType]],
+            Dict[str, Dict[EvaluationModality, SingleEvaluation]],
         ] = {}
 
         # Get the benchmark
@@ -398,6 +426,9 @@ def load_multi_evaluation(multi_evaluation_path: Path) -> MultiEvaluation:
                 benchmark = BenchMarkNames(benchmark_path.name)
             except ValueError:
                 continue
+            if benchmark not in evaluations:
+                evaluations[benchmark] = {}
+
             # Get the experiment
             for experiment_path in benchmark_path.iterdir():
                 if not experiment_path.is_dir():
@@ -407,30 +438,35 @@ def load_multi_evaluation(multi_evaluation_path: Path) -> MultiEvaluation:
                 if experiment == MultiEvaluator.GT_LEAF_DIR:
                     continue
 
-                # Load the evaluations for each modality
-                evaluations_path = experiment_path / MultiEvaluator.EVALUATIONS_DIR
-                if not evaluations_path.is_dir():
-                    continue
-                for modality_path in evaluations_path.iterdir():
-                    try:
-                        modality = EvaluationModality(modality_path.name)
-                    except ValueError:
+                # Check if a sub-experiment is present
+                for exp_child_path in experiment_path.iterdir():
+                    if not exp_child_path.is_dir():
                         continue
 
-                    # Load the evaluation
-                    evaluation = load_evaluation(benchmark, modality, modality_path)
-                    if not evaluation:
+                    subexp_candidate = exp_child_path.name
+                    if subexp_candidate == MultiEvaluator.PRED_LEAF_DIR:
                         continue
 
-                    if benchmark not in evaluations:
-                        evaluations[benchmark] = {}
-                    if experiment not in evaluations[benchmark]:
-                        evaluations[benchmark][experiment] = {}
+                    modalities_evaluations: Dict[EvaluationModality, SingleEvaluation]
+                    if subexp_candidate == MultiEvaluator.EVALUATIONS_DIR:
+                        modalities_evaluations = _get_modalities_evaluations(
+                            exp_child_path, benchmark
+                        )
 
-                    evaluations[benchmark][experiment][modality] = SingleEvaluation(
-                        evaluation=evaluation,
-                        experiment=experiment,
-                    )
+                        exp_and_subexp = experiment
+                        evaluations[benchmark][exp_and_subexp] = modalities_evaluations
+                    else:
+                        subexp_candidate_evaluations = (
+                            exp_child_path / MultiEvaluator.EVALUATIONS_DIR
+                        )
+                        if not subexp_candidate_evaluations.is_dir():
+                            continue
+                        modalities_evaluations = _get_modalities_evaluations(
+                            subexp_candidate_evaluations, benchmark
+                        )
+
+                        exp_and_subexp = f"{experiment}_{subexp_candidate}"
+                        evaluations[benchmark][exp_and_subexp] = modalities_evaluations
 
         multi_evaluation: MultiEvaluation = MultiEvaluation(evaluations=evaluations)
         return multi_evaluation
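
To make the new sub-experiment handling concrete, the sketch below shows the directory shape load_multi_evaluation() now walks and the experiment keys it produces. The tree and the root path are illustrative; only the "evaluations" leaf name (MultiEvaluator.EVALUATIONS_DIR) and the commented-out calls come from this repository's code:

from pathlib import Path

# Hypothetical root holding results of earlier create-eval runs; every name
# below except the "evaluations" leaf (MultiEvaluator.EVALUATIONS_DIR) is
# illustrative.
#
#   multi_eval_root/
#     <benchmark>/
#       exp_a/
#         evaluations/<modality>/...            -> key "exp_a"
#       exp_b/
#         run1/
#           evaluations/<modality>/...          -> key "exp_b_run1"
root = Path("multi_eval_root")

# Loading and inspecting the result is left as comments, since it requires the
# docling_eval package and a real tree on disk:
#
#   from docling_eval.aggregations.multi_evalutor import MultiEvaluator
#   multi_evaluation = MultiEvaluator.load_multi_evaluation(root)
#   for benchmark, experiments in multi_evaluation.evaluations.items():
#       for exp_key, modalities in experiments.items():
#           print(benchmark.value, exp_key, sorted(m.value for m in modalities))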

docling_eval/cli/main.py

Lines changed: 149 additions & 14 deletions

@@ -5,18 +5,33 @@
 import os
 import sys
 from pathlib import Path
-from typing import Annotated, Dict, Optional, Tuple
+
+# --- DoclingLayoutOptionsManager definition moved here ---
+from typing import Annotated, Dict, List, Optional, Tuple
 
 import typer
+from docling.datamodel.accelerator_options import AcceleratorOptions
 from docling.datamodel.base_models import InputFormat
+from docling.datamodel.layout_model_specs import (
+    DOCLING_LAYOUT_EGRET_LARGE,
+    DOCLING_LAYOUT_EGRET_MEDIUM,
+    DOCLING_LAYOUT_EGRET_XLARGE,
+    DOCLING_LAYOUT_HERON,
+    DOCLING_LAYOUT_HERON_101,
+    DOCLING_LAYOUT_V2,
+    LayoutModelConfig,
+)
 from docling.datamodel.pipeline_options import (
-    AcceleratorDevice,
-    AcceleratorOptions,
+    LayoutOptions,
     PaginatedPipelineOptions,
     PdfPipelineOptions,
     VlmPipelineOptions,
-    smoldocling_vlm_conversion_options,
-    smoldocling_vlm_mlx_conversion_options,
+)
+from docling.datamodel.vlm_model_specs import (
+    SMOLDOCLING_MLX as smoldocling_vlm_mlx_conversion_options,
+)
+from docling.datamodel.vlm_model_specs import (
+    SMOLDOCLING_TRANSFORMERS as smoldocling_vlm_conversion_options,
 )
 from docling.document_converter import FormatOption, PdfFormatOption
 from docling.models.factories import get_ocr_factory

@@ -103,6 +118,26 @@
     TableFormerPredictionProvider,
 )
 
+
+class DoclingLayoutOptionsManager:
+    layout_model_configs = {
+        "docling_layout_v2": DOCLING_LAYOUT_V2,
+        "docling_layout_heron": DOCLING_LAYOUT_HERON,
+        "docling_layout_heron_101": DOCLING_LAYOUT_HERON_101,
+        "docling_layout_egret_medium": DOCLING_LAYOUT_EGRET_MEDIUM,
+        "docling_layout_egret_large": DOCLING_LAYOUT_EGRET_LARGE,
+        "docling_layout_egret_xlarge": DOCLING_LAYOUT_EGRET_XLARGE,
+    }
+
+    @staticmethod
+    def get_layout_model_config(model_spec: str) -> LayoutModelConfig:
+        return DoclingLayoutOptionsManager.layout_model_configs[model_spec]
+
+    @staticmethod
+    def get_layout_model_config_names() -> List[str]:
+        return list(DoclingLayoutOptionsManager.layout_model_configs.keys())
+
+
 # Configure logging
 logging_level = logging.WARNING
 # logging_level = logging.DEBUG

@@ -125,6 +160,32 @@
 )
 
 
+def derive_input_output_dirs(
+    benchmark: BenchMarkNames,
+    modality: EvaluationModality,
+    input_dir: Optional[Path],
+    output_dir: Optional[Path],
+) -> Tuple[Path, Path]:
+    r"""
+    One of the input or output dirs must be non None.
+    In case one of them is None, it can be derived from the other one.
+    """
+    if input_dir and output_dir:
+        return input_dir, output_dir
+    if not input_dir and not output_dir:
+        raise ValueError("Either input_dir or output_dir must be provided")
+
+    if not input_dir and output_dir:
+        # Derive input and output paths based on the directory structure in test_dataset_builder.py
+        input_dir = output_dir / "eval_dataset" / benchmark.value / modality.value
+
+    if not output_dir and input_dir:
+        output_dir = input_dir.parent
+    assert input_dir is not None
+    assert output_dir is not None
+    return input_dir, output_dir
+
+
 def log_and_save_stats(
     odir: Path,
     benchmark: BenchMarkNames,

@@ -259,6 +320,9 @@ def get_prediction_provider(
     do_table_structure: bool = True,
     artifacts_path: Optional[Path] = None,
     image_scale_factor: Optional[float] = None,
+    docling_layout_model_spec: Optional[LayoutModelConfig] = None,
+    docling_layout_create_orphan_clusters: Optional[bool] = None,
+    docling_layout_keep_empty_clusters: Optional[bool] = None,
 ):
     pipeline_options: PaginatedPipelineOptions
     """Get the appropriate prediction provider with default settings."""

@@ -289,8 +353,17 @@
         pipeline_options.generate_parsed_pages = True
         pipeline_options.accelerator_options = accelerator_options
 
-        pipeline_options.layout_options.create_orphan_clusters = False
-        pipeline_options.layout_options.keep_empty_clusters = True
+        # Layout options
+        layout_options: LayoutOptions = LayoutOptions()
+        if docling_layout_model_spec is not None:
+            layout_options.model_spec = docling_layout_model_spec
+        if docling_layout_create_orphan_clusters is not None:
+            layout_options.create_orphan_clusters = (
+                docling_layout_create_orphan_clusters
+            )
+        if docling_layout_keep_empty_clusters is not None:
+            layout_options.keep_empty_clusters = docling_layout_keep_empty_clusters
+        pipeline_options.layout_options = layout_options
 
         if artifacts_path is not None:
             pipeline_options.artifacts_path = artifacts_path

@@ -1038,6 +1111,24 @@ def create_eval(
             help="Directory for local model artifacts. Will only be passed to providers supporting this."
         ),
     ] = None,
+    docling_layout_model_spec: Annotated[
+        Optional[str],
+        typer.Option(
+            help="Layout model spec for Docling. Supported values: {}".format(
+                DoclingLayoutOptionsManager.get_layout_model_config_names()
+            )
+        ),
+    ] = "docling_layout_heron",
+    docling_layout_create_orphan_clusters: Annotated[
+        Optional[bool],
+        typer.Option(
+            help="Enable orphan clusters creation in Docling layout post-processing"
+        ),
+    ] = True,
+    docling_layout_keep_empty_clusters: Annotated[
+        Optional[bool],
+        typer.Option(help="Keep the empty clusters in Docling layout post-processing"),
+    ] = False,
     do_visualization: Annotated[
         bool, typer.Option(help="visualize the predictions")
     ] = True,

@@ -1070,6 +1161,14 @@ def create_eval(
     )
 
     # Create the appropriate prediction provider
+    docling_layout_model_spec_obj = (
+        DoclingLayoutOptionsManager.get_layout_model_config(
+            docling_layout_model_spec
+        )
+        if docling_layout_model_spec
+        else None
+    )
+
     provider = get_prediction_provider(
         provider_type=prediction_provider,
         file_source_path=file_source_path,

@@ -1080,6 +1179,9 @@ def create_eval(
         do_visualization=do_visualization,
         image_scale_factor=image_scale_factor,
         do_table_structure=do_table_structure,
+        docling_layout_model_spec=docling_layout_model_spec_obj,
+        docling_layout_create_orphan_clusters=docling_layout_create_orphan_clusters,
+        docling_layout_keep_empty_clusters=docling_layout_keep_empty_clusters,
     )
 
     # Get the dataset name from the benchmark

@@ -1173,13 +1275,32 @@ def create(
 @app.command(name="evaluate")
 def evaluate_cmd(
     modality: Annotated[EvaluationModality, typer.Option(help="Evaluation modality")],
-    benchmark: Annotated[BenchMarkNames, typer.Option(help="Benchmark name")],
-    output_dir: Annotated[Path, typer.Option(help="Base output directory")],
+    benchmark: Annotated[
+        BenchMarkNames,
+        typer.Option(
+            help="Benchmark name. It is used only to set the filename of the evaluation json file."
+        ),
+    ],
+    input_dir: Annotated[
+        Optional[Path],
+        typer.Option(
+            help="Directory with evaluation dataset. If not provided, the input directory will be derived from the output directory."
+        ),
+    ] = None,
+    output_dir: Annotated[
+        Optional[Path],
+        typer.Option(
+            help="Base output directory. If not provided, the output directory will be derived from the input directory."
+        ),
+    ] = None,
     split: Annotated[str, typer.Option(help="Dataset split")] = "test",
 ):
     """Evaluate predictions against ground truth."""
-    # Derive input and output paths based on the directory structure in test_dataset_builder.py
-    input_dir = output_dir / "eval_dataset"
+    input_dir, output_dir = derive_input_output_dirs(
+        benchmark, modality, input_dir, output_dir
+    )
+    assert input_dir is not None
+    assert output_dir is not None
     eval_output_dir = output_dir / "evaluations" / modality.value
 
     # Create output directory

@@ -1201,16 +1322,30 @@ def visualize_cmd(
         EvaluationModality, typer.Option(help="Visualization modality")
     ],
     benchmark: Annotated[BenchMarkNames, typer.Option(help="Benchmark name")],
-    output_dir: Annotated[Path, typer.Option(help="Base output directory")],
+    input_dir: Annotated[
+        Optional[Path],
+        typer.Option(
+            help="Directory with evaluation dataset. If not provided, the input directory will be derived from the output directory."
+        ),
+    ] = None,
+    output_dir: Annotated[
+        Optional[Path],
+        typer.Option(
+            help="Base output directory. If not provided, the output directory will be derived from the input directory."
+        ),
+    ] = None,
     split: Annotated[str, typer.Option(help="Dataset split")] = "test",
     begin_index: Annotated[int, typer.Option(help="Begin index (inclusive)")] = 0,
     end_index: Annotated[
         int, typer.Option(help="End index (exclusive), -1 for all")
     ] = -1,
 ):
     """Visualize evaluation results."""
-    # Derive input and output paths based on the directory structure in test_dataset_builder.py
-    input_dir = output_dir / "eval_dataset"
+    input_dir, output_dir = derive_input_output_dirs(
+        benchmark, modality, input_dir, output_dir
+    )
+    assert input_dir is not None
+    assert output_dir is not None
     eval_output_dir = output_dir / "evaluations" / modality.value
 
     # Create output directory
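
The shared derive_input_output_dirs() helper is what lets evaluate and visualize accept either directory. A stdlib-only sketch of its two derivation rules; the benchmark and modality strings stand in for benchmark.value and modality.value and are purely illustrative:

from pathlib import Path

benchmark_value = "some_benchmark"  # stands in for benchmark.value (illustrative)
modality_value = "some_modality"    # stands in for modality.value (illustrative)

# Rule 1: only --output-dir given -> the input is expected under
# <output>/eval_dataset/<benchmark>/<modality>.
output_dir = Path("results")
derived_input = output_dir / "eval_dataset" / benchmark_value / modality_value

# Rule 2: only --input-dir given -> the output becomes the parent of the
# given input directory.
input_dir = Path("results") / "eval_dataset"
derived_output = input_dir.parent

print(derived_input)   # results/eval_dataset/some_benchmark/some_modality
print(derived_output)  # results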

docling_eval/utils/coco_exporter.py

Lines changed: 5 additions & 0 deletions

@@ -308,6 +308,11 @@ def main(args):
     log_format = "%(asctime)s - %(levelname)s - %(message)s"
     logging.basicConfig(level=logging.INFO, format=log_format)
 
+    _log.info("Export eval-dataset in COCO-tools format")
+    _log.info("COCO dataset: %s", str(coco_path))
+    _log.info("eval-dataset: %s", str(docling_eval_path))
+    _log.info("Save path: %s", str(save_path))
+
     # Create the COCO exporter
     exporter = DoclingEvalCOCOExporter(docling_eval_path)
     exporter.export_predictions_wrt_original_COCO(
