Skip to content

Commit 373f959

Browse files
cau-git, nikos-livathinos, Copilot
authored
feat: Visualizer tool and command for datasets (#186)
* chore: Move the teds.py inside the subdir evaluators/table Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com> * feat: Introduce the external_predictions_path in BaseEvaluator and dummy entries in all evaluators. Extend the CLI to support the --external-predictions-path Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com> * feat: Extend test_dataset_builder.py to save document predictions in various formats Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com> * feat: Extend MarkDownTextEvaluator to support external_predictions_path. Add unit test Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com> * feat: Extend LayoutEvaluator to support external_predictions_path. Add unit test. Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com> * fix: Add missing pytest dependencies in tests Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com> * fix: Fix loading the external predictions in LayoutEvaluator Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com> * feat: Introduce external predictions in DocStructureEvaluator. Add unit test. Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com> * feat: Extend the TableEvaluator to support external predictions. Add unit test Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com> * feat: Extend the KeyValueEvaluator to support external predictions. Add unit test. Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com> * feat: Extend the PixelLayoutEvaluator to support external predictions. Add unit test Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com> * feat: Extend the BboxTextEvaluator to support external predictions. Add unit test Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com> * feat: Disable the OCREvaluator when using the external predictions Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com> * fix: Fixing guard for external predictions in TimingsEvaluator, ReadingOrderEvaluator. 
Fix main Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com> * fix: Export the doctag files with the correct file extension Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com> * feat: Refactor the ExternalDoclingDocumentLoader to properly load a DoclingDocument from doctags and the GT image. - Introduce the staticmethod load_doctags() which covers all cases on page image loading. - Refactor the FilePredictionProvider to use the load_doctags() from ExternalDoclingDocumentLoader. - Refactor all evaluators to use the new ExternalDoclingDocumentLoader. Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com> * chore: Rename code file as external_docling_document_loader.py Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com> * fix: Fix typo Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com> * feat: Introduce examples how to evaluate using external predictions using the API and the CLI. Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com> * feat: Prediction vizualizer Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Update docling_eval/utils/external_predictions_visualizer.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Christoph Auer <60343111+cau-git@users.noreply.github.com> * feat: Update examples bash script to demonstrate visualisations on external predictions Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com> --------- Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com> Signed-off-by: Christoph Auer <cau@zurich.ibm.com> Signed-off-by: Christoph Auer <60343111+cau-git@users.noreply.github.com> Co-authored-by: Nikos Livathinos <nli@zurich.ibm.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 53dbd95 commit 373f959

File tree

4 files changed

+325
-6
lines changed

4 files changed

+325
-6
lines changed

docling_eval/cli/main.py

Lines changed: 64 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@
126126
from docling_eval.prediction_providers.tableformer_provider import (
127127
TableFormerPredictionProvider,
128128
)
129+
from docling_eval.utils.external_predictions_visualizer import PredictionsVisualizer
129130

130131

131132
class DoclingLayoutOptionsManager:
@@ -362,7 +363,7 @@ def get_prediction_provider(
362363
docling_layout_keep_empty_clusters: Optional[bool] = None,
363364
# Controls orphan text cells only for the programmatic Docling pipeline (PDF_DOCLING)
364365
docling_programmatic_add_orphan_text_cells: Optional[bool] = None,
365-
docling_force_full_page_ocr: Optional[bool] = None,
366+
docling_force_full_page_ocr: bool = False,
366367
granite_docling_vlm_options: Optional[InlineVlmOptions] = None,
367368
max_new_tokens: Optional[int] = None,
368369
):
@@ -376,7 +377,7 @@ def get_prediction_provider(
376377
ocr_factory = get_ocr_factory()
377378

378379
ocr_options: OcrOptions = ocr_factory.create_options( # type: ignore
379-
kind="easyocr",
380+
kind="rapidocr",
380381
force_full_page_ocr=docling_force_full_page_ocr,
381382
)
382383
# Use all CPU cores
@@ -1578,6 +1579,67 @@ def visualize_cmd(
15781579
)
15791580

15801581

1582+
@app.command(name="create_viz")
def create_viz(
    dataset_dir: Annotated[
        Path,
        typer.Option(
            help=(
                "Dataset directory (GT parquet or eval_dataset parquet with predictions) "
                "containing the split folder with parquet shards."
            )
        ),
    ],
    split: Annotated[str, typer.Option(help="Dataset split to visualize")] = "test",
    external_predictions_path: Annotated[
        Optional[Path],
        typer.Option(
            help=(
                "Directory with DoclingDocument predictions named as <doc_id>.[json|dt|yaml|yml]. "
                "If omitted, predictions are taken from the dataset parquet."
            )
        ),
    ] = None,
    output_dir: Annotated[
        Optional[Path],
        typer.Option(
            help=(
                "Directory where HTML visualizations are written. Defaults to "
                "<dataset_dir>/visualizations when omitted."
            )
        ),
    ] = None,
    begin_index: Annotated[int, typer.Option(help="Begin index (inclusive)")] = 0,
    end_index: Annotated[
        int, typer.Option(help="End index (exclusive), -1 for all")
    ] = -1,
    ignore_missing_predictions: Annotated[
        bool,
        typer.Option(
            help="Skip documents without a matching prediction instead of failing"
        ),
    ] = False,
):
    """
    Create paired GT vs. prediction HTML visualizations without generating parquet output.
    """
    # Fall back to a "visualizations" folder inside the dataset when no
    # explicit output directory was requested. `is not None` (not truthiness)
    # so an explicitly passed empty path is still honored.
    if output_dir is not None:
        target_dir = output_dir
    else:
        target_dir = dataset_dir / "visualizations"

    viz = PredictionsVisualizer(
        visualizations_dir=target_dir,
        external_predictions_dir=external_predictions_path,
        ignore_missing_predictions=ignore_missing_predictions,
    )
    viz.create_visualizations(
        dataset_dir=dataset_dir,
        split=split,
        begin_index=begin_index,
        end_index=end_index,
    )
1641+
1642+
15811643
@app.callback()
15821644
def main():
15831645
"""Docling Evaluation CLI for benchmarking document processing tasks."""
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
import logging
2+
from pathlib import Path
3+
from typing import List, Optional, Tuple
4+
5+
from datasets import Dataset, load_dataset
6+
from docling.datamodel.base_models import ConversionStatus
7+
from docling_core.types.doc.document import DoclingDocument
8+
from PIL import Image
9+
from tqdm import tqdm # type: ignore
10+
11+
from docling_eval.datamodels.dataset_record import DatasetRecordWithPrediction
12+
from docling_eval.datamodels.types import BenchMarkColumns, PredictionFormats
13+
from docling_eval.utils.external_docling_document_loader import (
14+
ExternalDoclingDocumentLoader,
15+
)
16+
from docling_eval.utils.utils import extract_images, insert_images_from_pil
17+
from docling_eval.visualisation.visualisations import save_comparison_html_with_clusters
18+
19+
_LOGGER = logging.getLogger(__name__)
20+
21+
22+
class PredictionsVisualizer:
    """
    Render ground-truth vs. prediction visualizations for an existing dataset.

    Works with either:
    - A dataset that already embeds predictions (DatasetRecordWithPrediction parquet)
    - A ground-truth-only dataset paired with an external predictions directory
      containing DoclingDocument files named <doc_id>.[json|dt|yaml|yml]
    """

    def __init__(
        self,
        visualizations_dir: Path,
        *,
        external_predictions_dir: Optional[Path] = None,
        ignore_missing_predictions: bool = False,
    ):
        # Build a loader only when an external predictions directory is given;
        # otherwise predictions are taken straight from the dataset records.
        if external_predictions_dir is not None:
            self._loader: Optional[ExternalDoclingDocumentLoader] = (
                ExternalDoclingDocumentLoader(external_predictions_dir)
            )
        else:
            self._loader = None
        self._visualizations_dir = visualizations_dir
        self._ignore_missing_predictions = ignore_missing_predictions

    def create_visualizations(
        self,
        dataset_dir: Path,
        split: str = "test",
        begin_index: int = 0,
        end_index: int = -1,
    ) -> None:
        """
        Generate paired HTML visualizations between ground truth and predictions.
        """
        rows = self._slice_dataset(
            self._load_split(dataset_dir, split), begin_index, end_index
        )
        self._visualizations_dir.mkdir(parents=True, exist_ok=True)

        for row in tqdm(
            rows,
            desc="Rendering visualizations",
            total=len(rows),
            ncols=120,
        ):
            rec = DatasetRecordWithPrediction.model_validate(row)
            predicted = self._resolve_prediction_document(rec)
            if predicted is None:
                msg = f"Missing prediction for document {rec.doc_id}"
                if not self._ignore_missing_predictions:
                    raise FileNotFoundError(msg)
                _LOGGER.warning(msg)
                continue

            predicted, pictures, page_images = self._prepare_prediction_assets(
                rec, predicted
            )

            # Work on a deep copy so the source record is never mutated.
            viz_record = rec.model_copy(deep=True)
            viz_record.predicted_doc = predicted
            viz_record.predicted_pictures = pictures
            viz_record.predicted_page_images = page_images
            viz_record.prediction_format = PredictionFormats.DOCLING_DOCUMENT
            viz_record.status = ConversionStatus.SUCCESS

            self._save_visualization(viz_record)

    def _resolve_prediction_document(
        self, record: DatasetRecordWithPrediction
    ) -> Optional[DoclingDocument]:
        """Prediction from the external loader when configured, else from the record."""
        if self._loader is None:
            return record.predicted_doc
        return self._loader(record)

    def _prepare_prediction_assets(
        self, record: DatasetRecordWithPrediction, pred_doc: DoclingDocument
    ) -> Tuple[DoclingDocument, List[Image.Image], List[Image.Image]]:
        """Return (doc, pictures, page_images) for the prediction side of the view."""
        has_embedded_images = bool(
            record.predicted_pictures or record.predicted_page_images
        )
        if self._loader is None and has_embedded_images:
            # The parquet record already carries the prediction images: reuse them.
            return (
                pred_doc.model_copy(deep=True),
                list(record.predicted_pictures),
                list(record.predicted_page_images),
            )

        # Otherwise strip the images out of the prediction document itself.
        return extract_images(
            document=pred_doc.model_copy(deep=True),
            pictures_column=BenchMarkColumns.PREDICTION_PICTURES.value,
            page_images_column=BenchMarkColumns.PREDICTION_PAGE_IMAGES.value,
        )

    def _load_split(self, dataset_dir: Path, split: str) -> Dataset:
        """Load all parquet shards of *split* from <dataset_dir>/<split>."""
        split_dir = dataset_dir / split
        shards = sorted(split_dir.glob("*.parquet"))
        if not shards:
            raise FileNotFoundError(f"No parquet files found under {split_dir}")
        loaded = load_dataset(
            "parquet", data_files={split: [str(p) for p in shards]}
        )
        return loaded[split]

    def _slice_dataset(
        self, dataset: Dataset, begin_index: int, end_index: int
    ) -> Dataset:
        """Clamp [begin_index, end_index) to the dataset bounds and select it."""
        size = len(dataset)
        lo = max(begin_index, 0)
        hi = size if end_index < 0 else min(end_index, size)

        if lo >= hi:
            return dataset.select([])
        if lo == 0 and hi == size:
            # Whole range requested: return the dataset untouched.
            return dataset
        return dataset.select(range(lo, hi))

    def _save_visualization(self, record: DatasetRecordWithPrediction) -> None:
        """Write one GT-vs-prediction comparison HTML page for *record*."""
        if record.predicted_doc is None:
            return

        true_doc = insert_images_from_pil(
            record.ground_truth_doc.model_copy(deep=True),
            record.ground_truth_pictures,
            record.ground_truth_page_images,
        )
        predicted_doc = insert_images_from_pil(
            record.predicted_doc.model_copy(deep=True),
            record.predicted_pictures,
            record.predicted_page_images,
        )

        try:
            save_comparison_html_with_clusters(
                filename=self._visualizations_dir / f"{record.doc_id}.html",
                true_doc=true_doc,
                pred_doc=predicted_doc,
                draw_reading_order=True,
            )
        except (IndexError, ValueError) as e:
            # Some documents trip rendering edge cases; log and keep going.
            _LOGGER.warning(
                f"Failed to save visualization for doc_id {record.doc_id}: {e}"
            )

docs/examples/evaluate_dpbench_on_external_predictions.sh

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ evaluate() {
3838
fi
3939

4040
for modality in "${MODALITIES[@]}"; do
41-
echo "Evaluation modality: ${modality}, predictions: ${pred_dir}"
41+
echo "Evaluate: modality: ${modality}: predictions: ${pred_dir}"
4242
uv run docling-eval evaluate \
4343
--benchmark DPBench \
4444
--modality "${modality}" \
@@ -49,24 +49,54 @@ evaluate() {
4949
}
5050

5151

52+
# Render GT-vs-prediction HTML visualisations for one predictions directory.
#   $1: directory with external DoclingDocument predictions (<doc_id>.[json|dt|yaml|yml])
#   $2: directory where the HTML files are written
# Exits 1 when GT_DIR is missing, 2 when the predictions dir is missing.
visualize() {
    # NOTE: dropped unused `modality` local (copy-paste leftover from evaluate()).
    local pred_dir save_dir
    pred_dir="$1"
    save_dir="$2"

    # Check if the GT/preds dirs exist
    if [ ! -d "${GT_DIR}" ]; then
        echo "Missing GT dir: ${GT_DIR}"
        exit 1
    fi
    if [ ! -d "${pred_dir}" ]; then
        echo "Missing predictions dir: ${pred_dir}"
        exit 2
    fi

    echo "Visualize predictions: ${pred_dir}"
    uv run docling-eval create_viz \
        --dataset-dir "${GT_DIR}" \
        --external-predictions-path "${pred_dir}" \
        --output-dir "${save_dir}"
}
73+
5274
###########################################################################################
5375
# Main
5476
#
5577

78+
# Predictions
79+
5680
# json predictions
5781
evaluate \
5882
scratch/DPBench/predicted_documents/json \
59-
scratch/DPBench/external_evaluations_jsons
83+
scratch/DPBench/external_predictions_jsons
6084

6185

6286
# doctags predictions
6387
evaluate \
6488
scratch/DPBench/predicted_documents/doctag \
65-
scratch/DPBench/external_evaluations_doctags
89+
scratch/DPBench/external_predictions_doctags
6690

6791

6892
# yaml predictions
6993
evaluate \
7094
scratch/DPBench/predicted_documents/yaml \
71-
scratch/DPBench/external_evaluations_yaml
95+
scratch/DPBench/external_predictions_yaml
96+
97+
98+
# Visualisations
99+
visualize \
100+
scratch/DPBench/predicted_documents/json \
101+
scratch/DPBench/external_predictions_visualisations
72102

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
from pathlib import Path
2+
3+
import pytest
4+
from datasets import load_dataset
5+
6+
from docling_eval.datamodels.dataset_record import DatasetRecordWithPrediction
7+
from docling_eval.utils.external_predictions_visualizer import PredictionsVisualizer
8+
9+
10+
def _first_doc_id(parquet_root: Path) -> str:
    """Return the doc_id of the first record in the 'test' split under *parquet_root*."""
    shards = sorted((parquet_root / "test").glob("*.parquet"))
    dataset = load_dataset(
        "parquet", data_files={"test": [str(path) for path in shards]}
    )
    first = DatasetRecordWithPrediction.model_validate(dataset["test"][0])
    return first.doc_id
17+
18+
19+
@pytest.mark.dependency(
    depends=["tests/test_dataset_builder.py::test_run_dpbench_e2e"],
    scope="session",
)
def test_predictions_visualizer_with_embedded_predictions() -> None:
    """Visualize one document from a dataset whose parquet embeds predictions."""
    dataset_dir = Path("scratch/DPBench/eval_dataset_e2e")
    output_dir = Path("scratch/DPBench/visualizer_tests/embedded")
    output_dir.mkdir(parents=True, exist_ok=True)

    viz = PredictionsVisualizer(visualizations_dir=output_dir)
    viz.create_visualizations(
        dataset_dir=dataset_dir,
        split="test",
        begin_index=0,
        end_index=1,
    )

    # NOTE(review): asserts on "<doc_id>_layout.html" while the visualizer is
    # given "<doc_id>.html" — presumably save_comparison_html_with_clusters
    # derives the _layout suffix internally; confirm against that helper.
    doc_id = _first_doc_id(dataset_dir)
    assert (output_dir / f"{doc_id}_layout.html").is_file()
39+
40+
41+
@pytest.mark.dependency(
    depends=["tests/test_dataset_builder.py::test_run_dpbench_e2e"],
    scope="session",
)
def test_predictions_visualizer_with_external_predictions() -> None:
    """Visualize one document pairing a GT-only dataset with external predictions."""
    gt_dir = Path("scratch/DPBench/gt_dataset")
    external_predictions_dir = Path("scratch/DPBench/predicted_documents/json")
    output_dir = Path("scratch/DPBench/visualizer_tests/external")
    output_dir.mkdir(parents=True, exist_ok=True)

    viz = PredictionsVisualizer(
        visualizations_dir=output_dir,
        external_predictions_dir=external_predictions_dir,
    )
    viz.create_visualizations(
        dataset_dir=gt_dir,
        split="test",
        begin_index=0,
        end_index=1,
    )

    # NOTE(review): same "<doc_id>_layout.html" naming assumption as the
    # embedded-predictions test — confirm against the rendering helper.
    doc_id = _first_doc_id(gt_dir)
    assert (output_dir / f"{doc_id}_layout.html").is_file()

0 commit comments

Comments
 (0)