feat: Extend MarkdownTextEvaluator to support external_predictions_path. Add unit test

nikos-livathinos · nikos-livathinos · commit 2e202b1393f5 · 2025-12-04T16:45:19.000+01:00
Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com>
diff --git a/docling_eval/evaluators/markdown_text_evaluator.py b/docling_eval/evaluators/markdown_text_evaluator.py
@@ -26,6 +26,7 @@
     UnitEvaluation,
 )
 from docling_eval.evaluators.stats import DatasetStatistics, compute_stats
+from docling_eval.utils.external_docling_doc_loader import ExternalDoclingDocLoader
 
 _log = logging.getLogger(__name__)
 
@@ -116,6 +117,11 @@ def __call__(
         ds_path: Path to load the parquet files of the dataset
         split: Split of the dataset to load
         """
+        if external_predictions_path is not None:
+            external_docling_doc_loader = ExternalDoclingDocLoader(
+                external_predictions_path
+            )
+
         parquet_files = str(ds_path / split / "*.parquet")
         ds = load_dataset("parquet", data_files={split: parquet_files})
         _log.info(f"Overview of the dataset: {ds}")
@@ -146,16 +152,28 @@ def __call__(
         ):
             data_record = DatasetRecordWithPrediction.model_validate(data)
             doc_id = data_record.doc_id
-            if data_record.status not in self._accepted_status:
-                _log.error(
-                    "Skipping record without successfull conversion status: %s", doc_id
-                )
-                rejected_samples[EvaluationRejectionType.INVALID_CONVERSION_STATUS] += 1
-                continue
-
             true_doc = data_record.ground_truth_doc
             true_md = self._docling_document_to_md(true_doc)
-            pred_md = self._get_pred_md(data_record)
+
+            # Get the predicted markdown from the external predictions path
+            if external_predictions_path is not None:
+                pred_doc = external_docling_doc_loader(doc_id)
+                if pred_doc is None:
+                    _log.error("No external prediction found for doc_id=%s", doc_id)
+                    rejected_samples[EvaluationRejectionType.MISSING_PREDICTION] += 1
+                    continue
+                pred_md = self._docling_document_to_md(pred_doc)
+            else:
+                if data_record.status not in self._accepted_status:
+                    _log.error(
+                        "Skipping record without successfull conversion status: %s",
+                        doc_id,
+                    )
+                    rejected_samples[
+                        EvaluationRejectionType.INVALID_CONVERSION_STATUS
+                    ] += 1
+                    continue
+                pred_md = self._get_pred_md(data_record)  # type: ignore
 
             if not pred_md:
                 _log.error("There is no markdown prediction for doc_id=%s", doc_id)
diff --git a/docling_eval/utils/external_docling_doc_loader.py b/docling_eval/utils/external_docling_doc_loader.py
@@ -0,0 +1,28 @@
+from pathlib import Path
+from typing import Optional
+
+from docling_core.types.doc.document import DoclingDocument
+
+
+class ExternalDoclingDocLoader:
+    def __init__(self, external_predictions_dir: Path):
+        self._external_predictions_dir = external_predictions_dir
+
+    def __call__(self, doc_id: str) -> Optional[DoclingDocument]:
+        r"""
+        Load the DoclingDocument from the external predictions path
+        """
+        json_path = self._external_predictions_dir / f"{doc_id}.json"
+        dt_path = self._external_predictions_dir / f"{doc_id}.dt"
+        yaml_path = self._external_predictions_dir / f"{doc_id}.yaml"
+        yml_path = self._external_predictions_dir / f"{doc_id}.yml"
+
+        if json_path.is_file():
+            return DoclingDocument.load_from_json(json_path)
+        if dt_path.is_file():
+            return DoclingDocument.load_from_doctags(dt_path)
+        if yaml_path.is_file():
+            return DoclingDocument.load_from_yaml(yaml_path)
+        if yml_path.is_file():
+            return DoclingDocument.load_from_yaml(yml_path)
+        return None
diff --git a/tests/test_markdown_text_evaluator.py b/tests/test_markdown_text_evaluator.py
@@ -34,5 +34,16 @@ def test_markdown_text_evaluator():
     assert is_exception
 
 
-# if __name__ == "__main__":
-#     test_markdown_text_evaluator()
+def test_markdown_text_evaluator_external_predictions():
+    r"""Testing the evaluator with external predictions"""
+    eval4 = MarkdownTextEvaluator()
+    gt_path = Path("scratch/DPBench/gt_dataset")
+    preds_path = Path("scratch/DPBench/predicted_documents/json")
+
+    v4 = eval4(gt_path, external_predictions_path=preds_path)
+    assert v4 is not None
+
+
+if __name__ == "__main__":
+    # test_markdown_text_evaluator()
+    test_markdown_text_evaluator_external_predictions()