Skip to content

Commit 8f33420

Browse files
feat: Add more fine-grained control in the DoclingEvalCOCOExporter (#149)
Extend the DoclingEvalCOCOExporter to either export the gt_doc or the pred_doc from the parquet files. Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com>
1 parent 693c224 commit 8f33420

File tree

1 file changed

+7
-5
lines changed

1 file changed

+7
-5
lines changed

docling_eval/utils/coco_exporter.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ def export_COCO(
131131
extra_doc_label_to_valid_label_mapping: dict[
132132
DocItemLabel, Optional[DocItemLabel]
133133
],
134-
source_doc_column: str = "GT",
134+
use_pred_doc: bool = False, # If True the pred_doc is used (when present), otherwise the gt_doc
135135
):
136136
r"""
137137
Export COCO dataset
@@ -181,7 +181,7 @@ def export_COCO(
181181
data_record = DatasetRecordWithPrediction.model_validate(data)
182182
doc_id = data_record.doc_id
183183

184-
if data_record.predicted_doc is not None and source_doc_column == "pred":
184+
if data_record.predicted_doc is not None and use_pred_doc:
185185
doc = data_record.predicted_doc
186186
_log.info("Dataset document to export: 'predicted_doc'")
187187
else:
@@ -576,7 +576,7 @@ def main():
576576
"--operation",
577577
required=True,
578578
type=str,
579-
help="Operation to perform. One of ['coco']",
579+
help="Operation to perform. One of ['coco_gt_doc', 'coco_pred_doc', 'predictions']",
580580
)
581581
parser.add_argument(
582582
"-s",
@@ -614,7 +614,8 @@ def main():
614614
exporter = DoclingEvalCOCOExporter(args.docling_eval_dir)
615615

616616
# Run the operation
617-
if args.operation.upper() == "COCO":
617+
op = args.operation.lower()
618+
if op in ["coco_gt_doc", "coco_pred_doc"]:
618619
# Mapping from the parquet document label to the valid docling labels
619620
doc_label_to_valid_label_mapping: dict[DocItemLabel, DocItemLabel] = {
620621
DocItemLabel.PAGE_FOOTER: DocItemLabel.TEXT,
@@ -629,8 +630,9 @@ def main():
629630
"test",
630631
args.save_dir,
631632
doc_label_to_valid_label_mapping,
633+
use_pred_doc="coco_pred_doc" == op,
632634
)
633-
elif args.operation.upper() == "PREDICTIONS":
635+
elif op == "predictions":
634636
exporter.export_predictions_wrt_original_COCO(
635637
"test",
636638
args.save_dir,

0 commit comments

Comments
 (0)