-
Notifications
You must be signed in to change notification settings - Fork 12
Expand file tree
/
Copy pathbase_evaluator.py
More file actions
140 lines (114 loc) · 4.42 KB
/
base_evaluator.py
File metadata and controls
140 lines (114 loc) · 4.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import json
import logging
from enum import Enum
from pathlib import Path
from typing import Any, Dict, Generic, List, Optional, TypeVar
from docling.datamodel.base_models import ConversionStatus
from docling_core.types.doc.document import (
DoclingDocument,
DocTagsDocument,
DocTagsPage,
)
from pydantic import BaseModel
from docling_eval.datamodels.dataset_record import DatasetRecordWithPrediction
from docling_eval.datamodels.types import PredictionFormats
_log = logging.getLogger(__name__)
class UnitEvaluation(BaseModel):
    """Marker base class for per-sample (unit) evaluation results."""

    # Intentionally empty: concrete evaluators subclass this with their
    # own metric fields (see the UnitEvaluationType TypeVar below).
    pass
class EvaluationRejectionType(str, Enum):
    """Reasons why a dataset sample may be excluded from an evaluation."""

    INVALID_CONVERSION_STATUS = "invalid_conversion_status"
    MISSING_PREDICTION = "missing_prediction"
    # NOTE: the misspelled member name is kept for backward compatibility;
    # existing callers reference it by name.
    MISMATHCED_DOCUMENT = "mismatched_document"
    # Correctly-spelled alias: a duplicate value in an Enum creates an alias
    # of the first member with that value, so both names resolve to the same
    # member and serialized values are unchanged.
    MISMATCHED_DOCUMENT = "mismatched_document"
    BROKEN_PREDICTION = "broken_prediction"
    EVALUATION_ERROR = "evaluation_error"
class DatasetEvaluation(BaseModel):
    """Base class for aggregated evaluation results over a dataset split."""

    # -1 acts as a "not yet evaluated" sentinel.
    evaluated_samples: int = -1
    # Count of rejected samples per rejection reason. The mutable {} default
    # is safe here: pydantic copies field defaults per instance (unlike plain
    # Python default arguments).
    rejected_samples: Dict[EvaluationRejectionType, int] = {}
# Type variables that let BaseEvaluator subclasses declare the concrete
# per-sample and per-dataset result models they produce.
UnitEvaluationType = TypeVar("UnitEvaluationType", bound=UnitEvaluation)
DatasetEvaluationType = TypeVar("DatasetEvaluationType", bound=DatasetEvaluation)
def docling_document_from_doctags(
    data_record: DatasetRecordWithPrediction,
) -> DoclingDocument:
    r"""Reconstruct a DoclingDocument from the DocTags stored in a record.

    Parameters
    ----------
    data_record: Record whose ``original`` field holds the raw DocTags
        string of the prediction.

    Returns
    -------
    DoclingDocument built from a single DocTags page, paired with the first
    ground-truth page image when one is available.

    Raises
    ------
    RuntimeError: If ``data_record.original`` is not a string.
    """
    doc_id = data_record.doc_id
    doctags = data_record.original
    if not isinstance(doctags, str):
        raise RuntimeError("Invalid format of original prediction")

    # Single-page assumption: only the first ground-truth image (if any) is
    # attached to the DocTags page.
    page_image = (
        data_record.ground_truth_page_images[0]
        if data_record.ground_truth_page_images
        else None
    )
    doctags_page = DocTagsPage(tokens=doctags, image=page_image)
    doctags_doc = DocTagsDocument(pages=[doctags_page])
    pred_doc = DoclingDocument.load_from_doctags(doctags_doc, document_name=doc_id)
    return pred_doc
class BaseEvaluator(Generic[UnitEvaluationType, DatasetEvaluationType]):
    r"""
    Base class for all evaluators.

    Subclasses implement ``__call__`` to evaluate a dataset split and return
    their concrete ``DatasetEvaluationType``.
    """

    def __init__(
        self,
        intermediate_evaluations_path: Optional[Path] = None,
        prediction_sources: Optional[List[PredictionFormats]] = None,
        supported_prediction_formats: Optional[List[PredictionFormats]] = None,
        concurrency: int = 4,
    ):
        r"""
        Parameters
        ----------
        intermediate_evaluations_path: When set, the evaluation per example
            will be saved as a JSON file inside this directory.
        prediction_sources: Prediction formats to read from the dataset.
            Defaults to ``[PredictionFormats.DOCLING_DOCUMENT]``.
        supported_prediction_formats: Formats this evaluator accepts.
            Defaults to ``[PredictionFormats.DOCLING_DOCUMENT]``.
        concurrency: Worker count available to implementations.

        Raises
        ------
        RuntimeError: If ``prediction_sources`` contains a format not listed
            in ``supported_prediction_formats``.
        """
        # Replace the original mutable default arguments (shared lists) with
        # per-call construction; the effective defaults are unchanged.
        if prediction_sources is None:
            prediction_sources = [PredictionFormats.DOCLING_DOCUMENT]
        if supported_prediction_formats is None:
            supported_prediction_formats = [PredictionFormats.DOCLING_DOCUMENT]

        self._concurrency = concurrency
        self._intermediate_evaluations_path = intermediate_evaluations_path

        # Validate the prediction_sources against the supported formats.
        if set(prediction_sources) - set(supported_prediction_formats):
            msg = "Unsupported prediction_sources. "
            msg += f"It should be something out of {supported_prediction_formats}"
            raise RuntimeError(msg)
        self._prediction_sources = prediction_sources
        self._supported_prediction_sources = supported_prediction_formats

        # Conversion statuses for which a sample is accepted for evaluation.
        self._accepted_status: List[ConversionStatus] = [
            ConversionStatus.SUCCESS,
            ConversionStatus.PARTIAL_SUCCESS,
        ]

    def __call__(
        self,
        ds_path: Path,
        split: str = "test",
        external_predictions_path: Optional[Path] = None,
    ) -> DatasetEvaluationType:
        r"""
        Perform the evaluation. Stub: subclasses must override this.
        """
        return None  # type: ignore

    def supported_prediction_formats(self) -> List[PredictionFormats]:
        r"""
        Return the supported formats for predictions.
        """
        return self._supported_prediction_sources

    def save_intermediate_evaluations(
        self,
        evaluation_name: str,
        enunumerate_id: int,  # NOTE: misspelled but kept for keyword-callers
        doc_id: str,
        evaluations: List[UnitEvaluationType],
    ) -> Optional[Path]:
        r"""
        Utility method to save intermediate evaluation results.

        Returns immediately with None if ``intermediate_evaluations_path`` is
        not set; otherwise returns the Path of the written JSON file.
        """
        # Bug fix: the original guard was inverted (`if path: return None`),
        # so results were never written when a path WAS configured, and the
        # code fell through to a crash (`None / filename`) when it was not.
        out_dir = self._intermediate_evaluations_path
        if out_dir is None:
            return None

        evals = [ev.model_dump() for ev in evaluations]
        evaluation_filename = f"{evaluation_name}_{enunumerate_id:05d}_{doc_id}.json"
        evaluation_fn = out_dir / evaluation_filename
        _log.info("Saving intermediate evaluations: %s", evaluation_fn)
        with open(evaluation_fn, "w") as fd:
            json.dump(evals, fd)
        return evaluation_fn