Skip to content

Commit 629a451

Browse files
authored
feat: Layout evaluation fixes, mode control and cleanup (#133)
* Misc fixes — Signed-off-by: Christoph Auer <[email protected]>
* Make DatasetRecord tolerant to old parquet files — Signed-off-by: Christoph Auer <[email protected]>
* Make DatasetRecord tolerant to old parquet files (2) — Signed-off-by: Christoph Auer <[email protected]>
* Fix docvqa test, more cleanup — Signed-off-by: Christoph Auer <[email protected]>
* Important fixes for layout mAP computation — Signed-off-by: Christoph Auer <[email protected]>
* Adding modes for missing_prediction_strategy and label_filtering_strategy — Signed-off-by: Christoph Auer <[email protected]>
* Fixes for mismatched docs — Signed-off-by: Christoph Auer <[email protected]>
* Add F1 no_picture metrics to layout evaluator — Signed-off-by: Christoph Auer <[email protected]>
* Fixed commands on all READMEs — Signed-off-by: Christoph Auer <[email protected]>
* Remove extract_images ambiguity, use utility and fix errors on visualizer — Signed-off-by: Christoph Auer <[email protected]>
* Upgrade to latest docling_core — Signed-off-by: Christoph Auer <[email protected]>
* Fix ocrmac dep, upgrade uv.lock — Signed-off-by: Christoph Auer <[email protected]>
* Fix for tableformer provider — Signed-off-by: Christoph Auer <[email protected]>
* Remove code redundancy — Signed-off-by: Christoph Auer <[email protected]>
---------
Signed-off-by: Christoph Auer <[email protected]>
1 parent 54f7c81 commit 629a451

20 files changed

+2091
-987
lines changed

docling_eval/cli/main.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,9 @@
5757
from docling_eval.evaluators.doc_structure_evaluator import DocStructureEvaluator
5858
from docling_eval.evaluators.layout_evaluator import (
5959
DatasetLayoutEvaluation,
60+
LabelFilteringStrategy,
6061
LayoutEvaluator,
62+
MissingPredictionStrategy,
6163
)
6264
from docling_eval.evaluators.markdown_text_evaluator import (
6365
DatasetMarkdownEvaluation,
@@ -488,7 +490,10 @@ def evaluate(
488490
json.dump(evaluation.model_dump(), fd, indent=2, sort_keys=True)
489491

490492
elif modality == EvaluationModality.LAYOUT:
491-
layout_evaluator = LayoutEvaluator()
493+
layout_evaluator = LayoutEvaluator(
494+
# missing_prediction_strategy=MissingPredictionStrategy.PENALIZE,
495+
# label_filtering_strategy=LabelFilteringStrategy.INTERSECTION,
496+
)
492497
evaluation = layout_evaluator( # type: ignore
493498
idir,
494499
split=split,

docling_eval/datamodels/dataset_record.py

Lines changed: 23 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from pydantic import BaseModel, ConfigDict, Field, TypeAdapter, model_validator
1515

1616
from docling_eval.datamodels.types import EvaluationModality, PredictionFormats
17+
from docling_eval.utils.utils import extract_images
1718

1819
seg_adapter = TypeAdapter(Dict[int, SegmentedPage])
1920

@@ -77,25 +78,14 @@ def _extract_images(
7778
pictures_field_prefix: str,
7879
pages_field_prefix: str,
7980
):
80-
pictures = []
81-
page_images = []
82-
83-
# Save page images
84-
for img_no, picture in enumerate(document.pictures):
85-
if picture.image is not None:
86-
# img = picture.image.pil_image
87-
# pictures.append(to_pil(picture.image.uri))
88-
pictures.append(picture.image.pil_image)
89-
picture.image.uri = Path(f"{pictures_field_prefix}/{img_no}")
90-
91-
# Save page images
92-
for page_no, page in document.pages.items():
93-
if page.image is not None:
94-
# img = page.image.pil_image
95-
# img.show()
96-
page_images.append(page.image.pil_image)
97-
page.image.uri = Path(f"{pages_field_prefix}/{page_no}")
98-
81+
"""
82+
Extract images using the global utility implementation.
83+
"""
84+
_, pictures, page_images = extract_images(
85+
document=document,
86+
pictures_column=pictures_field_prefix,
87+
page_images_column=pages_field_prefix,
88+
)
9989
return pictures, page_images
10090

10191
def as_record_dict(self):
@@ -175,10 +165,17 @@ def validate_record_dict(cls, data: dict):
175165
data[gt_pic_img_alias][ix] = Features_Image().decode_example(item)
176166

177167
gt_binary = cls.get_field_alias("original")
178-
if gt_binary in data and isinstance(data[gt_binary], bytes):
179-
data[gt_binary] = DocumentStream(
180-
name="file", stream=BytesIO(data[gt_binary])
181-
)
168+
if gt_binary in data:
169+
if isinstance(data[gt_binary], bytes):
170+
data[gt_binary] = DocumentStream(
171+
name="file", stream=BytesIO(data[gt_binary])
172+
)
173+
elif isinstance(data[gt_binary], PIL.Image.Image):
174+
# Handle PIL Images by converting to bytes
175+
img_buffer = BytesIO()
176+
data[gt_binary].save(img_buffer, format="PNG")
177+
img_buffer.seek(0)
178+
data[gt_binary] = DocumentStream(name="image.png", stream=img_buffer)
182179

183180
return data
184181

@@ -196,7 +193,9 @@ class DatasetRecordWithPrediction(DatasetRecord):
196193
)
197194

198195
original_prediction: Optional[str] = None
199-
prediction_format: PredictionFormats # some enum type
196+
prediction_format: PredictionFormats = (
197+
PredictionFormats.DOCLING_DOCUMENT
198+
) # default for old files
200199
prediction_timings: Optional[Dict] = Field(alias="prediction_timings", default=None)
201200

202201
predicted_page_images: List[PIL.Image.Image] = Field(

0 commit comments

Comments (0)