
Commit b39f2e7

feat: integrate OCR visualization (#121)
Signed-off-by: samiullahchattha <[email protected]>
Co-authored-by: samiullahchattha <[email protected]>
1 parent c4e7de0 commit b39f2e7

2 files changed: 15 additions & 40 deletions

docling_eval/cli/main.py

Lines changed: 12 additions & 1 deletion
@@ -65,6 +65,7 @@
 from docling_eval.evaluators.ocr_evaluator import (
     OcrDatasetEvaluationResult,
     OCREvaluator,
+    OCRVisualizer,
 )
 from docling_eval.evaluators.readingorder_evaluator import (
     DatasetReadingOrderEvaluation,

@@ -820,8 +821,18 @@ def visualize(
                 fd.write(f"F1 Score: {ocr_evaluation.f1_score:.2f}\n")
                 fd.write(f"Recall: {ocr_evaluation.recall:.2f}\n")
                 fd.write(f"Precision: {ocr_evaluation.precision:.2f}\n")
+
+                _log.info(f"OCR evaluation stats saved to {log_filename}")
+
+            ocr_visualizer = OCRVisualizer()
+            ocr_visualizer(
+                dataset_path=idir,
+                ocr_evaluation_report_path=metrics_filename,
+                output_directory=odir,
+                data_split_name=split,
+            )
         except Exception as e:
-            _log.error(f"Error processing markdown text evaluation: {str(e)}")
+            _log.error(f"Error processing OCR evaluation: {str(e)}")

     else:
         _log.error(f"Unsupported modality for visualization: {modality}")

docling_eval/evaluators/ocr_evaluator.py

Lines changed: 3 additions & 39 deletions
@@ -248,21 +248,6 @@ def __call__(
         )
         visualizations_output_path.mkdir(parents=True, exist_ok=True)

-        document_evaluations_map: Dict[str, DocumentEvaluationEntry] = {}
-        if ocr_evaluation_report_path and ocr_evaluation_report_path.exists():
-            with open(ocr_evaluation_report_path, "r") as report_file:
-                report_content: Dict[str, Any] = json.load(report_file)
-            for eval_item_data in report_content.get("evaluations", []):
-                try:
-                    doc_entry = DocumentEvaluationEntry.model_validate(
-                        eval_item_data
-                    )
-                    document_evaluations_map[doc_entry.doc_id] = doc_entry
-                except Exception as e_parse:
-                    _log.warning(
-                        f"Failed to parse document evaluation item: {eval_item_data}. Error: {e_parse}"
-                    )
-
         path_to_parquet_files: str = str(dataset_path / data_split_name / "*.parquet")
         hf_dataset: Dataset = load_dataset(
             "parquet", data_files={data_split_name: path_to_parquet_files}

@@ -283,20 +268,6 @@ def __call__(
                 BenchMarkColumns.GROUNDTRUTH_PAGE_IMAGES
             )

-            page_image_bytes_list: List[Dict[str, bytes]] = []
-            if isinstance(page_images_data, list) and page_images_data:
-                if (
-                    isinstance(page_images_data[0], dict)
-                    and "bytes" in page_images_data[0]
-                ):
-                    page_image_bytes_list = page_images_data
-
-            if (
-                ocr_evaluation_report_path
-                and doc_id_val not in document_evaluations_map
-            ):
-                continue
-
             ground_truth_segmented_pages: Dict[int, SegmentedPage] = {}
             prediction_segmented_pages: Dict[int, SegmentedPage] = {}

@@ -316,16 +287,9 @@ def __call__(
             if parsed_pred_pages:
                 prediction_segmented_pages = parsed_pred_pages

-            if not page_image_bytes_list:
-                _log.warning(
-                    f"No page images found for document {doc_id_val}. Skipping visualization."
-                )
-                continue
-
-            image_raw_bytes: bytes = page_image_bytes_list[0]["bytes"]
-            base_image: Image.Image = Image.open(BytesIO(image_raw_bytes)).convert(
-                "RGB"
-            )
+            base_image: Image.Image = page_images_data[0]
+            if base_image.mode != "RGB":
+                base_image = base_image.convert("RGB")

             comparison_image: Image.Image = self._render_ocr_comparison_on_image(
                 doc_id_val,
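
The image-handling change in the last hunk assumes the GROUNDTRUTH_PAGE_IMAGES column now yields decoded PIL images rather than dicts of raw bytes, so the visualizer only has to normalize the colour mode before drawing overlays. A small sketch contrasting the two access patterns, with hypothetical helper names used purely for illustration:

from io import BytesIO
from PIL import Image

# Old pattern (removed): page images stored as {"bytes": ...} records that had to be decoded.
def image_from_bytes_record(record: dict) -> Image.Image:
    return Image.open(BytesIO(record["bytes"])).convert("RGB")

# New pattern: the column already holds PIL images; only ensure RGB mode before drawing.
def normalize_page_image(image: Image.Image) -> Image.Image:
    return image if image.mode == "RGB" else image.convert("RGB")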
