Skip to content
2 changes: 2 additions & 0 deletions docling_eval/datamodels/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ class BenchMarkNames(str, Enum):
DOCLING_DPBENCH = "DoclingDPBench"
OMNIDOCBENCH = "OmniDocBench"
WORDSCAPE = "WordScape"
CANVA_A = "canva_a"
CANVA_B = "canva_b"

# Layout
PUBLAYNET = "PubLayNet"
Expand Down
1 change: 1 addition & 0 deletions docling_eval/evaluators/pixel/pixel_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ class MultiLabelMatrixEvaluation(BaseModel):
class PagePixelLayoutEvaluation(BaseModel):
doc_id: str
page_no: int
num_pixels: int
matrix_evaluation: MultiLabelMatrixEvaluation


Expand Down
45 changes: 28 additions & 17 deletions docling_eval/evaluators/pixel_layout_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,8 @@ def __call__(

# Compute confusion matrices
pages_confusion_matrices: Dict[int, np.ndarray]
pages_confusion_matrices, num_pixels = (
pages_pixels: Dict[int, int]
pages_confusion_matrices, doc_num_pixels, pages_pixels = (
self._compute_document_confusion_matrix(true_doc, pred_doc)
)

Expand All @@ -263,6 +264,7 @@ def __call__(
page_evaluation = PagePixelLayoutEvaluation(
doc_id=doc_id,
page_no=page_no,
num_pixels=pages_pixels[page_no],
matrix_evaluation=page_matrix_evaluation,
)
doc_page_id = f"{doc_id}-{page_no}"
Expand All @@ -276,7 +278,7 @@ def __call__(
page_matrix_evaluation.collapsed.agg_metrics.classes_f1_mean
)

ds_num_pixels += num_pixels
ds_num_pixels += doc_num_pixels

# Compute metrics for the dataset and each document
ds_matrix_evaluation: MultiLabelMatrixEvaluation = self._mlcm.compute_metrics(
Expand All @@ -287,7 +289,7 @@ def __call__(
ds_evaluation = DatasetPixelLayoutEvaluation(
layout_model_name=self._layout_model_name,
num_pages=len(all_pages_evaluations),
num_pixels=num_pixels,
num_pixels=ds_num_pixels,
rejected_samples=rejected_samples,
matrix_evaluation=ds_matrix_evaluation,
page_evaluations=all_pages_evaluations,
Expand Down Expand Up @@ -386,7 +388,8 @@ def _compute_document_confusion_matrix(
pred_doc: DoclingDocument,
) -> Tuple[
Dict[int, np.ndarray], # page_no -> page confusion matrix
int, # num_pixels
int, # document num_pixels
Dict[int, int], # page_no -> page num_pixels
]:
r"""
Compute the confusion matrix for the given documents.
Expand All @@ -403,10 +406,11 @@ def _compute_document_confusion_matrix(
_log.debug(f"GT pages: {sorted(gt_pages)}, Pred pages: {sorted(pred_pages)}")

matrix_categories_ids: List[int] = list(self._matrix_id_to_name.keys())
num_categories = len(matrix_categories_ids)
off_diagonal_cells = num_categories * num_categories - num_categories
page_confusion_matrices: Dict[int, np.ndarray] = {}
num_pixels = 0
page_confusion_matrices: Dict[int, np.ndarray] = (
{}
) # page_no -> page confusion_matrix
all_pages_pixels: Dict[int, int] = {} # page_no -> page num_pixels
doc_pixels = 0

for page_no in sorted(gt_pages):
page_size = true_doc.pages[page_no].size
Expand Down Expand Up @@ -440,22 +444,29 @@ def _compute_document_confusion_matrix(
page_confusion_matrix = self._mlcm.generate_confusion_matrix(
gt_binary, preds_binary, matrix_categories_ids
)
num_pixels += pg_width * pg_height
page_pixels = pg_width * pg_height
doc_pixels += page_pixels
all_pages_pixels[page_no] = page_pixels
page_confusion_matrices[page_no] = page_confusion_matrix
else:
# No prediction data for this page
if (
self._missing_prediction_strategy
== MissingPredictionStrategy.PENALIZE
):
# Create a penalty confusion matrix by distributing all pixels outside of diagonal
image_pixels = pg_width * pg_height
penalty_value = image_pixels / off_diagonal_cells
page_confusion_matrix = penalty_value * (
np.ones((num_categories, num_categories))
- np.eye(num_categories)
gt_binary = self._mlcm.make_binary_representation(
pg_width, pg_height, gt_layouts
)
num_pixels += image_pixels

# Make an all-one binary representation for the prediction and evaluate as usual
preds_binary = np.ones((pg_height, pg_width), dtype=np.uint64)
page_confusion_matrix = self._mlcm.generate_confusion_matrix(
gt_binary, preds_binary, matrix_categories_ids
)

page_pixels = pg_width * pg_height
doc_pixels += page_pixels
all_pages_pixels[page_no] = page_pixels
page_confusion_matrices[page_no] = page_confusion_matrix
elif (
self._missing_prediction_strategy
Expand All @@ -467,7 +478,7 @@ def _compute_document_confusion_matrix(
raise ValueError(
f"Unknown missing prediction strategy: {self._missing_prediction_strategy}"
)
return page_confusion_matrices, num_pixels
return page_confusion_matrices, doc_pixels, all_pages_pixels

def _get_page_layout_resolution(
self,
Expand Down
Loading