Commit d4a0ef6

perf: consistency and perf improvements (#170)
* Add validation-only mode to cvat_evaluation_pipeline and make output consistent
* Revise CVAT XML parsing for efficiency

Signed-off-by: Christoph Auer <[email protected]>
1 parent 74e7b3e commit d4a0ef6
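
The commit pairs two changes: a --validation-only pipeline mode and a parse-once path through the CVAT XML reader. Below is a minimal sketch of the parse-once pattern, assembled from the diffs that follow; the XML path and the driving loop are illustrative, not taken from the repository.

# Hedged sketch of the parse-once pattern this commit introduces; the
# XML path below is a hypothetical placeholder.
from pathlib import Path

from docling_eval.cvat_tools.parser import parse_cvat_file
from docling_eval.cvat_tools.validator import Validator, validate_cvat_sample

xml_path = Path("merged_xml/combined_set_A.xml")  # hypothetical path
parsed = parse_cvat_file(xml_path)  # parse the XML once, up front
validator = Validator()

# Reuse the parsed file for every image instead of re-parsing per sample.
for image_name in sorted(parsed.image_names):
    validated = validate_cvat_sample(
        xml_path, image_name, validator=validator, parsed_file=parsed
    )
    print(image_name, len(validated.report.errors))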

File tree: 9 files changed, +338 −79 lines

docling_eval/campaign_tools/cvat_evaluation_pipeline.py

Lines changed: 80 additions & 2 deletions

@@ -36,6 +36,14 @@
     find_xml_files_by_pattern,
     parse_cvat_folder,
 )
+from docling_eval.cvat_tools.models import (
+    CVATValidationError,
+    CVATValidationReport,
+    CVATValidationRunReport,
+    ValidationSeverity,
+)
+from docling_eval.cvat_tools.parser import get_all_images_from_cvat_xml, parse_cvat_file
+from docling_eval.cvat_tools.validator import Validator, validate_cvat_sample
 from docling_eval.datamodels.types import (
     BenchMarkNames,
     EvaluationModality,
@@ -139,8 +147,6 @@ def _convert_cvat_set_to_json(
         Returns:
             List of created JSON file paths
         """
-        from docling_eval.cvat_tools.models import CVATValidationRunReport
-
         folder_structure = self._load_folder_structure(xml_pattern)
 
         if output_json_dir.exists():
@@ -280,6 +286,78 @@ def merge_annotation_xmls(
 
         return gt_xml, pred_xml
 
+    def regenerate_validation_reports_from_merged(
+        self,
+        merged_dir: Optional[Path] = None,
+    ) -> None:
+        """Rebuild validation reports using pre-merged annotation XMLs."""
+
+        if merged_dir is None:
+            merged_dir = self.output_dir / "merged_xml"
+
+        set_to_filename = {
+            "set_A": merged_dir / "combined_set_A.xml",
+            "set_B": merged_dir / "combined_set_B.xml",
+        }
+
+        validator = Validator()
+
+        for set_label, xml_path in set_to_filename.items():
+            if not xml_path.exists():
+                raise FileNotFoundError(
+                    f"Missing merged annotations for {set_label}: {xml_path}"
+                )
+
+            parsed_file = parse_cvat_file(xml_path)
+            image_names = sorted(parsed_file.image_names)
+
+            reports: list[CVATValidationReport] = []
+            for image_name in image_names:
+                try:
+                    validated = validate_cvat_sample(
+                        xml_path,
+                        image_name,
+                        validator=validator,
+                        parsed_file=parsed_file,
+                    )
+                    reports.append(validated.report)
+                except Exception as exc:  # noqa: BLE001
+                    _log.error(
+                        "Validation failed for %s (%s): %s",
+                        set_label,
+                        image_name,
+                        exc,
+                    )
+                    reports.append(
+                        CVATValidationReport(
+                            sample_name=image_name,
+                            errors=[
+                                CVATValidationError(
+                                    error_type="processing_error",
+                                    message=f"Validation failed: {exc}",
+                                    severity=ValidationSeverity.FATAL,
+                                )
+                            ],
+                        )
+                    )
+
+            run_report = CVATValidationRunReport(
+                samples=reports,
+                statistics=CVATValidationRunReport.compute_statistics(reports),
+            )
+
+            output_path = self.output_dir / f"validation_report_{set_label}.json"
+            output_path.write_text(
+                run_report.model_dump_json(indent=2),
+                encoding="utf-8",
+            )
+            _log.info(
+                "✓ Regenerated %s validation report with %d sample(s): %s",
+                set_label,
+                len(reports),
+                output_path,
+            )
+
     def create_ground_truth_dataset(self) -> None:
         """
         Step 1: Create ground truth dataset from CVAT folder exports.
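
The new method serializes each run report with model_dump_json, so the report models are evidently Pydantic. Here is a sketch of reading a regenerated report back, assuming the Pydantic v2 model_validate_json counterpart; the output path is a hypothetical placeholder.

from pathlib import Path

from docling_eval.cvat_tools.models import CVATValidationRunReport

# Hypothetical output location, mirroring validation_report_{set_label}.json above.
report_path = Path("output/validation_report_set_A.json")
run_report = CVATValidationRunReport.model_validate_json(
    report_path.read_text(encoding="utf-8")
)
print(f"{len(run_report.samples)} sample report(s) loaded")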

docling_eval/campaign_tools/evaluate_cvat_tables.py

Lines changed: 16 additions & 2 deletions

@@ -13,6 +13,7 @@
 
 from docling_eval.cvat_tools.document import DocumentStructure
 from docling_eval.cvat_tools.models import CVATElement, TableStructLabel
+from docling_eval.cvat_tools.parser import ParsedCVATFile, parse_cvat_file
 
 DEFAULT_TABLE_PAIR_IOU: float = 0.20
 DEFAULT_CONTAINMENT_THRESH: float = 0.50
@@ -296,10 +297,19 @@ def evaluate_image(
     containment_thresh: float,
     table_pair_iou: float,
     sem_match_iou: float,
+    *,
+    parsed_set_a: Optional[ParsedCVATFile] = None,
+    parsed_set_b: Optional[ParsedCVATFile] = None,
 ) -> Optional[ImageTablesEvaluation]:
     try:
-        doc_a = DocumentStructure.from_cvat_xml(set_a_xml, image_name)
-        doc_b = DocumentStructure.from_cvat_xml(set_b_xml, image_name)
+        parsed_a = (
+            parsed_set_a if parsed_set_a is not None else parse_cvat_file(set_a_xml)
+        )
+        parsed_b = (
+            parsed_set_b if parsed_set_b is not None else parse_cvat_file(set_b_xml)
+        )
+        doc_a = DocumentStructure.from_parsed_image(parsed_a.get_image(image_name))
+        doc_b = DocumentStructure.from_parsed_image(parsed_b.get_image(image_name))
     except Exception:
         return None
 
@@ -352,6 +362,8 @@ def evaluate_tables(
     Returns the full evaluation model (no file I/O, no Typer types).
     """
     imgs = sorted(set(list_images_in_xml(set_a)) & set(list_images_in_xml(set_b)))
+    parsed_set_a = parse_cvat_file(set_a)
+    parsed_set_b = parse_cvat_file(set_b)
     evals: list[ImageTablesEvaluation] = []
     for name in imgs:
         res = evaluate_image(
@@ -361,6 +373,8 @@
             containment_thresh=containment_thresh,
             table_pair_iou=table_pair_iou,
             sem_match_iou=sem_match_iou,
+            parsed_set_a=parsed_set_a,
+            parsed_set_b=parsed_set_b,
         )
         if res is not None:
             evals.append(res)
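
evaluate_tables now hoists parsing out of the per-image loop: each XML is parsed once and the ParsedCVATFile objects are threaded through every evaluate_image call. A sketch of calling evaluate_image directly with pre-parsed inputs; the leading parameter names set_a_xml, set_b_xml, and image_name are assumptions inferred from the function body, and the paths, image name, and sem_match_iou value are hypothetical.

from pathlib import Path

from docling_eval.campaign_tools.evaluate_cvat_tables import (
    DEFAULT_CONTAINMENT_THRESH,
    DEFAULT_TABLE_PAIR_IOU,
    evaluate_image,
)
from docling_eval.cvat_tools.parser import parse_cvat_file

set_a, set_b = Path("set_A.xml"), Path("set_B.xml")  # hypothetical paths
parsed_a = parse_cvat_file(set_a)  # parse each annotation set once
parsed_b = parse_cvat_file(set_b)

res = evaluate_image(
    set_a_xml=set_a,             # assumed parameter name
    set_b_xml=set_b,             # assumed parameter name
    image_name="page_0001.png",  # hypothetical image
    containment_thresh=DEFAULT_CONTAINMENT_THRESH,
    table_pair_iou=DEFAULT_TABLE_PAIR_IOU,
    sem_match_iou=0.50,          # hypothetical threshold
    parsed_set_a=parsed_a,       # skips re-parsing set_A.xml
    parsed_set_b=parsed_b,       # skips re-parsing set_B.xml
)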

docling_eval/campaign_tools/run_cvat_deliveries_pipeline.py

Lines changed: 62 additions & 6 deletions

@@ -27,6 +27,7 @@ class ExecutionPlan:
     run_merge: bool
     run_dataset_creation: bool
     run_evaluation: bool
+    run_validation_reports: bool
     force_rerun: bool
     modalities: List[str]
 
@@ -35,6 +36,7 @@ def from_args(
         cls,
         merge_only: bool,
         eval_only: bool,
+        validation_only: bool,
         force: bool,
         modalities: Optional[Sequence[str]],
     ) -> ExecutionPlan:
@@ -52,13 +54,40 @@
         Raises:
             ValueError: If incompatible flags are combined
         """
-        if merge_only and eval_only:
-            raise ValueError("Cannot combine --merge-only and --eval-only")
+        exclusive_flags = [
+            flag for flag in (merge_only, eval_only, validation_only) if flag
+        ]
+        if len(exclusive_flags) > 1:
+            raise ValueError(
+                "Cannot combine --merge-only, --eval-only, or --validation-only"
+            )
+
+        if merge_only:
+            run_merge = True
+            run_dataset_creation = False
+            run_evaluation = False
+            run_validation_reports = False
+        elif eval_only:
+            run_merge = False
+            run_dataset_creation = False
+            run_evaluation = True
+            run_validation_reports = False
+        elif validation_only:
+            run_merge = False
+            run_dataset_creation = False
+            run_evaluation = False
+            run_validation_reports = True
+        else:
+            run_merge = True
+            run_dataset_creation = True
+            run_evaluation = True
+            run_validation_reports = False
 
         return cls(
-            run_merge=merge_only or not eval_only,
-            run_dataset_creation=not merge_only and not eval_only,
-            run_evaluation=not merge_only,
+            run_merge=run_merge,
+            run_dataset_creation=run_dataset_creation,
+            run_evaluation=run_evaluation,
+            run_validation_reports=run_validation_reports,
             force_rerun=force,
             modalities=(
                 list(modalities)
@@ -76,6 +105,11 @@ def should_skip_job(self, job: SubmissionSubsetJob) -> tuple[bool, str]:
         Returns:
             Tuple of (should_skip, reason_message)
         """
+        if self.run_validation_reports and not (
+            self.run_merge or self.run_dataset_creation or self.run_evaluation
+        ):
+            return False, ""
+
         if self.force_rerun:
             return False, ""
 
@@ -104,11 +138,19 @@ def get_description(self) -> str:
         """Get human-readable description of what will be executed."""
         if self.run_merge and not (self.run_dataset_creation or self.run_evaluation):
             return "merge annotations for"
+        if self.run_validation_reports and not (
+            self.run_merge or self.run_dataset_creation or self.run_evaluation
+        ):
+            return "regenerate validation reports for"
         elif not self.run_dataset_creation and self.run_evaluation:
             return "run evaluation for"
         else:
             return "evaluate"
 
+    def should_aggregate_validation(self) -> bool:
+        """Determine if validation reports should be aggregated."""
+        return self.run_evaluation or self.run_validation_reports
+
 
 @dataclass(frozen=True)
 class SubmissionSubsetJob:
@@ -173,6 +215,12 @@ def _execute_job(
     if not (plan.run_dataset_creation or plan.run_evaluation):
         return None
 
+    if plan.run_validation_reports:
+        pipeline.regenerate_validation_reports_from_merged(
+            merged_dir=job.get_merged_xml_dir(),
+        )
+        return None
+
     # Stage 2: Create datasets
     if plan.run_dataset_creation:
         pipeline.create_ground_truth_dataset()
@@ -328,6 +376,7 @@ def run_jobs(
     force: bool = False,
     merge_only: bool = False,
     eval_only: bool = False,
+    validation_only: bool = False,
     force_ocr: bool = False,
     ocr_scale: float = 1.0,
     storage_scale: float = 2.0,
@@ -341,6 +390,7 @@
     plan = ExecutionPlan.from_args(
         merge_only=merge_only,
         eval_only=eval_only,
+        validation_only=validation_only,
         force=force,
         modalities=modalities,
     )
@@ -431,7 +481,7 @@
             _LOGGER.debug("Subset failure details", exc_info=True)
 
     # Aggregate validation reports across successfully completed subsets only
-    if plan.run_evaluation:
+    if plan.should_aggregate_validation():
         if completed_jobs:
             _LOGGER.info(
                 "Aggregating validation reports for submission %s (%d/%d subsets completed)",
@@ -599,6 +649,11 @@ def parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
         action="store_true",
         help="Skip dataset creation and rerun only the evaluation stage.",
    )
+    parser.add_argument(
+        "--validation-only",
+        action="store_true",
+        help="Regenerate validation reports using existing merged annotations.",
+    )
     parser.add_argument(
         "--force-ocr",
         action="store_true",
@@ -641,6 +696,7 @@ def main(argv: Optional[Sequence[str]] = None) -> None:
         force=args.force,
         merge_only=args.merge_only,
         eval_only=args.eval_only,
+        validation_only=args.validation_only,
         force_ocr=args.force_ocr,
         ocr_scale=args.ocr_scale,
         storage_scale=args.storage_scale,
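
The ExecutionPlan changes read best as a truth table: each mode flag enables exactly one stage group, and the mutual-exclusion check now covers all three flags. A small sketch exercising the behavior shown in the diff above:

from docling_eval.campaign_tools.run_cvat_deliveries_pipeline import ExecutionPlan

plan = ExecutionPlan.from_args(
    merge_only=False,
    eval_only=False,
    validation_only=True,
    force=False,
    modalities=None,
)
assert plan.run_validation_reports
assert not (plan.run_merge or plan.run_dataset_creation or plan.run_evaluation)
assert plan.should_aggregate_validation()  # reports still get aggregated

# Combining mode flags is rejected up front:
try:
    ExecutionPlan.from_args(
        merge_only=True,
        eval_only=True,
        validation_only=False,
        force=False,
        modalities=None,
    )
except ValueError as err:
    print(err)  # Cannot combine --merge-only, --eval-only, or --validation-only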

docling_eval/cli/cvat_validator_cli.py

Lines changed: 16 additions & 3 deletions

@@ -1,22 +1,35 @@
 import argparse
 import json
 from pathlib import Path
-from typing import List, Tuple
+from typing import Dict, List, Tuple
 
 from ..cvat_tools.models import CVATValidationReport, CVATValidationRunReport
-from ..cvat_tools.parser import find_samples_in_directory, get_all_images_from_cvat_xml
+from ..cvat_tools.parser import (
+    ParsedCVATFile,
+    find_samples_in_directory,
+    get_all_images_from_cvat_xml,
+    parse_cvat_file,
+)
 from ..cvat_tools.validator import Validator, validate_cvat_sample
 
 
 def process_samples(samples: List[Tuple[str, Path, str]]) -> CVATValidationRunReport:
     """Process a list of samples and return a validation report."""
     validator = Validator()
     reports: List[CVATValidationReport] = []
+    parsed_cache: Dict[Path, ParsedCVATFile] = {}
 
     for sample_name, xml_path, image_filename in samples:
         try:
+            parsed_file = parsed_cache.get(xml_path)
+            if parsed_file is None:
+                parsed_file = parse_cvat_file(xml_path)
+                parsed_cache[xml_path] = parsed_file
             validated = validate_cvat_sample(
-                xml_path, image_filename, validator=validator
+                xml_path,
+                image_filename,
+                validator=validator,
+                parsed_file=parsed_file,
             )
             if validated.report.errors:
                 reports.append(validated.report)
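
The parsed_cache dict memoizes parse_cvat_file per XML path, so runs where many samples share one annotations file trigger a single parse. A sketch with hypothetical sample tuples:

from pathlib import Path

from docling_eval.cli.cvat_validator_cli import process_samples

# Hypothetical samples; both reference the same XML, so it is parsed once.
samples = [
    ("doc_001", Path("batch/annotations.xml"), "doc_001.png"),
    ("doc_002", Path("batch/annotations.xml"), "doc_002.png"),  # cache hit
]
report = process_samples(samples)
print(len(report.samples), "sample(s) with validation errors")  # only error samples are kept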

docling_eval/cvat_tools/__init__.py

Lines changed: 11 additions & 1 deletion

@@ -37,7 +37,13 @@
     CVATValidationRunReport,
     ValidationSeverity,
 )
-from .parser import find_samples_in_directory
+from .parser import (
+    ParsedCVATFile,
+    ParsedCVATImage,
+    find_samples_in_directory,
+    get_all_images_from_cvat_xml,
+    parse_cvat_file,
+)
 from .path_mappings import (
     PathMappings,
     associate_paths_to_containers,
@@ -89,6 +95,10 @@
     "ValidationSeverity",
     # Parser
     "find_samples_in_directory",
+    "get_all_images_from_cvat_xml",
+    "parse_cvat_file",
+    "ParsedCVATFile",
+    "ParsedCVATImage",
     # Tree
     "TreeNode",
     "build_containment_tree",
