diff --git a/docling_eval/campaign_tools/README_cvat_evaluation_pipeline.md b/docling_eval/campaign_tools/README_cvat_evaluation_pipeline.md index 6539a226..1ca02129 100644 --- a/docling_eval/campaign_tools/README_cvat_evaluation_pipeline.md +++ b/docling_eval/campaign_tools/README_cvat_evaluation_pipeline.md @@ -38,6 +38,7 @@ python cvat_evaluation_pipeline.py [OPTIONS] - `--pred-xml PATH`: Path to prediction CVAT XML file - `--step {gt,pred,eval,full}`: Pipeline step to run (default: full) - `--modalities {layout,document_structure}`: Evaluation modalities to run (default: both) +- `--strict`: Strict mode - require all images to have annotations in XML files (default: allow partial annotation batches) - `--verbose, -v`: Enable verbose logging ## Examples @@ -104,6 +105,25 @@ python cvat_evaluation_pipeline.py \ --modalities document_structure ``` +### 5. Strict Mode + +By default, the pipeline allows partial annotation batches where not all images need to have annotations in the XML file. This is useful when you have a large set of images but only a subset has been annotated. + +To enforce that ALL images must have annotations, use the `--strict` flag: + +```bash +python cvat_evaluation_pipeline.py \ + /path/to/images \ + /path/to/output \ + --gt-xml /path/to/complete_annotations.xml \ + --strict +``` + +In strict mode: +- The pipeline will fail with an error if any image lacks annotations +- Useful for validating complete annotation batches +- Helps catch missing annotations early in the process + ## Output Structure The pipeline creates the following directory structure in the output directory: diff --git a/docling_eval/campaign_tools/combine_cvat_evaluations.py b/docling_eval/campaign_tools/combine_cvat_evaluations.py index c6dd4034..c72154b2 100644 --- a/docling_eval/campaign_tools/combine_cvat_evaluations.py +++ b/docling_eval/campaign_tools/combine_cvat_evaluations.py @@ -8,9 +8,10 @@ * evaluation_CVAT_layout.json - layout-level metrics (`evaluations_per_image`) * evaluation_CVAT_document_structure.json - document-structure metrics (`evaluations`) +* evaluation_CVAT_key_value.json - key-value extraction metrics (`evaluations`) * file_name_user_id.csv - staff self-confidence / provenance table -The script matches the three sources by a **document id** that is derived from +The script matches the four sources by a **document id** that is derived from an image / doc name **without the file-extension** and we produde single table. Usage @@ -18,6 +19,7 @@ python combine_cvat_evaluations.py \ --layout_json evaluation_results/evaluation_CVAT_layout.json \ --docstruct_json evaluation_results/evaluation_CVAT_document_structure.json \ + --keyvalue_json evaluation_results/evaluation_CVAT_key_value.json \ --user_csv file_name_user_id.csv \ --out combined_evaluation.xlsx @@ -45,6 +47,20 @@ def _to_doc_id(path_like: str) -> str: return stem +def load_tables(json_path: Path) -> pd.DataFrame: + """Load evaluation_CVAT_tables.json and return a DataFrame.""" + with open(json_path, "r", encoding="utf-8") as fh: + data = json.load(fh) + if "evaluations" not in data: + raise KeyError( + "The supplied tables evaluation JSON does not contain the 'evaluations' field." + ) + df = pd.DataFrame(data["evaluations"]) + # The evaluator writes consistent doc_id (image stem). No further mapping needed. 
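+    # Cast to str so the doc_id merge key lines up with the other metric tables.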
+ df["doc_id"] = df["doc_id"].astype(str) + return df + + def load_layout(json_path: Path) -> pd.DataFrame: """Load *evaluation_CVAT_layout.json* and return a DataFrame.""" with open(json_path, "r", encoding="utf-8") as fh: @@ -103,6 +119,28 @@ def load_doc_structure(json_path: Path) -> pd.DataFrame: return df +def load_key_value(json_path: Path) -> pd.DataFrame: + """Load *evaluation_CVAT_key_value.json* and return a DataFrame.""" + with open(json_path, "r", encoding="utf-8") as fh: + data = json.load(fh) + + if "evaluations" not in data: + raise KeyError( + "The supplied key-value evaluation JSON does not contain the " + "'evaluations' field." + ) + + # Convert the evaluations list to a DataFrame + # The evaluations is a list of KeyValueEvaluation objects + evaluations_list = [] + for eval_data in data["evaluations"]: + evaluations_list.append(eval_data) + + df = pd.DataFrame(evaluations_list) + + return df + + def load_user_table(csv_path: Path) -> pd.DataFrame: """Load *file_name_user_id.csv* (staff provenance) and return a DataFrame.""" df = pd.read_csv(csv_path) @@ -130,15 +168,53 @@ def load_user_table(csv_path: Path) -> pd.DataFrame: def merge_tables( layout_df: pd.DataFrame, doc_df: pd.DataFrame, + keyvalue_df: Optional[pd.DataFrame] = None, user_df: Optional[pd.DataFrame] = None, + tables_df: Optional[pd.DataFrame] = None, ) -> pd.DataFrame: - """ - Merge layout, document structure, and optionally user dataframes. - If user_df is None, skip merging user columns. - """ df = layout_df.merge( doc_df[["doc_id", "edit_distance_struct"]], on="doc_id", how="outer" ) + + if keyvalue_df is not None: + df = df.merge( + keyvalue_df[ + [ + "doc_id", + "entity_f1", + "relation_f1", + "num_entity_diff", + "num_entity_diff_normalized", + "num_link_diff", + "num_link_diff_normalized", + ] + ], + on="doc_id", + how="left", + ) + + if tables_df is not None: + df = df.merge( + tables_df[ + [ + "doc_id", + "row_count_abs_diff_sum", + "col_count_abs_diff_sum", + "merge_count_abs_diff_sum", + "sem_body_f1", + "sem_row_section_f1", + "sem_row_header_f1", + "sem_col_header_f1", + "tables_unmatched", + "table_pairs", + "orphan_table_annotation_A", + "orphan_table_annotation_B", + ] + ], + on="doc_id", + how="left", + ) + if user_df is not None: df = df.merge( user_df[["doc_id", "annotator_id", "self_confidence", "image_name"]], @@ -146,47 +222,67 @@ def merge_tables( how="left", suffixes=("", "_user"), ) - # the self_confidence column numeric df["self_confidence"] = pd.to_numeric(df["self_confidence"], errors="coerce") - # confidence difference between the annotators df["diff_self_confidence"] = df.groupby("doc_id")["self_confidence"].transform( lambda x: x.max() - x.min() ) - # to check the self-confidence values - avg_self_confidence = df["self_confidence"].mean() - std_self_confidence = df["self_confidence"].std() - quantiles = df["self_confidence"].quantile([0.01, 0.5, 0.99]) - - print(f"Average self-confidence: {avg_self_confidence:.4f}") - print(f"Standard deviation: {std_self_confidence:.4f}") - print( - f"Quantiles (1%, 50%, 99%): {quantiles[0.01]:.4f}, {quantiles[0.5]:.4f}, {quantiles[0.99]:.4f}" - ) - - # we can re-order the most relevant columns towards the front. 
preferred_order = [ "doc_id", - "image_name", # from user table (includes extension) - "avg_weighted_label_matched_iou_50", # "value" in layout JSON file - "segmentation_f1", # may or may not exist, depending on evaluation config + "image_name", + "avg_weighted_label_matched_iou_50", + "segmentation_f1", "edit_distance_struct", + # table metrics (consolidated) + "row_count_abs_diff_sum", + "col_count_abs_diff_sum", + "merge_count_abs_diff_sum", + "sem_body_f1", + "sem_row_section_f1", + "sem_row_header_f1", + "sem_col_header_f1", + "tables_unmatched", + "table_pairs", + "orphan_table_annotation_A", + "orphan_table_annotation_B", + # key-values & misc + "entity_f1", + "relation_f1", "map_val", "annotator_id", "self_confidence", ] - ordered_cols = [c for c in preferred_order if c in df.columns] + [ + ordered = [c for c in preferred_order if c in df.columns] + [ c for c in df.columns if c not in preferred_order ] - df = df[ordered_cols] + df = df[ordered] - # filter columns to be present, and remove the rest filter_cols = [ "doc_id", "segmentation_f1", "segmentation_f1_no_pictures", "avg_weighted_label_matched_iou_50", "edit_distance_struct", + # consolidated table metrics + "row_count_abs_diff_sum", + "col_count_abs_diff_sum", + "merge_count_abs_diff_sum", + "sem_body_f1", + "sem_row_section_f1", + "sem_row_header_f1", + "sem_col_header_f1", + "tables_unmatched", + "table_pairs", + "orphan_table_annotation_A", + "orphan_table_annotation_B", + # key-values + "entity_f1", + "relation_f1", + "num_entity_diff", + "num_entity_diff_normalized", + "num_link_diff", + "num_link_diff_normalized", + # existing counts "annotator_id", "self_confidence", "diff_self_confidence", @@ -200,53 +296,68 @@ def merge_tables( "table_count_diff", "picture_count_diff", ] - df = df[[col for col in filter_cols if col in df.columns]] - + df = df[[c for c in filter_cols if c in df.columns]] return df def _write_as_excel_table(df: pd.DataFrame, path: Path) -> None: """ - Write *df* to *path* as an Excel **Table** and append five derived columns: - - layout_different → 1 if segmentation_f1_no_pictures < 0.9 else "" - structure_different→ 1 if edit_distance_struct ≥ 10 else "" - tables_different → 1 if table_count_diff ≥ 1 else "" - pictures_different → 1 if picture_count_diff ≥ 2 else "" - need_review → SUM of the four flags above (0-4) + Write *df* to *path* as an Excel **Table** and append derived columns: + + layout_different → 1 if segmentation or label-IoU is low + structure_different → 1 if edit distance is high + tables_different → 1 if table count differs + pictures_different → 1 if picture count differs + key_values_different → 1 if entity/relation F1 is low (and non-zero) + table_struct_different → 1 if any table structure count diffs sum to > 0 + table_semantic_different → 1 if table_pairs > 0 and any semantic F1 < 0.9 + need_review → SUM of selected flags (kept as in your current sheet) """ + from xlsxwriter.utility import xl_range + df = df.copy() - # append empty placeholders – the formulas will fill them extra_cols = [ "layout_different", "structure_different", "tables_different", "pictures_different", + "key_values_different", + "table_struct_different", + "table_semantic_different", "need_review", + "need_task2_review", ] for col in extra_cols: - df[col] = "" + if col not in df.columns: + df[col] = "" with pd.ExcelWriter(path, engine="xlsxwriter") as writer: - # 1️⃣ dump the data frame df.to_excel(writer, sheet_name="Evaluation", index=False) - # 2️⃣ grab handles we need - wb = writer.book ws = 
writer.sheets["Evaluation"] - n_rows, n_cols = df.shape # includes derived columns + n_rows, n_cols = df.shape - # 3️⃣ build column specs with formulas for the derived columns column_settings = [{"header": h} for h in df.columns] - # Tune these thresholds to your liking: col_formula = { - "layout_different": '=IF(OR([@[segmentation_f1_no_pictures]]<0.9, [@[avg_weighted_label_matched_iou_50]]<0.9),1,"")', + "layout_different": '=IF(OR([@[segmentation_f1_no_pictures]]<0.9,[@[avg_weighted_label_matched_iou_50]]<0.9),1,"")', "structure_different": '=IF([@[edit_distance_struct]]>=10,1,"")', "tables_different": '=IF([@[table_count_diff]]>=1,1,"")', "pictures_different": '=IF([@[picture_count_diff]]>=2,1,"")', + "key_values_different": '=IF(OR([@[num_entity_diff]]>2,[@[num_link_diff]]>2,AND([@[entity_f1]]<>0,[@[entity_f1]]<0.95),AND([@[relation_f1]]<>0,[@[relation_f1]]<0.95)),1,"")', + # 1) any structure count diffs sum to > 0 + "table_struct_different": ( + "=IF(SUM([@[row_count_abs_diff_sum]],[@[col_count_abs_diff_sum]]," + '[@[merge_count_abs_diff_sum]])>0,1,"")' + ), + # 2) gate semantics on table_pairs > 0, then flag if any F1 < 0.9 + "table_semantic_different": ( + "=IF(AND([@[table_pairs]]>0,OR([@[sem_col_header_f1]]<0.9," + '[@[sem_row_header_f1]]<0.9,[@[sem_row_section_f1]]<0.9,[@[sem_body_f1]]<0.9)),1,"")' + ), "need_review": "=SUM([@[layout_different]]:[@[pictures_different]])", + "need_task2_review": "=SUM([@[key_values_different]]:[@[table_semantic_different]],[@[tables_unmatched]])", } for spec in column_settings: @@ -254,27 +365,24 @@ def _write_as_excel_table(df: pd.DataFrame, path: Path) -> None: if hdr in col_formula: spec["formula"] = col_formula[hdr] - # 4️⃣ add the Excel Table (auto-filter, style, formulas copied down) - table_range = xl_range(0, 0, n_rows, n_cols - 1) # zero-based, incl. header + table_range = xl_range(0, 0, n_rows, n_cols - 1) ws.add_table( table_range, { "name": "EvaluationTbl", "header_row": True, "columns": column_settings, - # 👇 style must be a string such as "TableStyleMedium2" "style": "TableStyleMedium2", }, ) - # 5️⃣ quality-of-life tweaks ws.freeze_panes(1, 0) - ws.autofit() + ws.autofit() # type: ignore[attr-defined] def build_arg_parser() -> argparse.ArgumentParser: p = argparse.ArgumentParser( - description="Combine CVAT layout & document-structure evaluation JSONs " + description="Combine CVAT layout, document-structure, and key-value evaluation JSONs " "with the staff provenance CSV into a single spreadsheet.", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) @@ -290,6 +398,12 @@ def build_arg_parser() -> argparse.ArgumentParser: default=Path("evaluation_results/evaluation_CVAT_document_structure.json"), help="Path to evaluation_CVAT_document_structure.json", ) + p.add_argument( + "--keyvalue_json", + type=Path, + default=Path("evaluation_results/evaluation_CVAT_key_value.json"), + help="Path to evaluation_CVAT_key_value.json", + ) p.add_argument( "--user_csv", type=Path, @@ -312,15 +426,18 @@ def build_arg_parser() -> argparse.ArgumentParser: def combine_cvat_evaluations( layout_json: Path, docstruct_json: Path, + keyvalue_json: Optional[Path] = None, user_csv: Optional[Path] = None, + tables_json: Optional[Path] = None, out: Path = Path("combined_evaluation.xlsx"), ) -> pd.DataFrame: """ - Combine CVAT layout & document-structure evaluation JSONs with the staff provenance CSV into a single spreadsheet. + Combine CVAT layout, document-structure, and key-value evaluation JSONs with the staff provenance CSV into a single spreadsheet. 
Args: layout_json: Path to evaluation_CVAT_layout.json docstruct_json: Path to evaluation_CVAT_document_structure.json + keyvalue_json: Optional path to evaluation_CVAT_key_value.json user_csv: Optional path to file_name_user_id.csv out: Output file path; extension decides format (.xlsx for Excel, otherwise CSV) @@ -329,11 +446,16 @@ def combine_cvat_evaluations( """ layout_df = load_layout(layout_json) doc_df = load_doc_structure(docstruct_json) + tables_df = load_tables(tables_json) if tables_json is not None else None + + keyvalue_df: Optional[pd.DataFrame] = None + if keyvalue_json is not None: + keyvalue_df = load_key_value(keyvalue_json) user_df: Optional[pd.DataFrame] = None if user_csv is not None: user_df = load_user_table(user_csv) - combined_df = merge_tables(layout_df, doc_df, user_df) + combined_df = merge_tables(layout_df, doc_df, keyvalue_df, user_df, tables_df) if out.suffix.lower() == ".xlsx": # combined_df.to_excel(out, index=False) @@ -350,6 +472,7 @@ def main() -> None: combine_cvat_evaluations( layout_json=args.layout_json, docstruct_json=args.docstruct_json, + keyvalue_json=args.keyvalue_json, user_csv=args.user_csv, out=args.out, ) diff --git a/docling_eval/campaign_tools/cvat_create_annotation_tasks_from_folders.py b/docling_eval/campaign_tools/cvat_create_annotation_tasks_from_folders.py index 6d786277..8ee8b755 100644 --- a/docling_eval/campaign_tools/cvat_create_annotation_tasks_from_folders.py +++ b/docling_eval/campaign_tools/cvat_create_annotation_tasks_from_folders.py @@ -10,12 +10,13 @@ This is useful for preparing large-scale annotation tasks for CVAT or similar tools. Usage: - uv run python scratches/scratch_46.py --input-directory --output-directory [--sliding-window ] + uv run python docling_eval/campaign_tools/cvat_create_annotation_tasks_from_folders.py batch-prepare --input-directory --output-directory [--sliding-window ] [--use-predictions/--no-use-predictions] Arguments: input_directory: Root directory containing subdirectories with files to process output_directory: Where to store the generated datasets (one subdir per input subdir) sliding_window: Number of pages per CVAT task (default: 1) + use_predictions: Whether to create prediction dataset and use predictions in CVAT (default: True) """ from pathlib import Path @@ -30,7 +31,10 @@ def process_subdirectories( - input_directory: Path, output_directory: Path, sliding_window: int = 1 + input_directory: Path, + output_directory: Path, + sliding_window: int = 1, + use_predictions: bool = True, ) -> None: """ For each subdirectory in input_directory, create gt_dataset, eval_dataset, and cvat_dataset_preannotated @@ -40,6 +44,7 @@ def process_subdirectories( input_directory: Root directory with subdirectories to process output_directory: Where to store generated datasets sliding_window: Number of pages per CVAT task (default: 1) + use_predictions: Whether to create prediction dataset and use predictions in CVAT """ input_directory = input_directory.expanduser().resolve() output_directory = output_directory.expanduser().resolve() @@ -71,26 +76,33 @@ def process_subdirectories( else: typer.echo(f" GT dataset already exists, skipping.") - if not eval_dir.exists(): - typer.echo(f" Creating prediction dataset (Docling)...") - create_eval( - benchmark=BenchMarkNames.PLAIN_FILES, - output_dir=odir, - prediction_provider=PredictionProviderType.DOCLING, - do_visualization=True, - image_scale_factor=2.0, - do_table_structure=False, - ) + if use_predictions: + if not eval_dir.exists(): + typer.echo(f" Creating 
prediction dataset (Docling)...") + create_eval( + benchmark=BenchMarkNames.PLAIN_FILES, + output_dir=odir, + prediction_provider=PredictionProviderType.DOCLING, + do_visualization=True, + image_scale_factor=2.0, + do_table_structure=False, + ) + else: + typer.echo(f" Prediction dataset already exists, skipping.") else: - typer.echo(f" Prediction dataset already exists, skipping.") + typer.echo( + f" Skipping prediction dataset creation (use_predictions=False)." + ) if not cvat_dir.exists(): typer.echo(f" Creating CVAT pre-annotated dataset...") + # Use gt_dir when no predictions, eval_dir when using predictions + source_dir = (eval_dir / "test") if use_predictions else (gt_dir / "test") create_cvat( - gt_dir=eval_dir / "test", + gt_dir=source_dir, output_dir=cvat_dir, bucket_size=100, - use_predictions=True, + use_predictions=use_predictions, sliding_window=sliding_window, ) else: @@ -114,11 +126,16 @@ def batch_prepare( sliding_window: int = typer.Option( 1, help="Number of pages per CVAT task (default: 1)" ), + use_predictions: bool = typer.Option( + True, help="Whether to create prediction dataset and use predictions in CVAT" + ), ) -> None: """ Batch-create Docling evaluation datasets for all subdirectories in input_directory. """ - process_subdirectories(input_directory, output_directory, sliding_window) + process_subdirectories( + input_directory, output_directory, sliding_window, use_predictions + ) typer.echo("\nAll benchmarks created successfully!") diff --git a/docling_eval/campaign_tools/cvat_evaluation_pipeline.py b/docling_eval/campaign_tools/cvat_evaluation_pipeline.py index 600c2ac2..e5a573ef 100755 --- a/docling_eval/campaign_tools/cvat_evaluation_pipeline.py +++ b/docling_eval/campaign_tools/cvat_evaluation_pipeline.py @@ -20,9 +20,13 @@ from docling_eval.campaign_tools.combine_cvat_evaluations import ( combine_cvat_evaluations, ) +from docling_eval.campaign_tools.evaluate_cvat_tables import evaluate_tables from docling_eval.cli.main import evaluate from docling_eval.cvat_tools.cvat_to_docling import convert_cvat_to_docling -from docling_eval.cvat_tools.parser import MissingImageInCVATXML +from docling_eval.cvat_tools.parser import ( + MissingImageInCVATXML, + get_all_images_from_cvat_xml, +) from docling_eval.datamodels.types import ( BenchMarkNames, EvaluationModality, @@ -41,16 +45,18 @@ class CVATEvaluationPipeline: """Pipeline for CVAT annotation evaluation.""" - def __init__(self, images_dir: Path, output_dir: Path): + def __init__(self, images_dir: Path, output_dir: Path, strict: bool = False): """ Initialize the pipeline. 
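+        In non-strict mode, images without annotations in the supplied CVAT XML are skipped, so partially annotated batches can still be processed.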
Args: images_dir: Directory containing PNG image files output_dir: Base directory for all pipeline outputs + strict: If True, require all images to have annotations (default: False) """ self.images_dir = Path(images_dir) self.output_dir = Path(output_dir) + self.strict = strict # Create subdirectories self.gt_json_dir = self.output_dir / "ground_truth_json" @@ -93,9 +99,58 @@ def _convert_cvat_to_json( output_json_dir.mkdir(parents=True, exist_ok=True) json_files = [] + if self.strict: + _log.info("Running in STRICT mode: all images must have annotations") + else: + _log.info("Running in NORMAL mode: partial annotation batches allowed") + + # Get all images available in the directory image_files = self._find_image_files() - for image_path in image_files: + # Get all images that have annotations in the CVAT XML + try: + annotated_images = set(get_all_images_from_cvat_xml(cvat_xml_path)) + except Exception as e: + _log.error(f"Failed to read CVAT XML {cvat_xml_path}: {e}") + return [] + + # Filter to only process images that have annotations (unless in strict mode) + if self.strict: + # In strict mode, require all images to have annotations + missing_images = [ + img.name for img in image_files if img.name not in annotated_images + ] + if missing_images: + _log.error( + f"Strict mode: Found {len(missing_images)} images without annotations in {cvat_xml_path.name}" + ) + _log.error( + f"Missing annotations for: {', '.join(missing_images[:10])}" + + ("..." if len(missing_images) > 10 else "") + ) + raise ValueError( + f"Strict mode enabled: {len(missing_images)} images lack annotations in {cvat_xml_path.name}" + ) + images_to_process = image_files + _log.info( + f"Strict mode: All {len(image_files)} image files have annotations in {cvat_xml_path.name}" + ) + else: + # Normal mode: allow partial annotation batches + images_to_process = [ + img for img in image_files if img.name in annotated_images + ] + skipped_count = len(image_files) - len(images_to_process) + + _log.info( + f"Found {len(image_files)} image files, {len(images_to_process)} have annotations in {cvat_xml_path.name}" + ) + if skipped_count > 0: + _log.info( + f"Skipping {skipped_count} images without annotations (expected for partial annotation batches)" + ) + + for image_path in images_to_process: _log.info( f"Converting {image_path.name} with annotations from {cvat_xml_path.name}" ) @@ -122,11 +177,19 @@ def _convert_cvat_to_json( _log.warning(f"\u26a0 Failed to convert {image_path.name}") except MissingImageInCVATXML: - _log.warning( - f"Image {image_path.name} not found in {cvat_xml_path.name}. " - "This is expected for partial annotation batches. Skipping." - ) - continue + if self.strict: + # In strict mode, this is a fatal error + _log.error( + f"Strict mode: Image {image_path.name} not found in {cvat_xml_path.name}" + ) + raise + else: + # In normal mode, this should be unexpected due to pre-filtering + _log.error( + f"Unexpected: Image {image_path.name} was pre-filtered but not found in {cvat_xml_path.name}. " + "This suggests an issue with the filtering logic." 
+ ) + continue except ValueError as ve: _log.error(f"\u2717 Error processing {image_path.name}: {ve}") continue @@ -204,6 +267,42 @@ def create_prediction_dataset(self, pred_cvat_xml: Path) -> None: ) _log.info(f"✓ Prediction dataset created: {self.eval_dataset_dir}") + def run_table_evaluation( + self, + gt_cvat_xml: Path, + pred_cvat_xml: Path, + out_json: Optional[Path] = None, + containment_thresh: float = 0.50, + table_pair_iou: float = 0.20, + sem_match_iou: float = 0.30, + ) -> Path: + """ + Run the table structure/semantics evaluation directly on the two CVAT XMLs. + + Writes a JSON file (default: evaluation_results/evaluation_CVAT_tables.json) and returns its path. + """ + _log.info("=== Running Table Evaluation ===") + + if out_json is None: + out_json = self.evaluation_results_dir / "evaluation_CVAT_tables.json" + + self.evaluation_results_dir.mkdir(parents=True, exist_ok=True) + + result = evaluate_tables( + set_a=gt_cvat_xml, + set_b=pred_cvat_xml, + containment_thresh=containment_thresh, + table_pair_iou=table_pair_iou, + sem_match_iou=sem_match_iou, + ) + + out_json.write_text( + json.dumps(result.model_dump(mode="json"), ensure_ascii=False, indent=2), + encoding="utf-8", + ) + _log.info(f"✓ Tables evaluation written to: {out_json}") + return out_json + def run_evaluation( self, modalities: Optional[List[str]] = None, user_csv: Optional[Path] = None ) -> None: @@ -279,11 +378,15 @@ def run_evaluation( docstruct_json = ( self.evaluation_results_dir / "evaluation_CVAT_document_structure.json" ) + key_value_json = self.evaluation_results_dir / "evaluation_CVAT_key_value.json" + tables_json = self.evaluation_results_dir / "evaluation_CVAT_tables.json" _log.info(f"Combining evaluation results to {combined_out}") combine_cvat_evaluations( layout_json=layout_json, docstruct_json=docstruct_json, + keyvalue_json=key_value_json, user_csv=user_csv, + tables_json=tables_json, out=combined_out, ) @@ -309,6 +412,7 @@ def run_full_pipeline( try: self.create_ground_truth_dataset(gt_cvat_xml) self.create_prediction_dataset(pred_cvat_xml) + self.run_table_evaluation(gt_cvat_xml, pred_cvat_xml) self.run_evaluation(modalities, user_csv) # Combine results if user_csv is provided @@ -317,11 +421,18 @@ def run_full_pipeline( docstruct_json = ( self.evaluation_results_dir / "evaluation_CVAT_document_structure.json" ) + key_value_json = ( + self.evaluation_results_dir / "evaluation_CVAT_key_value.json" + ) + tables_json = self.evaluation_results_dir / "evaluation_CVAT_tables.json" + _log.info(f"Combining evaluation results to {combined_out}") combine_cvat_evaluations( layout_json=layout_json, docstruct_json=docstruct_json, + keyvalue_json=key_value_json, user_csv=user_csv, + tables_json=tables_json, out=combined_out, ) @@ -364,16 +475,16 @@ def main(): parser.add_argument( "--step", - choices=["gt", "pred", "eval", "full"], + choices=["gt", "pred", "tables", "eval", "full"], default="full", - help="Pipeline step to run: gt (ground truth), pred (predictions), eval (evaluation), full (all steps)", + help="Pipeline step to run: gt (ground truth), pred (predictions), tables (table eval only), eval, or full.", ) parser.add_argument( "--modalities", nargs="+", - choices=["layout", "document_structure"], - default=["layout", "document_structure"], + choices=["layout", "document_structure", "key_value"], + default=["layout", "document_structure", "key_value"], help="Evaluation modalities to run", ) @@ -381,6 +492,12 @@ def main(): "--verbose", "-v", action="store_true", help="Enable verbose logging" ) + 
parser.add_argument( + "--strict", + action="store_true", + help="Strict mode: require all images to have annotations in XML files (default: allow partial annotation batches)", + ) + args = parser.parse_args() if args.verbose: @@ -392,7 +509,9 @@ def main(): sys.exit(1) # Initialize pipeline - pipeline = CVATEvaluationPipeline(args.images_dir, args.output_dir) + pipeline = CVATEvaluationPipeline( + args.images_dir, args.output_dir, strict=args.strict + ) if args.step == "gt": if not args.gt_xml: @@ -411,7 +530,17 @@ def main(): _log.error(f"Prediction XML file does not exist: {args.pred_xml}") sys.exit(1) pipeline.create_prediction_dataset(args.pred_xml) - + elif args.step == "tables": + if not args.gt_xml or not args.pred_xml: + _log.error("Both --gt-xml and --pred-xml are required for tables step") + sys.exit(1) + if not args.gt_xml.exists(): + _log.error(f"Ground truth XML file does not exist: {args.gt_xml}") + sys.exit(1) + if not args.pred_xml.exists(): + _log.error(f"Prediction XML file does not exist: {args.pred_xml}") + sys.exit(1) + pipeline.run_table_evaluation(args.gt_xml, args.pred_xml) elif args.step == "eval": pipeline.run_evaluation(args.modalities, user_csv=args.user_csv) diff --git a/docling_eval/campaign_tools/evaluate_cvat_tables.py b/docling_eval/campaign_tools/evaluate_cvat_tables.py new file mode 100644 index 00000000..60f99d07 --- /dev/null +++ b/docling_eval/campaign_tools/evaluate_cvat_tables.py @@ -0,0 +1,401 @@ +from __future__ import annotations + +import json +from dataclasses import dataclass +from enum import Enum +from pathlib import Path +from typing import Dict, Iterable, List, Optional, Sequence, Tuple + +import typer +from docling_core.types.doc.base import BoundingBox +from docling_core.types.doc.labels import DocItemLabel +from pydantic import BaseModel, Field + +from docling_eval.cvat_tools.document import DocumentStructure +from docling_eval.cvat_tools.models import CVATElement, TableStructLabel + +DEFAULT_TABLE_PAIR_IOU: float = 0.20 +DEFAULT_CONTAINMENT_THRESH: float = 0.50 +DEFAULT_SEM_MATCH_IOU: float = 0.30 + + +def iou(a: BoundingBox, b: BoundingBox) -> float: + return a.intersection_over_union(b) + + +def inter_area(a: BoundingBox, b: BoundingBox) -> float: + return a.intersection_area_with(b) + + +def area(bb: BoundingBox) -> float: + return bb.area() + + +def inside_with_tolerance( + child: BoundingBox, parent: BoundingBox, thresh: float +) -> bool: + a = area(child) + if a <= 0.0: + return False + return (inter_area(child, parent) / a) >= thresh + + +class SemClass(str, Enum): + COL_HEADER = "col_header" + ROW_HEADER = "row_header" + ROW_SECTION = "row_section" + BODY = "body" + + +SEM_TO_TABLE_LABEL: dict[SemClass, TableStructLabel] = { + SemClass.COL_HEADER: TableStructLabel.COL_HEADER, + SemClass.ROW_HEADER: TableStructLabel.ROW_HEADER, + SemClass.ROW_SECTION: TableStructLabel.ROW_SECTION, + SemClass.BODY: TableStructLabel.BODY, +} + + +@dataclass +class TableStruct: + table_el: CVATElement + rows: list[CVATElement] + cols: list[CVATElement] + merges: list[CVATElement] + sem: dict[SemClass, list[CVATElement]] + + +class TablePairMetrics(BaseModel): + row_count_diff: int + col_count_diff: int + merge_count_diff: int + sem_f1: dict[SemClass, float] + + +class ImageTablesEvaluation(BaseModel): + # identifier used for joining in the combiner + doc_id: str + + # kept metrics + row_count_abs_diff_sum: int = 0 + col_count_abs_diff_sum: int = 0 + merge_count_abs_diff_sum: int = 0 + + sem_body_f1: float = 0.0 + sem_row_section_f1: float = 0.0 + 
sem_row_header_f1: float = 0.0 + sem_col_header_f1: float = 0.0 + + table_pairs: int = 0 + tables_unmatched: int = 0 + + orphan_table_annotation_A: int = 0 + orphan_table_annotation_B: int = 0 + + +class TablesEvaluationRun(BaseModel): + evaluations: list[ImageTablesEvaluation] = Field(default_factory=list) + + +def list_images_in_xml(xml_path: Path) -> list[str]: + import xml.etree.ElementTree as ET + + root = ET.parse(xml_path).getroot() + result: list[str] = [] + for img in root.findall(".//image"): + name = img.get("name") + if name: + result.append(name) + return result + + +def _elements_by_label( + elements: Sequence[CVATElement], label: object +) -> list[CVATElement]: + return [e for e in elements if e.label == label] + + +def _collect_tables( + doc: DocumentStructure, contain_thresh: float +) -> tuple[list[TableStruct], list[CVATElement]]: + tables = _elements_by_label(doc.elements, DocItemLabel.TABLE) + result: list[TableStruct] = [] + + pool_rows = _elements_by_label(doc.elements, TableStructLabel.TABLE_ROW) + pool_cols = _elements_by_label(doc.elements, TableStructLabel.TABLE_COLUMN) + pool_merges = _elements_by_label(doc.elements, TableStructLabel.TABLE_MERGED_CELL) + pool_sem: dict[SemClass, list[CVATElement]] = { + sc: _elements_by_label(doc.elements, lab) + for sc, lab in SEM_TO_TABLE_LABEL.items() + } + + for t in tables: + tb = t.bbox + rows = [ + e for e in pool_rows if inside_with_tolerance(e.bbox, tb, contain_thresh) + ] + cols = [ + e for e in pool_cols if inside_with_tolerance(e.bbox, tb, contain_thresh) + ] + merges = [ + e for e in pool_merges if inside_with_tolerance(e.bbox, tb, contain_thresh) + ] + sem = { + sc: [ + e + for e in pool_sem[sc] + if inside_with_tolerance(e.bbox, tb, contain_thresh) + ] + for sc in SemClass + } + result.append( + TableStruct(table_el=t, rows=rows, cols=cols, merges=merges, sem=sem) + ) + + all_tables_bb = [t.table_el.bbox for t in result] + + def not_in_any_table(el: CVATElement) -> bool: + return not any( + inside_with_tolerance(el.bbox, tb, contain_thresh) for tb in all_tables_bb + ) + + orphans = [ + e + for e in pool_rows + pool_cols + pool_merges + sum(pool_sem.values(), []) + if not_in_any_table(e) + ] + return result, orphans + + +def _pair_tables( + a: list[TableStruct], + b: list[TableStruct], + iou_thresh: float, +) -> tuple[list[tuple[TableStruct, TableStruct]], list[TableStruct], list[TableStruct]]: + if not a or not b: + return [], a[:], b[:] + + candidates: list[tuple[int, int, float]] = [] + for i, ta in enumerate(a): + for j, tb in enumerate(b): + candidates.append((i, j, iou(ta.table_el.bbox, tb.table_el.bbox))) + candidates.sort(key=lambda t: t[2], reverse=True) + + used_a: set[int] = set() + used_b: set[int] = set() + matched: list[tuple[TableStruct, TableStruct]] = [] + for i, j, s in candidates: + if s < iou_thresh: + break + if i in used_a or j in used_b: + continue + matched.append((a[i], b[j])) + used_a.add(i) + used_b.add(j) + + unmatched_a = [a[i] for i in range(len(a)) if i not in used_a] + unmatched_b = [b[j] for j in range(len(b)) if j not in used_b] + return matched, unmatched_a, unmatched_b + + +def _greedy_intersection_sum( + a: Sequence[BoundingBox], b: Sequence[BoundingBox], iou_thresh: float +) -> float: + if not a or not b: + return 0.0 + pairs: list[tuple[int, int, float]] = [] + for i, ba in enumerate(a): + for j, bb in enumerate(b): + v = iou(ba, bb) + if v >= iou_thresh: + pairs.append((i, j, v)) + pairs.sort(key=lambda t: t[2], reverse=True) + used_i: set[int] = set() + used_j: set[int] = 
set() + inter_sum = 0.0 + for i_idx, j_idx, _ in pairs: + if i_idx in used_i or j_idx in used_j: + continue + used_i.add(i_idx) + used_j.add(j_idx) + inter_sum += inter_area(a[i_idx], b[j_idx]) + return inter_sum + + +def _sem_f1( + a_boxes: list[BoundingBox], b_boxes: list[BoundingBox], iou_thresh: float +) -> float: + if not a_boxes and not b_boxes: + return 1.0 + if not a_boxes or not b_boxes: + return 0.0 + inter = _greedy_intersection_sum(a_boxes, b_boxes, iou_thresh=iou_thresh) + a_area = sum(area(bb) for bb in a_boxes) + b_area = sum(area(bb) for bb in b_boxes) + if a_area <= 0.0 or b_area <= 0.0: + return 0.0 + p = inter / a_area + r = inter / b_area + return 0.0 if (p + r) == 0.0 else (2.0 * p * r) / (p + r) + + +def _pair_metrics(ta: TableStruct, tb: TableStruct, sem_iou: float) -> TablePairMetrics: + sem_f1: dict[SemClass, float] = {} + for sc in SemClass: + a_boxes = [e.bbox for e in ta.sem.get(sc, [])] + b_boxes = [e.bbox for e in tb.sem.get(sc, [])] + sem_f1[sc] = _sem_f1(a_boxes, b_boxes, iou_thresh=sem_iou) + tpm = TablePairMetrics( + row_count_diff=abs(len(ta.rows) - len(tb.rows)), + col_count_diff=abs(len(ta.cols) - len(tb.cols)), + merge_count_diff=abs(len(ta.merges) - len(tb.merges)), + sem_f1=sem_f1, + ) + # print(f"Rows: A: {len(ta.rows)}, B: {len(tb.rows)}") + # print(f"Cols: A: {len(ta.cols)}, B: {len(tb.cols)}") + # print(f"Merges: A: {len(ta.merges)}, B: {len(tb.merges)}") + + return tpm + + +def _doc_id_from_image_name(image_name: str) -> str: + return Path(image_name).stem + + +def _orphans_count(orphans: list[CVATElement]) -> dict[str, int]: + out: dict[str, int] = { + "rows": 0, + "cols": 0, + "merges": 0, + "sem_col_header": 0, + "sem_row_header": 0, + "sem_row_section": 0, + "sem_body": 0, + } + for el in orphans: + if el.label == TableStructLabel.TABLE_ROW: + out["rows"] += 1 + elif el.label == TableStructLabel.TABLE_COLUMN: + out["cols"] += 1 + elif el.label == TableStructLabel.TABLE_MERGED_CELL: + out["merges"] += 1 + elif el.label == TableStructLabel.COL_HEADER: + out["sem_col_header"] += 1 + elif el.label == TableStructLabel.ROW_HEADER: + out["sem_row_header"] += 1 + elif el.label == TableStructLabel.ROW_SECTION: + out["sem_row_section"] += 1 + elif el.label == TableStructLabel.BODY: + out["sem_body"] += 1 + return out + + +def evaluate_image( + set_a_xml: Path, + set_b_xml: Path, + image_name: str, + containment_thresh: float, + table_pair_iou: float, + sem_match_iou: float, +) -> Optional[ImageTablesEvaluation]: + try: + doc_a = DocumentStructure.from_cvat_xml(set_a_xml, image_name) + doc_b = DocumentStructure.from_cvat_xml(set_b_xml, image_name) + except Exception: + return None + + tables_a, orphans_a = _collect_tables(doc_a, containment_thresh) + tables_b, orphans_b = _collect_tables(doc_b, containment_thresh) + + matched, ua, ub = _pair_tables(tables_a, tables_b, iou_thresh=table_pair_iou) + pair_metrics = [ + _pair_metrics(ta, tb, sem_iou=sem_match_iou) for (ta, tb) in matched + ] + + # Sums of absolute differences across matched table pairs + row_diff_sum = int(sum(pm.row_count_diff for pm in pair_metrics)) + col_diff_sum = int(sum(pm.col_count_diff for pm in pair_metrics)) + merge_diff_sum = int(sum(pm.merge_count_diff for pm in pair_metrics)) + + # Average semantic F1 over matched pairs (0 when no pairs) + def mean_f1(key: SemClass) -> float: + seq = [pm.sem_f1[key] for pm in pair_metrics] + return float(sum(seq)) / float(len(seq)) if seq else 0.0 + + return ImageTablesEvaluation( + doc_id=_doc_id_from_image_name(image_name), + 
row_count_abs_diff_sum=row_diff_sum, + col_count_abs_diff_sum=col_diff_sum, + merge_count_abs_diff_sum=merge_diff_sum, + sem_body_f1=mean_f1(SemClass.BODY), + sem_row_section_f1=mean_f1(SemClass.ROW_SECTION), + sem_row_header_f1=mean_f1(SemClass.ROW_HEADER), + sem_col_header_f1=mean_f1(SemClass.COL_HEADER), + table_pairs=len(matched), + tables_unmatched=(len(ua) + len(ub)), + orphan_table_annotation_A=len(orphans_a), + orphan_table_annotation_B=len(orphans_b), + ) + + +app = typer.Typer(help="Compare table structure/semantics between two CVAT XMLs.") + + +def evaluate_tables( + set_a: Path, + set_b: Path, + containment_thresh: float = DEFAULT_CONTAINMENT_THRESH, + table_pair_iou: float = DEFAULT_TABLE_PAIR_IOU, + sem_match_iou: float = DEFAULT_SEM_MATCH_IOU, +) -> "TablesEvaluationRun": + """ + Library entrypoint: evaluate tables across images present in both CVAT XMLs. + Returns the full evaluation model (no file I/O, no Typer types). + """ + imgs = sorted(set(list_images_in_xml(set_a)) & set(list_images_in_xml(set_b))) + evals: list[ImageTablesEvaluation] = [] + for name in imgs: + res = evaluate_image( + set_a_xml=set_a, + set_b_xml=set_b, + image_name=name, + containment_thresh=containment_thresh, + table_pair_iou=table_pair_iou, + sem_match_iou=sem_match_iou, + ) + if res is not None: + evals.append(res) + return TablesEvaluationRun(evaluations=evals) + + +@app.command() +def run( + set_a: Path = typer.Option( + ..., exists=True, readable=True, help="CVAT XML (Set A)" + ), + set_b: Path = typer.Option( + ..., exists=True, readable=True, help="CVAT XML (Set B)" + ), + out: Path = typer.Option( + Path("evaluation_results/evaluation_CVAT_tables.json"), help="Output JSON" + ), + containment_thresh: float = typer.Option( + DEFAULT_CONTAINMENT_THRESH, min=0.0, max=1.0 + ), + table_pair_iou: float = typer.Option(DEFAULT_TABLE_PAIR_IOU, min=0.0, max=1.0), + sem_match_iou: float = typer.Option(DEFAULT_SEM_MATCH_IOU, min=0.0, max=1.0), +) -> None: + result = evaluate_tables( + set_a=set_a, + set_b=set_b, + containment_thresh=containment_thresh, + table_pair_iou=table_pair_iou, + sem_match_iou=sem_match_iou, + ) + out.parent.mkdir(parents=True, exist_ok=True) + payload = result.model_dump(mode="json") + out.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8") + print(f"Wrote {out.resolve()} ({len(result.evaluations)} images)") + + +if __name__ == "__main__": + app() diff --git a/docling_eval/cvat_tools/cvat_to_docling.py b/docling_eval/cvat_tools/cvat_to_docling.py index e910acd8..1ad15649 100644 --- a/docling_eval/cvat_tools/cvat_to_docling.py +++ b/docling_eval/cvat_tools/cvat_to_docling.py @@ -44,6 +44,7 @@ from docling_eval.cvat_tools.document import DocumentStructure from docling_eval.cvat_tools.models import CVATElement +from docling_eval.cvat_tools.parser import MissingImageInCVATXML from docling_eval.cvat_tools.tree import ( TreeNode, apply_reading_order_to_tree, @@ -750,7 +751,7 @@ def _create_item_by_label( parent: Optional[NodeItem], ) -> Optional[DocItem]: """Create appropriate DocItem based on element label.""" - content_layer = ContentLayer(element.content_layer.lower()) + content_layer = element.content_layer if doc_label == DocItemLabel.TITLE: return self.doc.add_title( @@ -1124,7 +1125,6 @@ def convert_cvat_to_docling( DoclingDocument or None if conversion fails """ try: - # Create DocumentStructure doc_structure = DocumentStructure.from_cvat_xml(xml_path, input_path.name) @@ -1199,6 +1199,9 @@ def convert_cvat_to_docling( # Convert return 
converter.convert() + except MissingImageInCVATXML: + # Re-raise so that calling code can handle with appropriate messaging + raise except Exception as e: _logger.error(f"Failed to convert CVAT to DoclingDocument: {e}") import traceback diff --git a/docling_eval/cvat_tools/models.py b/docling_eval/cvat_tools/models.py index 6e5d6031..cce71861 100644 --- a/docling_eval/cvat_tools/models.py +++ b/docling_eval/cvat_tools/models.py @@ -15,11 +15,21 @@ class ValidationSeverity(str, Enum): FATAL = "fatal" +class TableStructLabel(str, Enum): + TABLE_ROW = "table_row" + TABLE_COLUMN = "table_column" + TABLE_MERGED_CELL = "table_merged_cell" + COL_HEADER = "col_header" + ROW_HEADER = "row_header" + ROW_SECTION = "row_section" + BODY = "body" + + class CVATElement(BaseModel): """A rectangle element (box) in CVAT annotation, using BoundingBox from docling_core.""" id: int - label: Union[DocItemLabel, GraphCellLabel] + label: Union[DocItemLabel, GraphCellLabel, TableStructLabel] bbox: BoundingBox content_layer: ContentLayer type: Optional[str] = None diff --git a/docling_eval/cvat_tools/parser.py b/docling_eval/cvat_tools/parser.py index 0be616c3..413ccf22 100644 --- a/docling_eval/cvat_tools/parser.py +++ b/docling_eval/cvat_tools/parser.py @@ -12,6 +12,7 @@ CVATAnnotationPath, CVATElement, CVATImageInfo, + TableStructLabel, ) logger = logging.getLogger("docling_eval.cvat_tools.") @@ -76,58 +77,74 @@ def _parse_image_element( Parse a single element and extract elements and paths. Returns (elements, paths, image_info). """ + # Local import to avoid touching the module-level imports if you prefer + from docling_eval.cvat_tools.models import TableStructLabel + image_info = CVATImageInfo( width=float(image_el.attrib["width"]), height=float(image_el.attrib["height"]), name=image_el.attrib["name"], ) - elements = [] - paths = [] + + elements: list[CVATElement] = [] + paths: list[CVATAnnotationPath] = [] box_id = box_id_start path_id = path_id_start + + # ---- parse (rectangles) with strict labels (DocItemLabel | GraphCellLabel | TableStructLabel) for box in image_el.findall("box"): label_str = box.attrib["label"] + + # Parse into one of the known enums; skip if unknown + label_obj: Optional[object] = None try: - label = DocItemLabel(label_str) + label_obj = DocItemLabel(label_str) except ValueError: try: - label = GraphCellLabel(label_str) # type: ignore + label_obj = GraphCellLabel(label_str) # type: ignore[assignment] except ValueError: - # Skip invalid labels - logger.debug(f"Skipping invalid label: {label_str}") - continue + try: + label_obj = TableStructLabel(label_str) # type: ignore[assignment] + except ValueError: + logger.debug(f"Skipping invalid label: {label_str}") + continue + xtl = float(box.attrib["xtl"]) ytl = float(box.attrib["ytl"]) xbr = float(box.attrib["xbr"]) ybr = float(box.attrib["ybr"]) - bbox = cvat_box_to_bbox(xtl, ytl, xbr, ybr) - attributes = {} + bbox = cvat_box_to_bbox(xtl, ytl, xbr, ybr) # -> BoundingBox(l,t,r,b) TOPLEFT + + # Parse child tags; default content_layer to BODY + attributes: dict[str, str | None] = {} content_layer = None - type_ = None - level = None + type_: Optional[str] = None + level: Optional[int] = None + for attr in box.findall("attribute"): name = attr.attrib["name"] value = attr.text.strip() if attr.text else None attributes[name] = value if name == "content_layer" and value is not None: try: - content_layer = ContentLayer(value) + content_layer = ContentLayer(value.lower()) except Exception: content_layer = ContentLayer.BODY elif name == "type": type_ 
= value - elif name == "level": - if value is not None: - try: - level = int(value) - except Exception: - level = None + elif name == "level" and value is not None: + try: + level = int(value) + except Exception: + level = None + if content_layer is None: content_layer = ContentLayer.BODY + elements.append( CVATElement( id=box_id, - label=label, + label=label_obj, # Union[DocItemLabel, GraphCellLabel, TableStructLabel] bbox=bbox, content_layer=content_layer, type=type_, @@ -136,12 +153,15 @@ def _parse_image_element( ) ) box_id += 1 + + # ---- parse (paths) for poly in image_el.findall("polyline"): poly_label = poly.attrib["label"] points_str = poly.attrib["points"] points = [tuple(map(float, pt.split(","))) for pt in points_str.split(";")] - attributes = {} - level = None + + attributes: dict[str, str | None] = {} # type: ignore + level: Optional[int] = None # type: ignore for attr in poly.findall("attribute"): name = attr.attrib["name"] value = attr.text.strip() if attr.text else None @@ -151,6 +171,7 @@ def _parse_image_element( level = int(value) except Exception: level = None + paths.append( CVATAnnotationPath( id=path_id, @@ -161,6 +182,7 @@ def _parse_image_element( ) ) path_id += 1 + return elements, paths, image_info diff --git a/docling_eval/cvat_tools/validator.py b/docling_eval/cvat_tools/validator.py index a8d3bfe8..c785102c 100644 --- a/docling_eval/cvat_tools/validator.py +++ b/docling_eval/cvat_tools/validator.py @@ -3,6 +3,8 @@ from dataclasses import dataclass from typing import Dict, List, Optional, Set, Type +from docling_core.types.doc.document import ContentLayer + from .document import DocumentStructure from .models import ( CVATElement, @@ -176,7 +178,7 @@ def validate(self, doc: DocumentStructure) -> List[CVATValidationError]: # Collect all elements that would fail the reading order validation untouched_elements = [] for el in doc.elements: - if el.content_layer.upper() == "BACKGROUND": + if el.content_layer == ContentLayer.BACKGROUND: continue # Skip validation for elements inside table containers
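For reference, a minimal sketch of driving the new entrypoints from Python rather than the CLI, assuming the layout, document-structure, and key-value JSONs were already produced by an earlier evaluation run; every path below is a placeholder:

```python
import json
from pathlib import Path

from docling_eval.campaign_tools.combine_cvat_evaluations import combine_cvat_evaluations
from docling_eval.campaign_tools.evaluate_cvat_tables import evaluate_tables

results_dir = Path("evaluation_results")  # placeholder output directory

# Compare table structure/semantics between two CVAT XML annotation sets.
run = evaluate_tables(
    set_a=Path("gt_annotations.xml"),    # placeholder
    set_b=Path("pred_annotations.xml"),  # placeholder
)
tables_json = results_dir / "evaluation_CVAT_tables.json"
tables_json.parent.mkdir(parents=True, exist_ok=True)
tables_json.write_text(
    json.dumps(run.model_dump(mode="json"), ensure_ascii=False, indent=2),
    encoding="utf-8",
)

# Merge layout, document-structure, key-value, and table metrics into one sheet.
combine_cvat_evaluations(
    layout_json=results_dir / "evaluation_CVAT_layout.json",
    docstruct_json=results_dir / "evaluation_CVAT_document_structure.json",
    keyvalue_json=results_dir / "evaluation_CVAT_key_value.json",
    tables_json=tables_json,
    user_csv=None,
    out=Path("combined_evaluation.xlsx"),
)
```

This mirrors what `run_full_pipeline` does internally via `run_table_evaluation` followed by `combine_cvat_evaluations`.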