Change PascalVOC input interface to InstanceSegmentationInput

michal-lightly · michal-lightly · commit 1f79c9475794 · 2026-01-15T17:41:12.000+01:00
diff --git a/src/labelformat/formats/semantic_segmentation/pascalvoc.py b/src/labelformat/formats/semantic_segmentation/pascalvoc.py
@@ -1,5 +1,3 @@
-from __future__ import annotations
-
 """Pascal VOC semantic segmentation input.
 
 Assumptions:
@@ -8,6 +6,9 @@
 - Masks are PNGs with pixel values equal to class IDs.
 """
 
+from __future__ import annotations
+
+from argparse import ArgumentParser
 from collections.abc import Iterable, Mapping
 from dataclasses import dataclass
 from pathlib import Path
@@ -19,8 +20,12 @@
 from labelformat import utils
 from labelformat.model.category import Category
 from labelformat.model.image import Image
+from labelformat.model.instance_segmentation import (
+    ImageInstanceSegmentation,
+    InstanceSegmentationInput,
+    SingleInstanceSegmentation,
+)
 from labelformat.model.semantic_segmentation import (
-    SemanticSegmentationInput,
     SemanticSegmentationMask,
 )
 
@@ -34,12 +39,19 @@
 
 
 @dataclass
-class PascalVOCSemanticSegmentationInput(SemanticSegmentationInput):
+class PascalVOCSemanticSegmentationInput(InstanceSegmentationInput):
+    """Pascal VOC semantic segmentation input format."""
+
     _images_dir: Path
     _masks_dir: Path
     _filename_to_image: dict[str, Image]
     _categories: list[Category]
 
+    @staticmethod
+    def add_cli_arguments(parser: ArgumentParser) -> None:
+        # TODO(Michal, 01/2026): Implement when needed.
+        raise NotImplementedError()
+
     @classmethod
     def from_dirs(
         cls,
@@ -91,7 +103,30 @@ def get_categories(self) -> Iterable[Category]:
     def get_images(self) -> Iterable[Image]:
         yield from self._filename_to_image.values()
 
-    def get_mask(self, image_filepath: str) -> SemanticSegmentationMask:
+    def get_labels(self) -> Iterable[ImageInstanceSegmentation]:
+        """Get semantic segmentation labels.
+
+        Yields one ImageInstanceSegmentation per image, holding one binary-mask object
+        per category present in the mask. Objects are sorted by category ID. The
+        ImageInstanceSegmentation type is reused as the return type for convenience.
+        """
+        category_id_to_category = {c.id: c for c in self._categories}
+        for image in self.get_images():
+            mask = self._get_mask(image_filepath=image.filename)
+            category_ids_in_mask = mask.category_ids()
+            objects = [
+                SingleInstanceSegmentation(
+                    category=category_id_to_category[cid],
+                    segmentation=mask.to_binary_mask(category_id=cid),
+                )
+                for cid in sorted(category_ids_in_mask)
+            ]
+            yield ImageInstanceSegmentation(
+                image=image,
+                objects=objects,
+            )
+
+    def _get_mask(self, image_filepath: str) -> SemanticSegmentationMask:
         # Validate image exists in our index.
         image_obj = self._filename_to_image.get(image_filepath)
         if image_obj is None:
diff --git a/src/labelformat/model/semantic_segmentation.py b/src/labelformat/model/semantic_segmentation.py
@@ -1,23 +1,15 @@
 from __future__ import annotations
 
-from typing import List, Optional, Tuple
-
 from labelformat.model.binary_mask_segmentation import BinaryMaskSegmentation
-from labelformat.model.instance_segmentation import SingleInstanceSegmentation
 
 """Semantic segmentation core types and input interface.
 """
 
-from abc import ABC, abstractmethod
-from collections.abc import Iterable
 from dataclasses import dataclass
 
 import numpy as np
 from numpy.typing import NDArray
 
-from labelformat.model.category import Category
-from labelformat.model.image import Image
-
 
 @dataclass
 class SemanticSegmentationMask:
@@ -29,7 +21,7 @@ class SemanticSegmentationMask:
         array: The 2D numpy array with integer class IDs of shape (H, W).
     """
 
-    category_id_rle: List[Tuple[int, int]]
+    category_id_rle: list[tuple[int, int]]
     """The mask as a run-length encoding (RLE) list of (category_id, run_length) tuples."""
     width: int
     height: int
@@ -40,9 +32,9 @@ def from_array(cls, array: NDArray[np.int_]) -> "SemanticSegmentationMask":
         if array.ndim != 2:
             raise ValueError("SemSegMask.array must be 2D with shape (H, W).")
 
-        category_id_rle: List[Tuple[int, int]] = []
+        category_id_rle: list[tuple[int, int]] = []
 
-        cur_cat_id: Optional[int] = None
+        cur_cat_id: int | None = None
         cur_run_length = 0
         for cat_id in array.flatten():
             if cat_id == cur_cat_id:
@@ -81,19 +73,6 @@ def to_binary_mask(self, category_id: int) -> BinaryMaskSegmentation:
             height=self.height,
         )
 
-
-class SemanticSegmentationInput(ABC):
-
-    # TODO(Malte, 11/2025): Add a CLI interface later if needed.
-
-    @abstractmethod
-    def get_categories(self) -> Iterable[Category]:
-        raise NotImplementedError()
-
-    @abstractmethod
-    def get_images(self) -> Iterable[Image]:
-        raise NotImplementedError()
-
-    @abstractmethod
-    def get_mask(self, image_filepath: str) -> SemanticSegmentationMask:
-        raise NotImplementedError()
+    def category_ids(self) -> set[int]:
+        """Get the set of category IDs present in the mask."""
+        return {cat_id for cat_id, _ in self.category_id_rle}
diff --git a/tests/unit/formats/semantic_segmentation/test_pascalvoc.py b/tests/unit/formats/semantic_segmentation/test_pascalvoc.py
@@ -4,6 +4,7 @@
 from pathlib import Path
 from typing import Dict
 
+import cv2
 import numpy as np
 import pytest
 from PIL import Image as PILImage
@@ -12,6 +13,7 @@
 from labelformat.formats.semantic_segmentation.pascalvoc import (
     PascalVOCSemanticSegmentationInput,
 )
+from labelformat.model.binary_mask_segmentation import BinaryMaskSegmentation
 from labelformat.model.image import Image
 from tests.unit.test_utils import FIXTURES_DIR
 
@@ -52,7 +54,7 @@ def test_get_mask__returns_rle_and_matches_image_length(self) -> None:
         )
 
         for img in ds.get_images():
-            mask = ds.get_mask(img.filename)
+            mask = ds._get_mask(img.filename)
             length = sum(run_length for _, run_length in mask.category_id_rle)
             assert length == img.width * img.height
 
@@ -83,7 +85,77 @@ def test_get_mask__unknown_image_raises(self) -> None:
             ValueError,
             match=r"Unknown image filepath does_not_exist\.jpg",
         ):
-            ds.get_mask("does_not_exist.jpg")
+            ds._get_mask("does_not_exist.jpg")
+
+    def test_get_labels(self, tmp_path: Path) -> None:
+        images_dir = tmp_path / "images"
+        images_dir.mkdir()
+        masks_dir = tmp_path / "masks"
+        masks_dir.mkdir()
+
+        # Create a simple image and mask
+        image0_bgr = np.full((3, 4, 3), (255, 0, 0), dtype=np.uint8)
+        cv2.imwrite(str(images_dir / "image0.jpg"), image0_bgr)
+        mask0 = np.array([[1, 0, 0, 0], [1, 0, 2, 2], [0, 0, 2, 0]], dtype=np.uint8)
+        cv2.imwrite(str(masks_dir / "image0.png"), mask0)
+
+        # Create another image and mask
+        image1_bgr = np.full((2, 2, 3), (0, 255, 0), dtype=np.uint8)
+        cv2.imwrite(str(images_dir / "image1.jpg"), image1_bgr)
+        mask1 = np.array([[1, 1], [1, 1]], dtype=np.uint8)
+        cv2.imwrite(str(masks_dir / "image1.png"), mask1)
+
+        # Create input instance
+        label_input = PascalVOCSemanticSegmentationInput.from_dirs(
+            images_dir=images_dir,
+            masks_dir=masks_dir,
+            class_id_to_name={0: "a", 1: "b", 2: "c", 3: "d"},
+        )
+
+        # Call get_labels
+        labels = sorted(label_input.get_labels(), key=lambda x: x.image.filename)
+        assert len(labels) == 2
+
+        # Verify first image labels
+        assert labels[0].image.filename == "image0.jpg"
+        objects = labels[0].objects
+        assert len(objects) == 3
+        assert objects[0].category.id == 0
+        assert objects[0].category.name == "a"
+        assert isinstance(objects[0].segmentation, BinaryMaskSegmentation)
+        assert objects[0].segmentation.get_binary_mask().tolist() == [
+            [0, 1, 1, 1],
+            [0, 1, 0, 0],
+            [1, 1, 0, 1],
+        ]
+        assert objects[1].category.id == 1
+        assert objects[1].category.name == "b"
+        assert isinstance(objects[1].segmentation, BinaryMaskSegmentation)
+        assert objects[1].segmentation.get_binary_mask().tolist() == [
+            [1, 0, 0, 0],
+            [1, 0, 0, 0],
+            [0, 0, 0, 0],
+        ]
+        assert objects[2].category.id == 2
+        assert objects[2].category.name == "c"
+        assert isinstance(objects[2].segmentation, BinaryMaskSegmentation)
+        assert objects[2].segmentation.get_binary_mask().tolist() == [
+            [0, 0, 0, 0],
+            [0, 0, 1, 1],
+            [0, 0, 1, 0],
+        ]
+
+        # Verify second image labels
+        assert labels[1].image.filename == "image1.jpg"
+        assert len(labels[1].objects) == 1
+        obj = labels[1].objects[0]
+        assert obj.category.id == 1
+        assert obj.category.name == "b"
+        assert isinstance(obj.segmentation, BinaryMaskSegmentation)
+        assert obj.segmentation.get_binary_mask().tolist() == [
+            [1, 1],
+            [1, 1],
+        ]
 
 
 def test__validate_mask__unknown_class_value_raises() -> None:
diff --git a/tests/unit/model/test_semantic_segmentation.py b/tests/unit/model/test_semantic_segmentation.py
@@ -57,3 +57,15 @@ def test_to_binary_mask(self) -> None:
             [0, 0, 0, 0],
             [0, 0, 0, 0],
         ]
+
+    def test_category_ids(self) -> None:
+        mask = SemanticSegmentationMask.from_array(
+            array=np.array(
+                [
+                    [1, 1, 4],
+                    [4, 1, 1],
+                ],
+                dtype=np.int_,
+            )
+        )
+        assert mask.category_ids() == {1, 4}