lightly-ai
diff --git a/‎.github/pull_request_template.md‎
Lines changed: 7 additions & 0 deletions b/‎.github/pull_request_template.md‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎.python-version‎
Lines changed: 1 addition & 1 deletion b/‎.python-version‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/labelformat/formats/semantic_segmentation/pascalvoc.py‎
Lines changed: 41 additions & 8 deletions b/‎src/labelformat/formats/semantic_segmentation/pascalvoc.py‎
Lines changed: 41 additions & 8 deletions
diff --git a/‎src/labelformat/formats/youtubevis.py‎
Lines changed: 6 additions & 6 deletions b/‎src/labelformat/formats/youtubevis.py‎
Lines changed: 6 additions & 6 deletions
diff --git a/‎src/labelformat/model/binary_mask_segmentation.py‎
Lines changed: 77 additions & 0 deletions b/‎src/labelformat/model/binary_mask_segmentation.py‎
Lines changed: 77 additions & 0 deletions
diff --git a/‎src/labelformat/model/semantic_segmentation.py‎
Lines changed: 59 additions & 28 deletions b/‎src/labelformat/model/semantic_segmentation.py‎
Lines changed: 59 additions & 28 deletions
@@ -0,0 +1,7 @@
+## What has changed and why?
+
+(Delete this: Please include a summary of the change and which issue is fixed. Please also include relevant motivation and context. List any dependencies that are required for this change.)
+
+## How has it been tested?
+
+(Delete this: Please describe the tests that you ran to verify your changes. Provide instructions so we can reproduce. Please also list any relevant details for your test configuration.)
@@ -1 +1 @@
-3.7.16
+3.8
@@ -1,5 +1,3 @@
-from __future__ import annotations
-
 """Pascal VOC semantic segmentation input.
 
 Assumptions:
@@ -8,6 +6,9 @@
 - Masks are PNGs with pixel values equal to class IDs.
 """
 
+from __future__ import annotations
+
+from argparse import ArgumentParser
 from collections.abc import Iterable, Mapping
 from dataclasses import dataclass
 from pathlib import Path
@@ -19,10 +20,12 @@
 from labelformat import utils
 from labelformat.model.category import Category
 from labelformat.model.image import Image
-from labelformat.model.semantic_segmentation import (
-    SemanticSegmentationInput,
-    SemanticSegmentationMask,
+from labelformat.model.instance_segmentation import (
+    ImageInstanceSegmentation,
+    InstanceSegmentationInput,
+    SingleInstanceSegmentation,
 )
+from labelformat.model.semantic_segmentation import SemanticSegmentationMask
 
 """TODO(Malte, 11/2025):
 Support what is already supported in LightlyTrain. https://docs.lightly.ai/train/stable/semantic_segmentation.html#data
@@ -34,12 +37,19 @@
 
 
 @dataclass
-class PascalVOCSemanticSegmentationInput(SemanticSegmentationInput):
+class PascalVOCSemanticSegmentationInput(InstanceSegmentationInput):
+    """Pascal VOC semantic segmentation input format."""
+
     _images_dir: Path
     _masks_dir: Path
     _filename_to_image: dict[str, Image]
     _categories: list[Category]
 
+    @staticmethod
+    def add_cli_arguments(parser: ArgumentParser) -> None:
+        """Register CLI arguments for this format (not implemented yet).
+
+        Raises:
+            NotImplementedError: Always; CLI support has not been added.
+        """
+        # TODO(Michal, 01/2026): Implement when needed.
+        raise NotImplementedError()
+
     @classmethod
     def from_dirs(
         cls,
@@ -91,7 +101,30 @@ def get_categories(self) -> Iterable[Category]:
     def get_images(self) -> Iterable[Image]:
         yield from self._filename_to_image.values()
 
-    def get_mask(self, image_filepath: str) -> SemanticSegmentationMask:
+    def get_labels(self) -> Iterable[ImageInstanceSegmentation]:
+        """Yield the semantic segmentation labels of every image.
+
+        Each yielded item holds one binary mask per category that occurs in the
+        image's mask, ordered by ascending category ID. ImageInstanceSegmentation
+        is reused as the container type for convenience.
+        """
+        categories_by_id = {category.id: category for category in self._categories}
+        for image in self.get_images():
+            mask = self._get_mask(image_filepath=image.filename)
+            objects = []
+            for category_id in sorted(mask.category_ids()):
+                category = categories_by_id[category_id]
+                segmentation = mask.to_binary_mask(category_id=category_id)
+                objects.append(
+                    SingleInstanceSegmentation(
+                        category=category,
+                        segmentation=segmentation,
+                    )
+                )
+            yield ImageInstanceSegmentation(image=image, objects=objects)
+
+    def _get_mask(self, image_filepath: str) -> SemanticSegmentationMask:
         # Validate image exists in our index.
         image_obj = self._filename_to_image.get(image_filepath)
         if image_obj is None:
@@ -114,7 +147,7 @@ def get_mask(self, image_filepath: str) -> SemanticSegmentationMask:
             valid_class_ids={c.id for c in self._categories},
         )
 
-        return SemanticSegmentationMask(array=mask_np)
+        return SemanticSegmentationMask.from_array(array=mask_np)
 
 
 def _validate_mask(
 
@@ -83,11 +83,11 @@ def _get_object_track_boxes(
     for bbox in ann["bboxes"]:
         if bbox is None or len(bbox) == 0:
             boxes.append(None)
-            continue
-        boxes.append(
-            BoundingBox.from_format(
-                bbox=[float(x) for x in bbox],
-                format=BoundingBoxFormat.XYWH,
+        else:
+            boxes.append(
+                BoundingBox.from_format(
+                    bbox=[float(x) for x in bbox],
+                    format=BoundingBoxFormat.XYWH,
+                )
             )
-        )
     return boxes
@@ -42,6 +42,28 @@ def from_binary_mask(
             bounding_box=bounding_box,
         )
 
+    @classmethod
+    def from_rle(
+        cls,
+        rle_row_wise: list[int],
+        width: int,
+        height: int,
+        bounding_box: BoundingBox | None = None,
+    ) -> "BinaryMaskSegmentation":
+        """
+        Build a BinaryMaskSegmentation directly from a row-wise RLE.
+
+        Args:
+            rle_row_wise: Run lengths of the mask in row-wise order.
+            width: Width of the mask in pixels.
+            height: Height of the mask in pixels.
+            bounding_box: Optional precomputed bounding box. When None, the box
+                is derived from the RLE.
+        """
+        resolved_box = (
+            bounding_box
+            if bounding_box is not None
+            else _compute_bbox_from_rle(
+                rle_row_wise=rle_row_wise, width=width, height=height
+            )
+        )
+        return cls(
+            _rle_row_wise=rle_row_wise,
+            width=width,
+            height=height,
+            bounding_box=resolved_box,
+        )
+
     def get_binary_mask(self) -> NDArray[np.int_]:
         """
         Get the binary mask (2D numpy array) from the RLE format.
@@ -50,6 +72,15 @@ def get_binary_mask(self) -> NDArray[np.int_]:
             self._rle_row_wise, self.height, self.width
         )
 
+    def get_rle(self) -> list[int]:
+        """
+        Get the run-length encoding (RLE) of the binary mask in row-wise format.
+
+        The first element corresponds to the number of 0s at the start of the mask.
+        If the mask starts with a 1, the first element will be 0. No other zeros can appear.
+
+        Returns:
+            The stored run lengths. The internal list is returned as-is (not
+            copied), so callers should not mutate it.
+        """
+        return self._rle_row_wise
+
 
 class RLEDecoderEncoder:
     """
@@ -112,3 +143,49 @@ def decode_column_wise_rle(
             decoded.extend([run_val] * count)
             run_val = 1 - run_val
         return np.array(decoded, dtype=np.int_).reshape((height, width), order="F")
+
+
+def _compute_bbox_from_rle(
+    rle_row_wise: list[int], width: int, height: int
+) -> BoundingBox:
+    """Compute the bounding box of the '1' pixels of a row-wise RLE.
+
+    Runs alternate between 0s and 1s, starting with 0s. The scan tracks the
+    min/max x/y coordinates of the '1' pixels, so the returned xmax/ymax are
+    the inclusive indices of the last foreground column/row.
+    The time complexity is O(len(rle_row_wise)).
+
+    Args:
+        rle_row_wise: Alternating run lengths in row-wise order.
+        width: Mask width in pixels.
+        height: Mask height in pixels.
+
+    Returns:
+        The tight bounding box of all '1' pixels. If the mask has no '1'
+        pixels (or has zero width), an all-zero box is returned instead of an
+        inverted box with xmin > xmax.
+    """
+    if width <= 0:
+        # Nothing can be addressed in a zero-width mask; this also avoids
+        # dividing by zero when converting flat offsets to coordinates.
+        return BoundingBox(xmin=0, ymin=0, xmax=0, ymax=0)
+
+    xmin = width
+    ymin = height
+    xmax = 0
+    ymax = 0
+    has_foreground = False
+
+    offset = 0  # Flat row-major index at which the current run starts.
+    is_foreground = False  # The first run always counts 0s.
+    for run_length in rle_row_wise:
+        # Zero-length runs cover no pixel and must not touch the box.
+        if is_foreground and run_length > 0:
+            has_foreground = True
+            start_y, start_x = divmod(offset, width)
+            end_y, end_x = divmod(offset + run_length - 1, width)
+
+            ymin = min(ymin, start_y)
+            ymax = max(ymax, end_y)
+            if end_y > start_y:
+                # The run wraps to a later row, so it touches both the last
+                # column (end of its first row) and the first column.
+                xmin = 0
+                xmax = width - 1
+            else:
+                xmin = min(xmin, start_x)
+                xmax = max(xmax, end_x)
+
+        offset += run_length
+        is_foreground = not is_foreground
+
+    if not has_foreground:
+        return BoundingBox(xmin=0, ymin=0, xmax=0, ymax=0)
+    return BoundingBox(xmin=xmin, ymin=ymin, xmax=xmax, ymax=ymax)
@@ -1,48 +1,79 @@
 from __future__ import annotations
 
+from labelformat.model.binary_mask_segmentation import BinaryMaskSegmentation
+
 """Semantic segmentation core types and input interface.
 """
 
-from abc import ABC, abstractmethod
-from collections.abc import Iterable
 from dataclasses import dataclass
 
 import numpy as np
 from numpy.typing import NDArray
 
-from labelformat.model.category import Category
-from labelformat.model.image import Image
-
 
 @dataclass
 class SemanticSegmentationMask:
     """Semantic segmentation mask with integer class IDs.
 
-    The mask is stored as a 2D numpy array of integer class IDs with shape (H, W).
+    For internal purposes only, interface might change between minor versions!
 
-    Args:
-        array: The 2D numpy array with integer class IDs of shape (H, W).
+    The mask is stored as multiclass run-length encoding (RLE).
     """
 
-    array: NDArray[np.int_]
-
-    def __post_init__(self) -> None:
-        if self.array.ndim != 2:
+    category_id_rle: list[tuple[int, int]]
+    """The mask as a run-length encoding (RLE) list of (category_id, run_length) tuples."""
+    width: int
+    """Width of the mask in pixels."""
+    height: int
+    """Height of the mask in pixels."""
+
+    @classmethod
+    def from_array(cls, array: NDArray[np.int_]) -> "SemanticSegmentationMask":
+        """Create a SemanticSegmentationMask from a 2D numpy array.
+
+        Args:
+            array: Integer class IDs with shape (H, W).
+
+        Returns:
+            The mask encoded as row-major runs of (category_id, run_length),
+            stored as plain Python ints.
+
+        Raises:
+            ValueError: If the array is not 2-dimensional.
+        """
+        if array.ndim != 2:
             raise ValueError("SemSegMask.array must be 2D with shape (H, W).")
 
-
-class SemanticSegmentationInput(ABC):
-
-    # TODO(Malte, 11/2025): Add a CLI interface later if needed.
-
-    @abstractmethod
-    def get_categories(self) -> Iterable[Category]:
-        raise NotImplementedError()
-
-    @abstractmethod
-    def get_images(self) -> Iterable[Image]:
-        raise NotImplementedError()
-
-    @abstractmethod
-    def get_mask(self, image_filepath: str) -> SemanticSegmentationMask:
-        raise NotImplementedError()
+        category_id_rle: list[tuple[int, int]] = []
+
+        # Row-major view of the pixels; runs are detected in this order.
+        flat = array.ravel()
+        if flat.size > 0:
+            # A run starts at index 0 and wherever a pixel differs from its
+            # predecessor.
+            boundaries = np.flatnonzero(flat[1:] != flat[:-1]) + 1
+            run_starts = np.concatenate(([0], boundaries))
+            run_lengths = np.diff(np.append(run_starts, flat.size))
+            # tolist() converts numpy scalars to plain Python ints.
+            category_id_rle = list(
+                zip(flat[run_starts].tolist(), run_lengths.tolist())
+            )
+
+        return cls(
+            category_id_rle=category_id_rle, width=array.shape[1], height=array.shape[0]
+        )
+
+    def to_binary_mask(self, category_id: int) -> BinaryMaskSegmentation:
+        """Extract the binary mask of a single category.
+
+        Consecutive runs that map to the same binary value are merged. The
+        resulting RLE starts with the number of leading 0s, which is 0 when
+        the mask begins with the requested category.
+        """
+        runs = []
+
+        current_value = 0
+        current_length = 0
+        for run_category_id, length in self.category_id_rle:
+            value = 1 if run_category_id == category_id else 0
+            if value != current_value:
+                # The binary value flipped: close the run collected so far.
+                runs.append(current_length)
+                current_value = value
+                current_length = length
+            else:
+                current_length += length
+
+        runs.append(current_length)
+        return BinaryMaskSegmentation.from_rle(
+            rle_row_wise=runs,
+            width=self.width,
+            height=self.height,
+        )
+
+    def category_ids(self) -> set[int]:
+        """Return every distinct category ID that occurs in the mask."""
+        present: set[int] = set()
+        for category_id, _run_length in self.category_id_rle:
+            present.add(category_id)
+        return present