Commit 5537112

add overlay in animation of segmentation masks
1 parent 1aa8274 commit 5537112

6 files changed, +192 -56 lines changed

ncalab/prediction.py

Lines changed: 19 additions & 10 deletions
@@ -22,7 +22,7 @@ def __init__(self, model, steps: int, output_image: torch.Tensor):
         self.steps = steps
         assert output_image.shape[1] == model.num_channels
         self.output_image = output_image
-        self.output_array: Optional[np.ndarray] = None
+        self._output_array: Optional[np.ndarray] = None
 
     @property
     def image_channels(self) -> torch.Tensor:
@@ -56,23 +56,32 @@ def output_channels(self) -> torch.Tensor:
             :,
         ]
 
+    @property
+    def output_array(self) -> np.ndarray:
+        """
+        :returns [np.ndarray]: BCWH
+        """
+        if self._output_array is None:
+            self._output_array = self.output_image.detach().cpu().numpy()
+        return self._output_array
+
     @property
     def image_channels_np(self) -> np.ndarray:
         """
         :returns [np.ndarray]: BCWH
         """
-        if self.output_array is None:
-            self.output_array = self.output_image.detach().cpu().numpy()
-        return self.output_array[:, : self.model.num_image_channels, :, :]
+        if self._output_array is None:
+            self._output_array = self.output_image.detach().cpu().numpy()
+        return self._output_array[:, : self.model.num_image_channels, :, :]
 
     @property
     def hidden_channels_np(self) -> np.ndarray:
         """
         :returns [np.ndarray]: BCWH
         """
-        if self.output_array is None:
-            self.output_array = self.output_image.detach().cpu().numpy()
-        return self.output_array[
+        if self._output_array is None:
+            self._output_array = self.output_image.detach().cpu().numpy()
+        return self._output_array[
             :,
             self.model.num_image_channels : self.model.num_hidden_channels
             + self.model.num_hidden_channels,
@@ -85,9 +94,9 @@ def output_channels_np(self) -> np.ndarray:
         """
         :returns [np.ndarray]: BCWH
         """
-        if self.output_array is None:
-            self.output_array = self.output_image.detach().cpu().numpy()
-        return self.output_array[
+        if self._output_array is None:
+            self._output_array = self.output_image.detach().cpu().numpy()
+        return self._output_array[
             :,
             -self.model.num_output_channels :,
             :,
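
The diff above replaces the public output_array attribute with a lazily cached, read-only property, so the tensor-to-NumPy conversion happens at most once per prediction. A minimal standalone sketch of that caching pattern (the class name and tensor shape are illustrative, not NCALab's actual Prediction API):

from typing import Optional

import numpy as np
import torch


class PredictionSketch:
    """Illustrative stand-in for the Prediction class touched above."""

    def __init__(self, output_image: torch.Tensor):
        self.output_image = output_image  # BCWH tensor, possibly on the GPU
        self._output_array: Optional[np.ndarray] = None  # lazily filled cache

    @property
    def output_array(self) -> np.ndarray:
        # Convert once, then reuse the cached NumPy copy on later accesses.
        if self._output_array is None:
            self._output_array = self.output_image.detach().cpu().numpy()
        return self._output_array


p = PredictionSketch(torch.rand(1, 4, 8, 8))
assert p.output_array is p.output_array  # both accesses return the cached array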

ncalab/visualization/animation.py

Lines changed: 26 additions & 10 deletions
@@ -15,32 +15,31 @@ def __init__(
         interval=100,
         repeat=True,
         repeat_delay=3000,
+        overlay=False,
     ):
         nca.eval()
 
         fig, ax = plt.subplots()
         fig.set_size_inches(2, 2)
 
         # first frame is input image
-        if nca.immutable_image_channels:
+        if nca.immutable_image_channels and not overlay:
             first_frame = seed[0, -nca.num_output_channels :]
         else:
             first_frame = seed[0, : nca.num_image_channels]
-        first_frame = first_frame.permute(1, 2, 0).detach().cpu().numpy()
+        first_frame_np = first_frame.permute(1, 2, 0).detach().cpu().numpy()
+        first_frame_np = np.clip(first_frame, 0, 1)
+
         im = ax.imshow(
-            first_frame,
+            first_frame_np,
             animated=True,
         )
 
         predictions = nca.record(seed, steps)
         images = []
         for prediction in predictions:
-            if nca.immutable_image_channels:
-                output_image = prediction.output_channels_np[0]
-            else:
-                output_image = prediction.image_channels_np[0]
+            output_image = prediction.output_array[0]
             output_image = output_image.transpose(1, 2, 0)
-            output_image = np.clip(output_image, 0, 1)
             images.append(output_image)
 
         ax.set_axis_off()
@@ -49,8 +48,25 @@ def __init__(
         plt.tight_layout()
 
         def update(i):
-            nonlocal images
-            im.set_array(images[i])
+            nonlocal images, nca
+            arr = images[i]
+            if not nca.immutable_image_channels:
+                arr = arr[:, :, : nca.num_image_channels]
+            elif overlay:
+                A = np.clip(arr[:, :, : nca.num_image_channels], 0, 1)
+                B = np.clip(arr[:, :, -nca.num_output_channels :].squeeze(-1), 0, 1)
+                alpha = 0.8
+                threshold = 0.2
+                beta = 0.8
+                blue = A[:, :, 2]
+                blue[B > threshold] = beta * (
+                    alpha * B[B > threshold] + (1 - alpha) * blue[B > threshold]
+                )
+                A[:, :, 2] = blue
+                arr = A
+            else:
+                arr = arr[:, :, -nca.num_output_channels :]
+            im.set_array(arr)
             return (im,)
 
         self.animation_fig = animation.FuncAnimation(
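
When overlay is set, update() blends the soft segmentation output into the blue channel of the input image wherever the mask exceeds a threshold. A standalone NumPy sketch of that blend (the function name and random example data are made up; only the thresholded blue-channel arithmetic mirrors the diff):

import numpy as np


def overlay_mask_blue(rgb, mask, alpha=0.8, beta=0.8, threshold=0.2):
    """Blend a soft segmentation mask into the blue channel of an HWC RGB frame."""
    out = np.clip(rgb, 0, 1).copy()
    m = np.clip(mask, 0, 1)
    blue = out[:, :, 2]
    hit = m > threshold  # overlay only where the mask is confident enough
    blue[hit] = beta * (alpha * m[hit] + (1 - alpha) * blue[hit])
    out[:, :, 2] = blue
    return out


frame = np.random.rand(64, 64, 3).astype(np.float32)  # HWC image in [0, 1]
mask = np.random.rand(64, 64).astype(np.float32)      # soft mask in [0, 1]
blended = overlay_mask_blue(frame, mask)               # ready for imshow / GIF frames
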
tasks/segmentation_kvasir_seg/.gitignore (new file)

Lines changed: 2 additions & 0 deletions

@@ -0,0 +1,2 @@
+figures/
+weights/

tasks/segmentation_kvasir_seg/dataset_kvasir_seg.py (new file)

Lines changed: 34 additions & 0 deletions

@@ -0,0 +1,34 @@
+
+from pathlib import Path, PosixPath
+from typing import Any
+
+import numpy as np
+from PIL import Image
+
+from torch.utils.data import Dataset
+
+
+class KvasirSegDataset(Dataset):
+    def __init__(self, path: Path | PosixPath, transform) -> None:
+        super().__init__()
+        self.path = path
+        self.image_filenames = sorted((path / "Kvasir-SEG" / "images").glob("*.jpg"))
+        self.transform = transform
+
+    def __len__(self):
+        return len(self.image_filenames)
+
+    def __getitem__(self, index) -> Any:
+        filename = self.image_filenames[index].name
+        image_filename = (self.path / "Kvasir-SEG" / "images" / filename).resolve()
+        mask_filename = (self.path / "Kvasir-SEG" / "masks" / filename).resolve()
+        image = Image.open(image_filename).convert("RGB")
+        mask = Image.open(mask_filename).convert("L")
+        bbox = image.getbbox()
+        image = image.crop(bbox)
+        mask = mask.crop(bbox)
+        image_arr = np.asarray(image, dtype=np.float32) / 255.0
+        mask_arr = np.asarray(mask, dtype=np.float32) / 255.0
+        sample = {"image": image_arr, "mask": mask_arr}
+        sample = self.transform(**sample)
+        return sample["image"], sample["mask"]
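
A minimal usage sketch of the new dataset class (KVASIR_SEG_PATH normally comes from download_kvasir_seg; the local path and the 256x256 resize are assumptions for illustration):

from pathlib import Path

import albumentations as A
from albumentations.pytorch import ToTensorV2

from dataset_kvasir_seg import KvasirSegDataset  # the new module added above

# Assumed location of the extracted Kvasir-SEG archive.
KVASIR_SEG_PATH = Path("data")

T = A.Compose(
    [
        A.Resize(256, 256),
        ToTensorV2(),  # image becomes a CHW float tensor, mask stays HW
    ]
)
dataset = KvasirSegDataset(KVASIR_SEG_PATH, transform=T)
image, mask = dataset[0]  # image: 3x256x256 tensor, mask: 256x256 tensor
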
tasks/segmentation_kvasir_seg/eval_segmentation_kvasir_seg.py (new file)

Lines changed: 86 additions & 0 deletions

@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+import os
+import sys
+from pathlib import Path
+
+root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
+sys.path.append(root_dir)
+
+from ncalab import (
+    Animator,
+    SegmentationNCAModel,
+    CascadeNCA,
+    get_compute_device,
+    print_NCALab_banner,
+    fix_random_seed
+)
+
+from download_kvasir_seg import KVASIR_SEG_PATH  # type: ignore[import-untyped]
+from dataset_kvasir_seg import KvasirSegDataset
+
+import albumentations as A  # type: ignore[import-untyped]
+from albumentations.pytorch import ToTensorV2  # type: ignore[import-untyped]
+import click
+
+import torch
+
+
+TASK_PATH = Path(__file__).parent
+FIGURE_PATH = TASK_PATH / "figures"
+FIGURE_PATH.mkdir(exist_ok=True)
+WEIGHTS_PATH = TASK_PATH / "weights"
+WEIGHTS_PATH.mkdir(exist_ok=True)
+
+
+@click.command()
+@click.option("--hidden-channels", "-H", default=18, type=int)
+@click.option(
+    "--gpu/--no-gpu", is_flag=True, default=True, help="Try using the GPU if available."
+)
+@click.option(
+    "--gpu-index", type=int, default=0, help="Index of GPU to use, if --gpu in use."
+)
+def eval_segmentation_kvasir_seg(hidden_channels: int, gpu: bool, gpu_index: int):
+    print_NCALab_banner()
+    fix_random_seed()
+
+    device = get_compute_device(f"cuda:{gpu_index}" if gpu else "cpu")
+
+    nca = SegmentationNCAModel(
+        device,
+        num_image_channels=3,
+        num_hidden_channels=hidden_channels,
+        num_classes=1,
+        pad_noise=True,
+        fire_rate=0.8,
+    )
+    cascade = CascadeNCA(nca, [8, 4, 2, 1], [70, 20, 10, 5])
+
+    T = A.Compose(
+        [
+            A.RandomCrop(300, 300),
+            A.Resize(256, 256),
+            A.RandomRotate90(),
+            A.HorizontalFlip(),
+            ToTensorV2(),
+        ]
+    )
+    dataset = KvasirSegDataset(KVASIR_SEG_PATH, transform=T)
+
+    cascade.load_state_dict(
+        torch.load(
+            WEIGHTS_PATH / "segmentation_kvasir_seg" / "last_model.pth",
+            weights_only=True,
+        )
+    )
+
+    seed = dataset[0][0].unsqueeze(0).to(device)
+    animator = Animator(cascade, seed, overlay=True)
+
+    out_path = FIGURE_PATH / "segmentation_kvasir_seg.gif"
+    animator.save(out_path)
+    click.secho(f"Done. You'll find the generated GIF in {out_path} .")
+
+
+if __name__ == "__main__":
+    eval_segmentation_kvasir_seg()
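
Both the evaluation script above and the updated training script below route the new --gpu/--gpu-index options into get_compute_device(f"cuda:{gpu_index}" if gpu else "cpu"). A plain-torch sketch of equivalent device selection (this helper is an assumption for illustration, not NCALab's get_compute_device implementation):

import torch


def pick_device(gpu: bool, gpu_index: int) -> torch.device:
    """Use the requested CUDA device when available, otherwise fall back to CPU."""
    if gpu and torch.cuda.is_available():
        return torch.device(f"cuda:{gpu_index}")
    return torch.device("cpu")


device = pick_device(gpu=True, gpu_index=0)  # mirrors the default CLI options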

tasks/segmentation_kvasir_seg/train_segmentation_kvasir_seg.py

Lines changed: 25 additions & 36 deletions
@@ -5,64 +5,44 @@
 root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
 sys.path.append(root_dir)
 
-from pathlib import Path, PosixPath
-from typing import Any
+from pathlib import Path
+
 
 from ncalab import (
     SegmentationNCAModel,
     CascadeNCA,
     BasicNCATrainer,
     get_compute_device,
     print_mascot,
+    print_NCALab_banner,
+    fix_random_seed,
 )
 
 from download_kvasir_seg import download_and_extract, KVASIR_SEG_PATH  # type: ignore[import-untyped]
+from dataset_kvasir_seg import KvasirSegDataset
 
 import albumentations as A  # type: ignore[import-untyped]
 from albumentations.pytorch import ToTensorV2  # type: ignore[import-untyped]
 import click
-import numpy as np
-from PIL import Image
+
 from sklearn.model_selection import train_test_split  # type: ignore[import-untyped]
 import torch
 from torch.utils.tensorboard import SummaryWriter
-from torch.utils.data import Dataset, Subset
+from torch.utils.data import Subset
+
 
 TASK_PATH = Path(__file__).parent.resolve()
 WEIGHTS_PATH = TASK_PATH / "weights"
 WEIGHTS_PATH.mkdir(exist_ok=True)
 
 
-class KvasirSegDataset(Dataset):
-    def __init__(self, path: Path | PosixPath, transform) -> None:
-        super().__init__()
-        self.path = path
-        self.image_filenames = sorted((path / "Kvasir-SEG" / "images").glob("*.jpg"))
-        self.transform = transform
-
-    def __len__(self):
-        return len(self.image_filenames)
-
-    def __getitem__(self, index) -> Any:
-        filename = self.image_filenames[index].name
-        image_filename = (self.path / "Kvasir-SEG" / "images" / filename).resolve()
-        mask_filename = (self.path / "Kvasir-SEG" / "masks" / filename).resolve()
-        image = Image.open(image_filename).convert("RGB")
-        mask = Image.open(mask_filename).convert("L")
-        bbox = image.getbbox()
-        image = image.crop(bbox)
-        mask = mask.crop(bbox)
-        image_arr = np.asarray(image, dtype=np.float32) / 255.0
-        mask_arr = np.asarray(mask, dtype=np.float32) / 255.0
-        sample = {"image": image_arr, "mask": mask_arr}
-        sample = self.transform(**sample)
-        return sample["image"], sample["mask"]
-
-
-def train_segmentation_kvasir_seg(batch_size: int, hidden_channels: int):
+def train_segmentation_kvasir_seg(
+    batch_size: int, hidden_channels: int, gpu: bool, gpu_index: int
+):
     writer = SummaryWriter(comment="Segmentation Kvasir-SEG")
-
-    device = get_compute_device("cuda:0")
+    print_NCALab_banner()
+    fix_random_seed()
+    device = get_compute_device(f"cuda:{gpu_index}" if gpu else "cpu")
 
     nca = SegmentationNCAModel(
         device,
@@ -114,7 +94,13 @@ def train_segmentation_kvasir_seg(batch_size: int, hidden_channels: int):
 @click.command()
 @click.option("--batch-size", "-b", default=8, type=int)
 @click.option("--hidden-channels", "-H", default=18, type=int)
-def main(batch_size, hidden_channels):
+@click.option(
+    "--gpu/--no-gpu", is_flag=True, default=True, help="Try using the GPU if available."
+)
+@click.option(
+    "--gpu-index", type=int, default=0, help="Index of GPU to use, if --gpu in use."
+)
+def main(batch_size, hidden_channels, gpu, gpu_index):
     print_mascot(
         "You're training NCAs on a medical dataset now.\n"
         "\n"
@@ -129,7 +115,10 @@ def main(batch_size, hidden_channels):
     download_and_extract()
 
     train_segmentation_kvasir_seg(
-        batch_size=batch_size, hidden_channels=hidden_channels
+        batch_size=batch_size,
+        hidden_channels=hidden_channels,
+        gpu=gpu,
+        gpu_index=gpu_index,
     )
 
 