mathpluscode
diff --git a/cinema/log.py b/cinema/log.py
Lines changed: 3 additions & 2 deletions
diff --git a/cinema/mae/mae_test.py b/cinema/mae/mae_test.py
Lines changed: 1 addition & 1 deletion
diff --git a/cinema/segmentation/train_test.py b/cinema/segmentation/train_test.py
Lines changed: 1 addition & 1 deletion
diff --git a/examples/inference/classification_cvd.py b/examples/inference/classification_cvd.py
Lines changed: 13 additions & 6 deletions
diff --git a/examples/inference/classification_sex.py b/examples/inference/classification_sex.py
Lines changed: 13 additions & 6 deletions
diff --git a/examples/inference/classification_vendor.py b/examples/inference/classification_vendor.py
Lines changed: 13 additions & 6 deletions
diff --git a/examples/inference/landmark_coordinate.py b/examples/inference/landmark_coordinate.py
Lines changed: 12 additions & 5 deletions
diff --git a/examples/inference/landmark_heatmap.py b/examples/inference/landmark_heatmap.py
Lines changed: 12 additions & 5 deletions
diff --git a/examples/inference/mae.py b/examples/inference/mae.py
Lines changed: 18 additions & 11 deletions
@@ -4,7 +4,6 @@
 import sys
 from pathlib import Path
 
-import wandb
 from omegaconf import DictConfig, OmegaConf
 
 
@@ -43,7 +42,7 @@ def flatten_dict(d: dict, parent_key: str = "", sep: str = "_") -> dict:  # type
     return dict(items)
 
 
-def init_wandb(config: DictConfig, tags: list[str]) -> tuple[wandb.sdk.wandb_run.Run | None, Path]:
+def init_wandb(config: DictConfig, tags: list[str]) -> tuple:  # type:ignore[type-arg]
     """Initialize wandb.
 
     Args:
@@ -54,6 +53,8 @@ def init_wandb(config: DictConfig, tags: list[str]) -> tuple[wandb.sdk.wandb_run
         wandb run and checkpoint directory.
     """
     if config.logging.wandb.project:
+        import wandb  # lazy import
+
         wandb_run = wandb.init(
             project=config.logging.wandb.project,
             entity=config.logging.wandb.entity,
 
@@ -125,7 +125,7 @@ def test_conv_mae_size(
     # value can be nan if target is empty
     # this is unlikely to happen with large mask_ratio
     if min(ns_masked) > 0:
-        assert not np.isnan(loss.detach().numpy())
+        assert not np.isnan(loss.detach().cpu().numpy())
         for v in metrics.values():
             assert not np.isnan(v.detach())
             assert v.shape == ()
@@ -105,7 +105,7 @@ def test_segmentation_eval_metrics(
 
     metrics = segmentation_metrics(logits, labels, spacing)
     for v in metrics.values():
-        assert not np.any(np.isnan(v.detach().numpy()))
+        assert not np.any(np.isnan(v.detach().cpu().numpy()))
         assert v.shape == (batch,)
 
     # ensure inputs are not modified
 
@@ -12,7 +12,7 @@
 from cinema import ConvViT
 
 
-def run(trained_dataset: str, view: str, seed: int) -> None:
+def run(trained_dataset: str, view: str, seed: int, device: torch.device, dtype: torch.dtype) -> None:
     """Run CVD classification using fine-tuned checkpoint."""
     # load config to get class names
     config_path = hf_hub_download(
@@ -28,13 +28,14 @@ def run(trained_dataset: str, view: str, seed: int) -> None:
         model_filename=f"finetuned/classification_cvd/{trained_dataset}_{view}/{trained_dataset}_{view}_{seed}.safetensors",
         config_filename=f"finetuned/classification_cvd/{trained_dataset}_{view}/config.yaml",
     )
+    model.to(device)
 
     # load sample data from mnms2 of class HCM and form a batch of size 1
     spatial_size = (192, 192, 16) if view == "sax" else (256, 256)
     transform = Compose(
         [
             ScaleIntensityd(keys=view),
-            SpatialPadd(keys=view, spatial_size=spatial_size, method="end", lazy=True, allow_missing_keys=True),
+            SpatialPadd(keys=view, spatial_size=spatial_size, method="end"),
         ]
     )
     exp_dir = Path(__file__).parent.parent.resolve()
@@ -43,9 +44,9 @@ def run(trained_dataset: str, view: str, seed: int) -> None:
     image = np.stack([ed_image, es_image], axis=0)  # (2, x, y, 1) or (2, x, y, z)
     if view != "sax":
         image = image[..., 0]  # (2, x, y, 1) -> (2, x, y)
-    batch = transform({view: torch.from_numpy(image).to(dtype=torch.float32)})
-    batch = {k: v[None, ...] for k, v in batch.items()}  # batch size 1
-    with torch.no_grad(), torch.autocast("cuda", enabled=torch.cuda.is_available()):
+    batch = transform({view: torch.from_numpy(image)})
+    batch = {k: v[None, ...].to(device=device, dtype=dtype) for k, v in batch.items()}
+    with torch.no_grad(), torch.autocast("cuda", dtype=dtype, enabled=torch.cuda.is_available()):
         logits = model(batch)  # (1, n_classes)
     probs = torch.softmax(logits, dim=1)[0]  # (n_classes,)
     probs_dict = dict(zip(classes, probs.cpu().numpy(), strict=False))
@@ -55,10 +56,16 @@ def run(trained_dataset: str, view: str, seed: int) -> None:
 
 
 if __name__ == "__main__":
+    dtype, device = torch.float32, torch.device("cpu")
+    if torch.cuda.is_available():
+        device = torch.device("cuda")
+        if torch.cuda.is_bf16_supported():
+            dtype = torch.bfloat16
+
     for trained_dataset, view in zip(
         ["acdc", "mnms", "mnms2", "mnms2"],
         ["sax", "sax", "sax", "lax_4c"],
         strict=False,
     ):
         for seed in range(3):
-            run(trained_dataset, view, seed)
+            run(trained_dataset, view, seed, device, dtype)
@@ -12,7 +12,7 @@
 from cinema import ConvViT
 
 
-def run(seed: int) -> None:
+def run(seed: int, device: torch.device, dtype: torch.dtype) -> None:
     """Run sex classification using fine-tuned checkpoint."""
     trained_dataset, view = "mnms", "sax"
     # load config to get class names
@@ -29,13 +29,14 @@ def run(seed: int) -> None:
         model_filename=f"finetuned/classification_sex/{trained_dataset}_{view}/{trained_dataset}_{view}_{seed}.safetensors",
         config_filename=f"finetuned/classification_sex/{trained_dataset}_{view}/config.yaml",
     )
+    model.to(device)
 
     # load sample data from mnms2 of class HCM and form a batch of size 1
     spatial_size = (192, 192, 16) if view == "sax" else (256, 256)
     transform = Compose(
         [
             ScaleIntensityd(keys=view),
-            SpatialPadd(keys=view, spatial_size=spatial_size, method="end", lazy=True, allow_missing_keys=True),
+            SpatialPadd(keys=view, spatial_size=spatial_size, method="end"),
         ]
     )
     exp_dir = Path(__file__).parent.parent.resolve()
@@ -44,9 +45,9 @@ def run(seed: int) -> None:
     image = np.stack([ed_image, es_image], axis=0)  # (2, x, y, 1) or (2, x, y, z)
     if view != "sax":
         image = image[..., 0]  # (2, x, y, 1) -> (2, x, y)
-    batch = transform({view: torch.from_numpy(image).to(dtype=torch.float32)})
-    batch = {k: v[None, ...] for k, v in batch.items()}  # batch size 1
-    with torch.no_grad(), torch.autocast("cuda", enabled=torch.cuda.is_available()):
+    batch = transform({view: torch.from_numpy(image)})
+    batch = {k: v[None, ...].to(device=device, dtype=dtype) for k, v in batch.items()}
+    with torch.no_grad(), torch.autocast("cuda", dtype=dtype, enabled=torch.cuda.is_available()):
         logits = model(batch)  # (1, n_classes)
     probs = torch.softmax(logits, dim=1)[0]  # (n_classes,)
     probs_dict = dict(zip(classes, probs.cpu().numpy(), strict=False))
@@ -56,5 +57,11 @@ def run(seed: int) -> None:
 
 
 if __name__ == "__main__":
+    dtype, device = torch.float32, torch.device("cpu")
+    if torch.cuda.is_available():
+        device = torch.device("cuda")
+        if torch.cuda.is_bf16_supported():
+            dtype = torch.bfloat16
+
     for seed in range(3):
-        run(seed)
+        run(seed, device, dtype)
@@ -12,7 +12,7 @@
 from cinema import ConvViT
 
 
-def run(view: str, seed: int) -> None:
+def run(view: str, seed: int, device: torch.device, dtype: torch.dtype) -> None:
     """Run vendor classification using fine-tuned checkpoint."""
     trained_dataset = "mnms2"
     # load config to get class names
@@ -29,13 +29,14 @@ def run(view: str, seed: int) -> None:
         model_filename=f"finetuned/classification_vendor/{trained_dataset}_{view}/{trained_dataset}_{view}_{seed}.safetensors",
         config_filename=f"finetuned/classification_vendor/{trained_dataset}_{view}/config.yaml",
     )
+    model.to(device)
 
     # load sample data from mnms2 of class HCM and form a batch of size 1
     spatial_size = (192, 192, 16) if view == "sax" else (256, 256)
     transform = Compose(
         [
             ScaleIntensityd(keys=view),
-            SpatialPadd(keys=view, spatial_size=spatial_size, method="end", lazy=True, allow_missing_keys=True),
+            SpatialPadd(keys=view, spatial_size=spatial_size, method="end"),
         ]
     )
     exp_dir = Path(__file__).parent.parent.resolve()
@@ -44,9 +45,9 @@ def run(view: str, seed: int) -> None:
     image = np.stack([ed_image, es_image], axis=0)  # (2, x, y, 1) or (2, x, y, z)
     if view != "sax":
         image = image[..., 0]  # (2, x, y, 1) -> (2, x, y)
-    batch = transform({view: torch.from_numpy(image).to(dtype=torch.float32)})
-    batch = {k: v[None, ...] for k, v in batch.items()}  # batch size 1
-    with torch.no_grad(), torch.autocast("cuda", enabled=torch.cuda.is_available()):
+    batch = transform({view: torch.from_numpy(image)})
+    batch = {k: v[None, ...].to(device=device, dtype=dtype) for k, v in batch.items()}
+    with torch.no_grad(), torch.autocast("cuda", dtype=dtype, enabled=torch.cuda.is_available()):
         logits = model(batch)  # (1, n_classes)
     probs = torch.softmax(logits, dim=1)[0]  # (n_classes,)
     probs_dict = dict(zip(classes, probs.cpu().numpy(), strict=False))
@@ -56,6 +57,12 @@ def run(view: str, seed: int) -> None:
 
 
 if __name__ == "__main__":
+    dtype, device = torch.float32, torch.device("cpu")
+    if torch.cuda.is_available():
+        device = torch.device("cuda")
+        if torch.cuda.is_bf16_supported():
+            dtype = torch.bfloat16
+
     for view in ["sax", "lax_4c"]:
         for seed in range(3):
-            run(view, seed)
+            run(view, seed, device, dtype)
@@ -12,14 +12,15 @@
 from cinema import ConvViT
 
 
-def run(view: str, seed: int) -> None:
+def run(view: str, seed: int, device: torch.device, dtype: torch.dtype) -> None:
     """Run landmark localization on LAX images using fine-tuned checkpoint."""
     # load model
     model = ConvViT.from_finetuned(
         repo_id="mathpluscode/CineMA",
         model_filename=f"finetuned/landmark_coordinate/{view}/{view}_{seed}.safetensors",
         config_filename=f"finetuned/landmark_coordinate/{view}/config.yaml",
     )
+    model.to(device)
 
     # load sample data and form a batch of size 1
     transform = ScaleIntensityd(keys=view)
@@ -31,9 +32,9 @@ def run(view: str, seed: int) -> None:
     preds_list = []
     lv_lengths = []
     for t in tqdm(range(n_frames), total=n_frames):
-        batch = transform({view: torch.from_numpy(images[None, ..., 0, t]).to(dtype=torch.float32)})
-        batch = {k: v[None, ...] for k, v in batch.items()}  # batch size 1
-        with torch.no_grad(), torch.autocast("cuda", enabled=torch.cuda.is_available()):
+        batch = transform({view: torch.from_numpy(images[None, ..., 0, t])})
+        batch = {k: v[None, ...].to(device=device, dtype=dtype) for k, v in batch.items()}
+        with torch.no_grad(), torch.autocast("cuda", dtype=dtype, enabled=torch.cuda.is_available()):
             coords = model(batch)[0].numpy()  # (6,)
         coords *= np.array([w, h, w, h, w, h])
         coords = [int(x) for x in coords]
@@ -85,6 +86,12 @@ def run(view: str, seed: int) -> None:
 
 
 if __name__ == "__main__":
+    dtype, device = torch.float32, torch.device("cpu")
+    if torch.cuda.is_available():
+        device = torch.device("cuda")
+        if torch.cuda.is_bf16_supported():
+            dtype = torch.bfloat16
+
     for view in ["lax_2c", "lax_4c"]:
         for seed in range(3):
-            run(view, seed)
+            run(view, seed, device, dtype)
@@ -12,14 +12,15 @@
 from cinema import ConvUNetR, heatmap_soft_argmax
 
 
-def run(view: str, seed: int) -> None:
+def run(view: str, seed: int, device: torch.device, dtype: torch.dtype) -> None:
     """Run landmark localization on LAX images using fine-tuned checkpoint."""
     # load model
     model = ConvUNetR.from_finetuned(
         repo_id="mathpluscode/CineMA",
         model_filename=f"finetuned/landmark_heatmap/{view}/{view}_{seed}.safetensors",
         config_filename=f"finetuned/landmark_heatmap/{view}/config.yaml",
     )
+    model.to(device)
 
     # load sample data and form a batch of size 1
     transform = ScaleIntensityd(keys=view)
@@ -32,9 +33,9 @@ def run(view: str, seed: int) -> None:
     preds_list = []
     lv_lengths = []
     for t in tqdm(range(n_frames), total=n_frames):
-        batch = transform({view: torch.from_numpy(images[None, ..., 0, t]).to(dtype=torch.float32)})
-        batch = {k: v[None, ...] for k, v in batch.items()}  # batch size 1
-        with torch.no_grad(), torch.autocast("cuda", enabled=torch.cuda.is_available()):
+        batch = transform({view: torch.from_numpy(images[None, ..., 0, t])})
+        batch = {k: v[None, ...].to(device=device, dtype=dtype) for k, v in batch.items()}
+        with torch.no_grad(), torch.autocast("cuda", dtype=dtype, enabled=torch.cuda.is_available()):
             logits = model(batch)[view]  # (1, 3, x, y)
         probs = torch.sigmoid(logits)  # (1, 3, width, height)
         probs_list.append(probs[0].detach().cpu().numpy())
@@ -106,6 +107,12 @@ def run(view: str, seed: int) -> None:
 
 
 if __name__ == "__main__":
+    dtype, device = torch.float32, torch.device("cpu")
+    if torch.cuda.is_available():
+        device = torch.device("cuda")
+        if torch.cuda.is_bf16_supported():
+            dtype = torch.bfloat16
+
     for view in ["lax_2c", "lax_4c"]:
         for seed in range(3):
-            run(view, seed)
+            run(view, seed, device, dtype)
@@ -11,17 +11,18 @@
 from cinema import CineMA, patchify, unpatchify
 
 
-def run() -> None:
+def run(device: torch.device, dtype: torch.dtype) -> None:
     """Run MAE reconstruction."""
     # load model
     model = CineMA.from_pretrained()
+    model.to(device)
     model.eval()
 
     # load sample data and form a batch of size 1
     transform = Compose(
         [
             ScaleIntensityd(keys=("sax", "lax_2c", "lax_3c", "lax_4c"), allow_missing_keys=True),
-            SpatialPadd(keys="sax", spatial_size=(192, 192, 16), method="end", lazy=True, allow_missing_keys=True),
+            SpatialPadd(keys="sax", spatial_size=(192, 192, 16), method="end"),
             SpatialPadd(
                 keys=("lax_2c", "lax_3c", "lax_4c"),
                 spatial_size=(256, 256),
@@ -47,17 +48,17 @@ def run() -> None:
     )
     t = 25  # which time frame to use
     batch = {
-        "sax": sax_image[None, ..., t].to(dtype=torch.float32),
-        "lax_2c": lax_2c_image[None, ..., 0, t].to(dtype=torch.float32),
-        "lax_3c": lax_3c_image[None, ..., 0, t].to(dtype=torch.float32),
-        "lax_4c": lax_4c_image[None, ..., 0, t].to(dtype=torch.float32),
+        "sax": sax_image[None, ..., t],
+        "lax_2c": lax_2c_image[None, ..., 0, t],
+        "lax_3c": lax_3c_image[None, ..., 0, t],
+        "lax_4c": lax_4c_image[None, ..., 0, t],
     }
     batch = transform(batch)
     print(f"SAX view had originally {sax_image.shape[-2]} slices, now zero-padded to {batch['sax'].shape[-1]} slices.")  # noqa: T201
-    batch = {k: v[None, ...] for k, v in batch.items()}  # batch size 1
+    batch = {k: v[None, ...].to(device=device, dtype=dtype) for k, v in batch.items()}
 
     # forward
-    with torch.no_grad(), torch.autocast("cuda", enabled=torch.cuda.is_available()):
+    with torch.no_grad(), torch.autocast("cuda", dtype=dtype, enabled=torch.cuda.is_available()):
         _, pred_dict, enc_mask_dict, _ = model(batch, enc_mask_ratio=0.75)
 
     # visualize
@@ -76,8 +77,8 @@ def run() -> None:
             patch_size=model.dec_patch_size_dict[view],
             grid_size=model.enc_down_dict[view].patch_embed.grid_size,
         )
-        reconstructed = reconstructed[0, 0].detach().numpy()
-        image = batch[view][0, 0].detach().numpy()
+        reconstructed = reconstructed[0, 0].detach().cpu().numpy()
+        image = batch[view][0, 0].detach().cpu().numpy()
         error = np.abs(reconstructed - image)
 
         if view == "sax":
@@ -104,4 +105,10 @@ def run() -> None:
 
 
 if __name__ == "__main__":
-    run()
+    dtype, device = torch.float32, torch.device("cpu")
+    if torch.cuda.is_available():
+        device = torch.device("cuda")
+        if torch.cuda.is_bf16_supported():
+            dtype = torch.bfloat16
+
+    run(device, dtype)