mehta-lab · edyoshikun · Apr 21, 2025 · Apr 22, 2025 · Apr 22, 2025 · Apr 23, 2025
diff --git a/applications/DynaCell/README.md b/applications/DynaCell/README.md
diff --git a/applications/DynaCell/demo_script.py b/applications/DynaCell/demo_script.py
@@ -0,0 +1,178 @@
+"""
+This script is a demo script for the DynaCell application.
+It loads the ome-zarr v0.4 format, calculates metrics and saves the results as csv files
+"""
+
+import datetime
+import tempfile
+from pathlib import Path
+from typing import Literal
+
+import pandas as pd
+import torch
+from lightning.pytorch.loggers import CSVLogger
+
+from viscy.data.dynacell import DynaCellDataBase, DynaCellDataModule
+from viscy.trainer import Trainer
+from viscy.translation.evaluation import IntensityMetrics, SegmentationMetrics
+
+# "high" lets float32 matmuls use faster reduced-precision Tensor Core kernels
+torch.set_float32_matmul_precision("high")
+
+csv_database_path = Path(
+    "/home/eduardo.hirata/repos/viscy/applications/DynaCell/dynacell_summary_table.csv"
+).expanduser()  # no-op here: the path is absolute and contains no "~"
+tmp_path = Path("/home/eduardo.hirata/repos/viscy/applications/DynaCell/demo_metrics")
+tmp_path.mkdir(parents=True, exist_ok=True)  # runs at import time
+
+
+def main(
+    method: Literal["segmentation2D", "segmentation3D", "intensity"] = "intensity",
+    use_z_slice_range: bool = False,
+):
+    """
+    Run DynaCell metrics computation.
+
+    Parameters
+    ----------
+    method : Literal["segmentation2D", "segmentation3D", "intensity"], optional
+        Type of metrics to compute, by default "intensity"
+    use_z_slice_range : bool, optional
+        Whether to use a z-slice range instead of a single slice, by default False
+    """
+    # Timestamp (to the second) keeps each run's logger name unique
+    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+
+    # z_slice is slice(15, 17) when a range is requested, otherwise index 16
+    z_slice_value = slice(15, 17) if use_z_slice_range else 16
+
+    # Create target database
+    target_db = DynaCellDataBase(
+        database_path=csv_database_path,
+        cell_types=["HEK293T"],
+        organelles=["HIST2H2BE"],
+        infection_conditions=["Mock"],
+        channel_name="Organelle",
+        z_slice=z_slice_value,
+    )
+
+    if method == "segmentation2D":
+        # pred_db is configured identically to target_db (self-comparison)
+        pred_db = DynaCellDataBase(
+            database_path=csv_database_path,
+            cell_types=["HEK293T"],
+            organelles=["HIST2H2BE"],
+            infection_conditions=["Mock"],
+            channel_name="Organelle",
+            z_slice=z_slice_value,
+        )
+
+        # Create data module with both databases
+        dm = DynaCellDataModule(
+            target_database=target_db,
+            pred_database=pred_db,
+            batch_size=1,
+            num_workers=0,
+        )
+        dm.setup(stage="test")
+
+        # Pull the first test batch and print its metadata as a sanity check
+        sample = next(iter(dm.test_dataloader()))
+        print(f"Sample keys: {sample.keys()}")
+        print(f"Cell type: {sample['cell_type']}")
+        print(f"Organelle: {sample['organelle']}")
+        print(f"Infection condition: {sample['infection_condition']}")
+
+        # Run segmentation metrics
+        lm = SegmentationMetrics()
+        # Use the method name and timestamp for unique identification
+        name = f"segmentation_{timestamp}"
+        version = "1"
+
+        output_dir = tmp_path / "segmentation"
+        output_dir.mkdir(exist_ok=True)
+
+        # Explicit name and version make the metrics path below predictable
+        logger = CSVLogger(save_dir=output_dir, name=name, version=version)
+        trainer = Trainer(logger=logger)
+        trainer.test(lm, datamodule=dm)
+
+        # Expect CSVLogger output at <save_dir>/<name>/<version>/metrics.csv
+        metrics_file = output_dir / name / version / "metrics.csv"
+        if metrics_file.exists():
+            metrics = pd.read_csv(metrics_file)
+            print(f"Segmentation metrics saved to: {metrics_file}")
+            print(f"Segmentation metrics columns: {metrics.columns.tolist()}")
+        else:
+            print(f"Warning: Metrics file not found at {metrics_file}")
+            metrics = None
+
+        return metrics
+
+    elif method == "segmentation3D":
+        raise NotImplementedError("Segmentation3D is not implemented yet")
+
+    elif method == "intensity":
+        # pred_db is configured identically to target_db (self-comparison)
+        pred_db = DynaCellDataBase(
+            database_path=csv_database_path,
+            cell_types=["HEK293T"],
+            organelles=["HIST2H2BE"],
+            infection_conditions=["Mock"],
+            channel_name="Organelle",
+            z_slice=z_slice_value,
+        )
+
+        # Create data module with both databases
+        dm = DynaCellDataModule(
+            target_database=target_db,
+            pred_database=pred_db,
+            batch_size=1,
+            num_workers=0,
+        )
+        dm.setup(stage="test")
+
+        # Pull the first test batch and print its metadata as a sanity check
+        sample = next(iter(dm.test_dataloader()))
+        print(f"Sample keys: {sample.keys()}")
+        print(f"Cell type: {sample['cell_type']}")
+        print(f"Organelle: {sample['organelle']}")
+        print(f"Infection condition: {sample['infection_condition']}")
+
+        # Run intensity metrics
+        lm = IntensityMetrics()
+        # Indicate whether z-slice range was used in the name
+        range_suffix = "_range" if use_z_slice_range else ""
+        name = f"intensity{range_suffix}_{timestamp}"
+        version = "1"
+
+        output_dir = tmp_path / "intensity"
+        output_dir.mkdir(exist_ok=True)
+
+        # Explicit name and version make the metrics path below predictable
+        logger = CSVLogger(save_dir=output_dir, name=name, version=version)
+        trainer = Trainer(logger=logger)
+        trainer.test(lm, datamodule=dm)
+
+        # Expect CSVLogger output at <save_dir>/<name>/<version>/metrics.csv
+        metrics_file = output_dir / name / version / "metrics.csv"
+        if metrics_file.exists():
+            metrics = pd.read_csv(metrics_file)
+            print(f"Intensity metrics saved to: {metrics_file}")
+            print(f"Intensity metrics columns: {metrics.columns.tolist()}")
+        else:
+            print(f"Warning: Metrics file not found at {metrics_file}")
+            metrics = None
+
+        return metrics
+    else:
+        raise ValueError(f"Invalid method: {method}")
+
+
+# %%
+if __name__ == "__main__":
+    # print("Running intensity metrics with single z-slice...")
+    # intensity_metrics = main("intensity", use_z_slice_range=False)
+
+    print("\nRunning intensity metrics with z-slice range...")
+    intensity_metrics_range = main("intensity", use_z_slice_range=True)
diff --git a/docs/usage.md b/docs/usage.md
@@ -84,3 +84,49 @@ requires an exclusive node on HPC OR a non-distributed system (e.g. a PC).
 with a valid `config.yaml` in order to be initialized.
 This can be "hacked" by locating the config in a directory
 called `checkpoints` beneath a valid config's directory.
+
+## DynaCell Metrics
+
+Compute metrics on DynaCell datasets using the `compute_dynacell_metrics` command:
+
+```sh
+viscy compute_dynacell_metrics -c config.yaml
+```
+
+### Configuration File Format
+
+Example configuration file:
+
+```yaml
+# Required parameters
+target_database: /path/to/target_database.csv
+pred_database: /path/to/prediction_database.csv
+output_dir: ./metrics_output
+method: intensity  # Options: 'intensity' or 'segmentation2D'
+
+# Optional parameters
+target_channel: Organelle
+pred_channel: Organelle
+# Z-slice options:
+# - Single integer (e.g., 16): Use specific z-slice
+# - List of two integers [start, end] (e.g., [15, 17]): Use z-slices from start up to, but not including, end
+# - -1: Use all available z-slices
+target_z_slice: 16  
+pred_z_slice: 16
+# You can also specify a range of z-slices:
+# target_z_slice: [15, 17]  # Use z-slices 15 and 16 (the end index, 17, is excluded)
+# pred_z_slice: [15, 17]    # Use z-slices 15 and 16 (the end index, 17, is excluded)
+target_cell_types: [HEK293T]  # or leave empty [] for all available
+target_organelles: [HIST2H2BE]
+target_infection_conditions: [Mock]
+pred_cell_types: [HEK293T]
+pred_organelles: [HIST2H2BE]
+pred_infection_conditions: [Mock]
+batch_size: 1
+num_workers: 0
+version: "1"
+```
+
+If cell types, organelles, or infection conditions are omitted or left empty, all values available in the respective database are used.
+
+A z-slice range (e.g., `[15, 17]`) computes metrics over several consecutive z-slices, which is useful for 3D analysis or when the structures of interest span more than one z-slice.
diff --git a/examples/configs/dynacell_metrics_example.yml b/examples/configs/dynacell_metrics_example.yml
@@ -0,0 +1,34 @@
+# Example configuration for DynaCell metrics computation
+
+# Required parameters
+target_database: /path/to/target_database.csv
+pred_database: /path/to/prediction_database.csv
+output_dir: ./metrics_output
+method: intensity  # Options: 'intensity' or 'segmentation2D'
+
+# Target dataset parameters
+target_channel: Organelle
+# Z-slice can be a single integer (e.g., 16) or a range given as a list of two integers [start, end], end excluded (e.g., [15, 17] selects slices 15 and 16)
+target_z_slice: 16  # Use -1 for all slices, or a list like [15, 17] for a range
+target_cell_types: 
+  - HEK293T
+target_organelles:
+  - HIST2H2BE  
+target_infection_conditions:
+  - Mock
+
+# Prediction dataset parameters
+pred_channel: Organelle
+# Z-slice can be a single integer (e.g., 16) or a range given as a list of two integers [start, end], end excluded (e.g., [15, 17] selects slices 15 and 16)
+pred_z_slice: 16  # Use -1 for all slices, or a list like [15, 17] for a range
+pred_cell_types:
+  - HEK293T
+pred_organelles: 
+  - HIST2H2BE
+pred_infection_conditions:
+  - Mock
+
+# Processing parameters
+batch_size: 1
+num_workers: 0
+version: "1"
diff --git a/pyproject.toml b/pyproject.toml
@@ -41,6 +41,7 @@ metrics = [
     "umap-learn",
     "captum>=0.7.0",
     "phate",
+    "panoptica",
 ]
 examples = ["napari", "jupyter", "jupytext"]
 visual = [
@@ -77,5 +78,9 @@ line-length = 88
 
 [tool.ruff]
 src = ["viscy", "tests"]
-lint.extend-select = ["I001"]
-lint.isort.known-first-party = ["viscy"]
+
+[tool.ruff.lint]
+extend-select = ["I001"]
+
+[tool.ruff.lint.isort]
+known-first-party = ["viscy"]
diff --git a/tests/translation/test_evaluation.py b/tests/translation/test_evaluation.py
@@ -4,14 +4,14 @@
 from lightning.pytorch.loggers import CSVLogger
 from numpy.testing import assert_array_equal
 
-from viscy.data.segmentation import SegmentationDataModule
+from viscy.data.segmentation import TargetPredictionDataModule
 from viscy.trainer import Trainer
 from viscy.translation.evaluation import SegmentationMetrics2D
 
 
 @pytest.mark.parametrize("pred_channel", ["DAPI", "GFP"])
 def test_segmentation_metrics_2d(pred_channel, labels_hcs_dataset, tmp_path) -> None:
-    dm = SegmentationDataModule(
+    dm = TargetPredictionDataModule(
         pred_dataset=labels_hcs_dataset,
         target_dataset=labels_hcs_dataset,
         target_channel="DAPI",

diff --git a/viscy/cli.py b/viscy/cli.py
@@ -2,6 +2,7 @@
 import os
 import sys
 from datetime import datetime
+from pathlib import Path
 
 import torch
 from jsonargparse import lazy_instance
@@ -22,6 +23,7 @@ def subcommands() -> dict[str, set[str]]:
         subcommands["preprocess"] = subcommand_base_args
         subcommands["export"] = subcommand_base_args
         subcommands["precompute"] = subcommand_base_args
+        subcommands["compute_dynacell_metrics"] = subcommand_base_args
         return subcommands
 
     def add_arguments_to_parser(self, parser) -> None:
@@ -51,8 +53,15 @@ def main() -> None:
     Set default random seed to 42.
     """
     _setup_environment()
-    require_model = {"preprocess", "precompute"}.isdisjoint(sys.argv)
-    require_data = {"preprocess", "precompute", "export"}.isdisjoint(sys.argv)
+    require_model = {"preprocess", "precompute", "compute_dynacell_metrics"}.isdisjoint(
+        sys.argv
+    )
+    require_data = {
+        "preprocess",
+        "precompute",
+        "export",
+        "compute_dynacell_metrics",
+    }.isdisjoint(sys.argv)
     _ = VisCyCLI(
         model_class=LightningModule,
         datamodule_class=LightningDataModule if require_data else None,