Skip to content
Draft
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
96f266f
protype of metrics and files to modify
edyoshikun Apr 21, 2025
3aa3ddf
fix ruff settings
ieivanov Apr 22, 2025
d5394d3
add panoptica to deps
ieivanov Apr 22, 2025
93524ea
remove metrics module and add optional dtype casting to SegmentationD…
ieivanov Apr 23, 2025
71eb7ed
rename Segmentation to TargetPrediction modules
ieivanov Apr 23, 2025
5275349
add dynacell data module draft
ieivanov Apr 23, 2025
56a0119
dynacell dataloader WIP
ieivanov Apr 24, 2025
10a33c7
style
ieivanov Apr 24, 2025
c13914f
style
ieivanov Apr 25, 2025
48df3de
debug
ieivanov Apr 25, 2025
620e368
splitting the logic for computing metrics by accepting two databases …
edyoshikun May 8, 2025
149d36e
fix demo
edyoshikun May 8, 2025
bc93f89
CLI prototype to compute metrics
edyoshikun May 8, 2025
d3c3d8a
support z-slice 3d via list to slice object conversion
edyoshikun May 8, 2025
b7141d9
allow for independent target and prediction databases
ieivanov Jul 18, 2025
4f2e13a
refactor demo script
ieivanov Jul 18, 2025
9bc4f49
rename demo script
ieivanov Jul 18, 2025
e9ab7a1
docs
ieivanov Jul 18, 2025
db86e3c
test of specified positions
ieivanov Jul 18, 2025
5b1b683
fixing the trainer from 'auto' for resources to 'cpu' and limiting th…
edyoshikun Jul 23, 2025
ea4ed97
adding transforms to do normalization.
edyoshikun Jul 29, 2025
0e9dd7c
WIP
ieivanov Jul 31, 2025
7215723
bugfix - convert data to float32
ieivanov Jul 31, 2025
a10ac34
segment prototype. can be deleted later
edyoshikun Jul 31, 2025
ab8617b
add plotting
edyoshikun Jul 31, 2025
8da84e9
vs metrics v1
ieivanov Sep 8, 2025
411aeb6
compute metrics on multiple conditions at a time
ieivanov Sep 9, 2025
1de0525
use multiple workers
ieivanov Sep 9, 2025
dbd9b56
cleaner messaging
ieivanov Sep 9, 2025
9467a91
use gpu acceleration
ieivanov Sep 10, 2025
62d2703
add note on ssim data_range
ieivanov Sep 10, 2025
c80e4aa
ivan's VS metrics scripts
ieivanov Oct 13, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added applications/DynaCell/README.md
Empty file.
178 changes: 178 additions & 0 deletions applications/DynaCell/demo_script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
"""
This script is a demo script for the DynaCell application.
It loads data in the ome-zarr v0.4 format, calculates metrics, and saves the results as CSV files.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
It loads the ome-zarr 0.4v format, calculates metrics and saves the results as csv files
It loads the ome-zarr v0.4 format, calculates metrics and saves the results as csv files

"""

import datetime
import tempfile
from pathlib import Path
from typing import Literal

import pandas as pd
import torch
from lightning.pytorch.loggers import CSVLogger

from viscy.data.dynacell import DynaCellDataBase, DynaCellDataModule
from viscy.trainer import Trainer
from viscy.translation.evaluation import IntensityMetrics, SegmentationMetrics

# Allow float32 matrix multiplications to run in the faster "high" precision
# mode, which speeds things up on GPUs with Tensor Cores.
torch.set_float32_matmul_precision("high")

# CSV table passed as ``database_path`` to the DynaCell databases in ``main``.
# NOTE(review): this is a user-specific absolute path; adjust it to your checkout.
csv_database_path = (
    Path("/home/eduardo.hirata/repos/viscy/applications/DynaCell")
    / "dynacell_summary_table.csv"
)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Path.home() / "rel/path" is likely what you want.

# Root folder under which ``main`` writes one sub-folder of CSV logs per method.
tmp_path = (
    Path("/home/eduardo.hirata/repos/viscy/applications/DynaCell") / "demo_metrics"
)
tmp_path.mkdir(exist_ok=True, parents=True)


def _make_demo_database(z_slice):
    """
    Build the database selection shared by the demo target and prediction.

    Parameters
    ----------
    z_slice : int or slice
        Z index (or range of indices) forwarded to ``DynaCellDataBase``.

    Returns
    -------
    DynaCellDataBase
        Database configured with the HEK293T cell type, the HIST2H2BE
        organelle, the Mock infection condition and the "Organelle" channel.
    """
    return DynaCellDataBase(
        database_path=csv_database_path,
        cell_types=["HEK293T"],
        organelles=["HIST2H2BE"],
        infection_conditions=["Mock"],
        channel_name="Organelle",
        z_slice=z_slice,
    )


def _run_metrics(metrics_cls, label, target_db, pred_db, output_dir, name, version="1"):
    """
    Run one metrics module over a target/prediction pair and load its CSV log.

    Parameters
    ----------
    metrics_cls : type
        Metrics module class to instantiate (called without arguments).
    label : str
        Human-readable prefix used in the printed messages.
    target_db, pred_db : DynaCellDataBase
        Target and prediction databases handed to ``DynaCellDataModule``.
    output_dir : pathlib.Path
        Directory given to ``CSVLogger`` as ``save_dir``; created if missing.
    name : str
        Experiment name given to ``CSVLogger``.
    version : str, optional
        Version given to ``CSVLogger``, by default "1".

    Returns
    -------
    pandas.DataFrame or None
        Contents of ``output_dir / name / version / "metrics.csv"``, or None
        when that file does not exist after the test run.
    """
    dm = DynaCellDataModule(
        target_database=target_db,
        pred_database=pred_db,
        batch_size=1,
        num_workers=0,
    )
    dm.setup(stage="test")

    # Print one sample so the metadata can be checked by eye before the run
    sample = next(iter(dm.test_dataloader()))
    print(f"Sample keys: {sample.keys()}")
    print(f"Cell type: {sample['cell_type']}")
    print(f"Organelle: {sample['organelle']}")
    print(f"Infection condition: {sample['infection_condition']}")

    lm = metrics_cls()

    output_dir.mkdir(parents=True, exist_ok=True)

    # Explicit name and version make the location of the log file predictable
    logger = CSVLogger(save_dir=output_dir, name=name, version=version)
    trainer = Trainer(logger=logger)
    trainer.test(lm, datamodule=dm)

    metrics_file = output_dir / name / version / "metrics.csv"
    if not metrics_file.exists():
        print(f"Warning: Metrics file not found at {metrics_file}")
        return None

    metrics = pd.read_csv(metrics_file)
    print(f"{label} metrics saved to: {metrics_file}")
    print(f"{label} metrics columns: {metrics.columns.tolist()}")
    return metrics


def main(
    method: Literal["segmentation2D", "segmentation3D", "intensity"] = "intensity",
    use_z_slice_range: bool = False,
) -> "pd.DataFrame | None":
    """
    Run DynaCell metrics computation.

    The same database selection is used as both target and prediction, so the
    demo compares a channel against itself.

    Parameters
    ----------
    method : Literal["segmentation2D", "segmentation3D", "intensity"], optional
        Type of metrics to compute, by default "intensity"
    use_z_slice_range : bool, optional
        Whether to use a z-slice range instead of a single slice, by default False

    Returns
    -------
    pandas.DataFrame or None
        Metrics read back from the CSV log, or None if the log file was not
        found after the run.

    Raises
    ------
    NotImplementedError
        If ``method`` is "segmentation3D".
    ValueError
        If ``method`` is not one of the supported names.
    """
    # Validate the request first so a bad method fails before any data is touched
    if method == "segmentation3D":
        raise NotImplementedError("Segmentation3D is not implemented yet")
    if method not in ("segmentation2D", "intensity"):
        raise ValueError(f"Invalid method: {method}")

    # Timestamp keeps the log directory of every run unique
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

    # Either a two-slice range (15 and 16) or the single slice 16
    z_slice_value = slice(15, 17) if use_z_slice_range else 16

    # Two separate instances with identical settings (self-comparison)
    target_db = _make_demo_database(z_slice_value)
    pred_db = _make_demo_database(z_slice_value)

    if method == "segmentation2D":
        return _run_metrics(
            SegmentationMetrics,
            "Segmentation",
            target_db,
            pred_db,
            output_dir=tmp_path / "segmentation",
            name=f"segmentation_{timestamp}",
        )

    # Only the intensity run name records whether a z-slice range was used
    range_suffix = "_range" if use_z_slice_range else ""
    return _run_metrics(
        IntensityMetrics,
        "Intensity",
        target_db,
        pred_db,
        output_dir=tmp_path / "intensity",
        name=f"intensity{range_suffix}_{timestamp}",
    )


# %%
if __name__ == "__main__":
    # Demo run over the z-slice range. For a single z-slice, call
    # main("intensity", use_z_slice_range=False) instead.
    print("\nRunning intensity metrics with z-slice range...")
    intensity_metrics_range = main(method="intensity", use_z_slice_range=True)
46 changes: 46 additions & 0 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,49 @@ requires an exclusive node on HPC OR a non-distributed system (e.g. a PC).
with a valid `config.yaml` in order to be initialized.
This can be "hacked" by locating the config in a directory
called `checkpoints` beneath a valid config's directory.

## DynaCell Metrics

Compute metrics on DynaCell datasets using the `compute_dynacell_metrics` command:

```sh
viscy compute_dynacell_metrics -c config.yaml
```

### Configuration File Format

Example configuration file:

```yaml
# Required parameters
target_database: /path/to/target_database.csv
pred_database: /path/to/prediction_database.csv
output_dir: ./metrics_output
method: intensity # Options: 'intensity' or 'segmentation2D'

# Optional parameters
target_channel: Organelle
pred_channel: Organelle
# Z-slice options:
# - Single integer (e.g., 16): Use specific z-slice
# - List of two integers [start, end] (e.g., [15, 17]): Use z-slices from start up to, but not including, end
# - -1: Use all available z-slices
target_z_slice: 16
pred_z_slice: 16
# You can also specify a range of z-slices:
# target_z_slice: [15, 17] # Use z-slices from 15 to 16 (exclusive of 17)
# pred_z_slice: [15, 17] # Use z-slices from 15 to 16 (exclusive of 17)
target_cell_types: [HEK293T] # or leave empty [] for all available
target_organelles: [HIST2H2BE]
target_infection_conditions: [Mock]
pred_cell_types: [HEK293T]
pred_organelles: [HIST2H2BE]
pred_infection_conditions: [Mock]
batch_size: 1
num_workers: 0
version: "1"
```

If cell types, organelles, or infection conditions are not specified or left empty, all available values from the respective database will be used.

Using a z-slice range (e.g., `[15, 17]`) computes metrics over several consecutive z-slices, which is useful for 3D analysis or when the structures of interest span multiple z-slices.
34 changes: 34 additions & 0 deletions examples/configs/dynacell_metrics_example.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Example configuration for DynaCell metrics computation

# Required parameters
target_database: /path/to/target_database.csv
pred_database: /path/to/prediction_database.csv
output_dir: ./metrics_output
method: intensity # Options: 'intensity' or 'segmentation2D'

# Target dataset parameters
target_channel: Organelle
# Z-slice can be a single integer (e.g., 16) or a range given as a list of two integers [start, end] with an exclusive end (e.g., [15, 17] selects slices 15 and 16)
target_z_slice: 16 # Use -1 for all slices, or a list like [15, 17] for a range
target_cell_types:
- HEK293T
target_organelles:
- HIST2H2BE
target_infection_conditions:
- Mock

# Prediction dataset parameters
pred_channel: Organelle
# Z-slice can be a single integer (e.g., 16) or a range given as a list of two integers [start, end] with an exclusive end (e.g., [15, 17] selects slices 15 and 16)
pred_z_slice: 16 # Use -1 for all slices, or a list like [15, 17] for a range
pred_cell_types:
- HEK293T
pred_organelles:
- HIST2H2BE
pred_infection_conditions:
- Mock

# Processing parameters
batch_size: 1
num_workers: 0
version: "1"
9 changes: 7 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ metrics = [
"umap-learn",
"captum>=0.7.0",
"phate",
"panoptica",
]
examples = ["napari", "jupyter", "jupytext"]
visual = [
Expand Down Expand Up @@ -77,5 +78,9 @@ line-length = 88

[tool.ruff]
src = ["viscy", "tests"]
lint.extend-select = ["I001"]
lint.isort.known-first-party = ["viscy"]

[tool.ruff.lint]
extend-select = ["I001"]

[tool.ruff.lint.isort]
known-first-party = ["viscy"]
4 changes: 2 additions & 2 deletions tests/translation/test_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
from lightning.pytorch.loggers import CSVLogger
from numpy.testing import assert_array_equal

from viscy.data.segmentation import SegmentationDataModule
from viscy.data.segmentation import TargetPredictionDataModule
from viscy.trainer import Trainer
from viscy.translation.evaluation import SegmentationMetrics2D


@pytest.mark.parametrize("pred_channel", ["DAPI", "GFP"])
def test_segmentation_metrics_2d(pred_channel, labels_hcs_dataset, tmp_path) -> None:
dm = SegmentationDataModule(
dm = TargetPredictionDataModule(
pred_dataset=labels_hcs_dataset,
target_dataset=labels_hcs_dataset,
target_channel="DAPI",
Expand Down
13 changes: 11 additions & 2 deletions viscy/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import sys
from datetime import datetime
from pathlib import Path

import torch
from jsonargparse import lazy_instance
Expand All @@ -22,6 +23,7 @@ def subcommands() -> dict[str, set[str]]:
subcommands["preprocess"] = subcommand_base_args
subcommands["export"] = subcommand_base_args
subcommands["precompute"] = subcommand_base_args
subcommands["compute_dynacell_metrics"] = subcommand_base_args
return subcommands

def add_arguments_to_parser(self, parser) -> None:
Expand Down Expand Up @@ -51,8 +53,15 @@ def main() -> None:
Set default random seed to 42.
"""
_setup_environment()
require_model = {"preprocess", "precompute"}.isdisjoint(sys.argv)
require_data = {"preprocess", "precompute", "export"}.isdisjoint(sys.argv)
require_model = {"preprocess", "precompute", "compute_dynacell_metrics"}.isdisjoint(
sys.argv
)
require_data = {
"preprocess",
"precompute",
"export",
"compute_dynacell_metrics",
}.isdisjoint(sys.argv)
_ = VisCyCLI(
model_class=LightningModule,
datamodule_class=LightningDataModule if require_data else None,
Expand Down
Loading
Loading