Merged
24 commits
e3f24f5
Add analysis script for synapse subtypes
constantinpape Aug 18, 2025
49ec4e8
Add support for extracting data from zarr v3
constantinpape Aug 19, 2025
3dd2c3d
Add script for gerbil block extraction
constantinpape Aug 20, 2025
ec8b6f5
Minor update
constantinpape Aug 20, 2025
cadf297
Some plot updates
constantinpape Aug 20, 2025
bb4364e
Add and update some plots
constantinpape Aug 21, 2025
5540d08
More plot update
constantinpape Aug 21, 2025
7a5d742
Merge branch 'master' of https://github.com/computational-cell-analyt…
constantinpape Aug 21, 2025
80be0ee
More plot updates
constantinpape Aug 21, 2025
90edf75
Minor changes
constantinpape Aug 22, 2025
107186b
Merge branch 'syn-coloc-subtypes' of https://github.com/computational…
constantinpape Aug 22, 2025
527e636
Merge branch 'master' into syn-coloc-subtypes
constantinpape Aug 22, 2025
fd17888
Plot updates
constantinpape Aug 22, 2025
a33f265
Update processing and block extraction scripts
constantinpape Aug 23, 2025
0ebea67
Update LaVision processing
constantinpape Aug 25, 2025
2a7b66e
Update SGN subtype measurements
constantinpape Aug 25, 2025
06ac936
Update SGN subtype analysis
constantinpape Aug 25, 2025
71b46ce
Update subtype analysis
constantinpape Aug 25, 2025
ed02fe3
Update La-Vision processing
constantinpape Aug 26, 2025
0b65b19
Add analysis script for LaVision data
constantinpape Aug 26, 2025
336d874
Add new IHC extraction
constantinpape Aug 26, 2025
bbc922a
Merge branch 'syn-coloc-subtypes' of https://github.com/computational…
constantinpape Aug 26, 2025
7921317
Update sgn subtype analysis
constantinpape Aug 27, 2025
98c6db4
Further updates to processing scripts; add function to sync mobie dat…
constantinpape Sep 1, 2025
37 changes: 27 additions & 10 deletions flamingo_tools/extract_block_util.py
@@ -1,9 +1,10 @@
import os
from typing import Optional, List
from typing import Optional, List, Union, Tuple

import imageio.v3 as imageio
import numpy as np
import zarr
from skimage.transform import rescale

import flamingo_tools.s3_utils as s3_utils
from flamingo_tools.file_utils import read_image_data
@@ -15,14 +16,15 @@ def extract_block(
output_dir: Optional[str] = None,
input_key: Optional[str] = None,
output_key: Optional[str] = None,
resolution: float = 0.38,
resolution: Union[float, Tuple[float, float, float]] = 0.38,
roi_halo: List[int] = [128, 128, 64],
tif: bool = False,
s3: Optional[bool] = False,
s3_credentials: Optional[str] = None,
s3_bucket_name: Optional[str] = None,
s3_service_endpoint: Optional[str] = None,
):
scale_factor: Optional[Tuple[float, float, float]] = None,
) -> None:
"""Extract block around coordinate from input data according to a given halo.
Either from a local file or from an S3 bucket.

@@ -38,11 +40,14 @@ def extract_block(
s3_bucket_name: S3 bucket name.
s3_service_endpoint: S3 service endpoint.
s3_credentials: File path to credentials for S3 bucket.
scale_factor: Optional factor for rescaling the extracted data.
"""
coords = [int(round(c)) for c in coords]
coord_string = "-".join([str(c).zfill(4) for c in coords])
coord_string = "-".join([str(int(round(c))).zfill(4) for c in coords])

# Dimensions are reversed relative to the MoBIE view: (x y z) -> (z y x)
# Make sure the coords / roi_halo are not modified in-place.
coords = coords.copy()
roi_halo = roi_halo.copy()
coords.reverse()
roi_halo.reverse()

@@ -61,6 +66,7 @@

if output_dir == "":
output_dir = input_dir
os.makedirs(output_dir, exist_ok=True)

if tif:
if output_key is None:
@@ -73,21 +79,32 @@
output_key = "raw" if output_key is None else output_key
output_file = os.path.join(output_dir, basename + "_crop_" + coord_string + ".n5")

coords = np.array(coords)
coords = np.array(coords).astype("float")
if not isinstance(resolution, float):
assert len(resolution) == 3
resolution = np.array(resolution)[::-1]
coords = coords / resolution
coords = np.round(coords).astype(np.int32)

roi = tuple(slice(co - rh, co + rh) for co, rh in zip(coords, roi_halo))

if s3:
input_path, fs = s3_utils.get_s3_path(input_path, bucket_name=s3_bucket_name,
service_endpoint=s3_service_endpoint, credential_file=s3_credentials)
input_path, fs = s3_utils.get_s3_path(
input_path, bucket_name=s3_bucket_name,
service_endpoint=s3_service_endpoint, credential_file=s3_credentials
)

data_ = read_image_data(input_path, input_key)
data_roi = data_[roi]
if scale_factor is not None:
kwargs = {"preserve_range": True}
# Check if this is a segmentation.
if data_roi.dtype in (np.dtype("int32"), np.dtype("uint32"), np.dtype("int64"), np.dtype("uint64")):
kwargs.update({"order": 0, "anti_aliasing": False})
data_roi = rescale(data_roi, scale_factor, **kwargs).astype(data_roi.dtype)

if tif:
imageio.imwrite(output_file, data_roi, compression="zlib")
else:
with zarr.open(output_file, mode="w") as f_out:
f_out.create_dataset(output_key, data=data_roi, compression="gzip")
f_out = zarr.open(output_file, mode="w")
f_out.create_dataset(output_key, data=data_roi, compression="gzip")
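
Below is a minimal usage sketch of the extended extract_block call with the new per-axis resolution and scale_factor options. All paths, keys, and values are placeholders (not taken from this PR), and the input_path/coords keyword names are assumed from the unchanged part of the signature and the function body.

```python
# Hypothetical usage sketch: extract a downsampled crop around a MoBIE coordinate.
# All paths, keys, and values below are placeholders, not taken from this PR.
from flamingo_tools.extract_block_util import extract_block

extract_block(
    input_path="/path/to/sample.n5",    # placeholder input container (assumed keyword name)
    coords=[512.0, 768.0, 256.0],       # placeholder MoBIE coordinate x y z (assumed keyword name)
    output_dir="./crops",
    input_key="setup0/timepoint0/s0",   # placeholder dataset key
    resolution=(0.38, 0.38, 1.0),       # per-axis resolution, newly supported as a tuple
    roi_halo=[128, 128, 64],
    scale_factor=(0.5, 0.5, 0.5),       # new optional rescaling of the extracted block
)
```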
4 changes: 2 additions & 2 deletions flamingo_tools/file_utils.py
@@ -85,6 +85,6 @@ def read_image_data(input_path: Union[str, Store], input_key: Optional[str]) ->
elif isinstance(input_path, str):
input_ = open_file(input_path, "r")[input_key]
else:
with zarr.open(input_path, mode="r") as f:
input_ = f[input_key]
f = zarr.open(input_path, mode="r")
input_ = f[input_key]
return input_
120 changes: 115 additions & 5 deletions flamingo_tools/s3_utils.py
@@ -1,7 +1,11 @@
"""This file contains utility functions for processing data located on an S3 storage.
The upload of data to the storage system should be performed with 'rclone'.
"""
import json
import os
import warnings
from shutil import which
from subprocess import run
from typing import Optional, Tuple

import s3fs
@@ -115,14 +119,23 @@ def get_s3_path(
bucket_name, service_endpoint, credential_file
)

s3_filesystem = create_s3_target(url=service_endpoint, anon=False, credential_file=credential_file)
zarr_major_version = int(zarr.__version__.split(".")[0])
s3_filesystem = create_s3_target(
url=service_endpoint, anon=False, credential_file=credential_file, asynchronous=zarr_major_version == 3,
)

zarr_path = f"{bucket_name}/{input_path}"

if not s3_filesystem.exists(zarr_path):
if zarr_major_version == 2 and not s3_filesystem.exists(zarr_path):
print(f"Error: S3 path {zarr_path} does not exist!")

s3_path = zarr.storage.FSStore(zarr_path, fs=s3_filesystem)
# The approach for opening a dataset from S3 differs in zarr v2 and zarr v3.
if zarr_major_version == 2:
s3_path = zarr.storage.FSStore(zarr_path, fs=s3_filesystem)
elif zarr_major_version == 3:
s3_path = zarr.storage.FsspecStore(fs=s3_filesystem, path=zarr_path)
else:
raise RuntimeError(f"Unsupported zarr version {zarr_major_version}")

return s3_path, s3_filesystem
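
For context, a short sketch of how the returned store is typically consumed downstream; this mirrors read_image_data above and works for both the zarr v2 FSStore and the zarr v3 FsspecStore. The object path, bucket name, credentials file, and dataset key are placeholders.

```python
# Hypothetical usage sketch: open a dataset through the store returned by get_s3_path.
# Object path, bucket, credentials file, and key are placeholders.
import zarr

from flamingo_tools.s3_utils import get_s3_path

store, fs = get_s3_path(
    "MyCochlea/images/ome-zarr/PV.ome.zarr",     # placeholder object path inside the bucket
    bucket_name="cochlea-lightsheet",            # placeholder bucket name
    credential_file="/path/to/s3_credentials",   # placeholder credentials file
)
# zarr.open accepts both store flavors returned above (FSStore for v2, FsspecStore for v3).
data = zarr.open(store, mode="r")["s0"]          # "s0" is a placeholder dataset key
print(data.shape)
```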

@@ -153,6 +166,7 @@ def create_s3_target(
url: Optional[str] = None,
anon: Optional[str] = False,
credential_file: Optional[str] = None,
asynchronous: bool = False,
) -> s3fs.core.S3FileSystem:
"""Create file system for S3 bucket based on a service endpoint and an optional credential file.
If the credential file is not provided, the s3fs.S3FileSystem function checks the environment variables
@@ -162,14 +176,110 @@
url: Service endpoint for S3 bucket
anon: Option for anon argument of S3FileSystem
credential_file: File path to credentials
asynchronous: Whether to open the file system in async mode.

Returns:
s3_filesystem
"""
client_kwargs = {"endpoint_url": SERVICE_ENDPOINT if url is None else url}
if credential_file is not None:
key, secret = read_s3_credentials(credential_file)
s3_filesystem = s3fs.S3FileSystem(key=key, secret=secret, client_kwargs=client_kwargs)
s3_filesystem = s3fs.S3FileSystem(
key=key, secret=secret, client_kwargs=client_kwargs, asynchronous=asynchronous
)
else:
s3_filesystem = s3fs.S3FileSystem(anon=anon, client_kwargs=client_kwargs)
s3_filesystem = s3fs.S3FileSystem(anon=anon, client_kwargs=client_kwargs, asynchronous=asynchronous)
return s3_filesystem


def _sync_rclone(local_dir, target):
# The rclone alias could also be exposed as parameter.
rclone_alias = "cochlea-lightsheet"
print("Sync", local_dir, "to", target)
run(["rclone", "--progress", "copyto", local_dir, f"{rclone_alias}:{target}"])


def sync_dataset(
mobie_root: str,
dataset_name: str,
bucket_name: Optional[str] = None,
url: Optional[str] = None,
anon: Optional[str] = False,
credential_file: Optional[str] = None,
force_segmentation_update: bool = False,
) -> None:
"""Sync a MoBIE dataset on the s3 bucket using rclone.

Args:
mobie_root: The directory with the local mobie project.
dataset_name: The mobie dataset to sync.
bucket_name: The name of the dataset's bucket on s3.
url: Service endpoint for S3 bucket
anon: Option for anon argument of S3FileSystem
credential_file: File path to credentials
force_segmentation_update: Whether to force segmentation updates.
"""
from mobie.metadata import add_remote_project_metadata

# Make sure that rclone is loaded.
if which("rclone") is None:
raise RuntimeError("rclone is required for synchronization. Try loading it via 'module load rclone'.")

# Make sure the dataset is part of the local version of the project.
with open(os.path.join(mobie_root, "project.json")) as f:
project_metadata = json.load(f)
datasets = project_metadata["datasets"]
assert dataset_name in datasets

# Get the S3 filesystem and bucket name.
s3 = create_s3_target(url, anon, credential_file)
if bucket_name is None:
bucket_name = BUCKET_NAME
if url is None:
url = SERVICE_ENDPOINT

# Add the required remote metadata to the project. Suppress warnings about missing local data.
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=UserWarning)
add_remote_project_metadata(mobie_root, bucket_name, url)

# Get the metadata from the S3 bucket.
project_metadata_path = os.path.join(bucket_name, "project.json")
with s3.open(project_metadata_path, "r") as f:
project_metadata = json.load(f)

# Check if the dataset is part of the remote project already.
local_ds_root = os.path.join(mobie_root, dataset_name)
remote_ds_root = os.path.join(bucket_name, dataset_name)
if dataset_name not in project_metadata["datasets"]:
print("The dataset is not yet synced. Will copy it over.")
_sync_rclone(os.path.join(mobie_root, "project.json"), project_metadata_path)
_sync_rclone(local_ds_root, remote_ds_root)
return

# Otherwise, check which sources are new and add them.
with open(os.path.join(mobie_root, dataset_name, "dataset.json")) as f:
local_dataset_metadata = json.load(f)

dataset_metadata_path = os.path.join(bucket_name, dataset_name, "dataset.json")
with s3.open(dataset_metadata_path, "r") as f:
remote_dataset_metadata = json.load(f)

for source_name, source_data in local_dataset_metadata["sources"].items():
source_type, source_data = next(iter(source_data.items()))
is_segmentation = source_type == "segmentation"
is_spots = source_type == "spots"
data_path = source_data["imageData"]["ome.zarr"]["relativePath"]
source_not_on_remote = (source_name not in remote_dataset_metadata["sources"])
# Only update the image data if the source is not yet on the remote or if we force updates for segmentations.
if source_not_on_remote or (is_segmentation and force_segmentation_update):
_sync_rclone(os.path.join(local_ds_root, data_path), os.path.join(remote_ds_root, data_path))
# We always sync the tables.
if is_segmentation or is_spots:
table_path = source_data["tableData"]["tsv"]["relativePath"]
_sync_rclone(os.path.join(local_ds_root, table_path), os.path.join(remote_ds_root, table_path))

# Sync the dataset metadata.
_sync_rclone(
os.path.join(mobie_root, dataset_name, "dataset.json"), os.path.join(remote_ds_root, "dataset.json")
)
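
A hedged sketch of how the new sync_dataset helper might be called after local processing; the MoBIE project root, dataset name, and credentials file are placeholders, and rclone must be available with the 'cochlea-lightsheet' alias configured.

```python
# Hypothetical usage sketch: sync a locally updated MoBIE dataset to S3.
# The project root, dataset name, and credentials file are placeholders.
from flamingo_tools.s3_utils import sync_dataset

sync_dataset(
    mobie_root="/path/to/mobie-project",        # placeholder local MoBIE project root
    dataset_name="MyCochlea",                   # placeholder dataset name
    credential_file="/path/to/s3_credentials",  # placeholder credentials file
    force_segmentation_update=False,            # set to True to re-upload segmentation image data
)
```

New sources and all segmentation/spot tables are synced; image data of segmentations that are already on the remote is only re-synced when force_segmentation_update is set.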
8 changes: 8 additions & 0 deletions reproducibility/label_components/IHC_gerbil.json
@@ -0,0 +1,8 @@
[
{
"cochlea": "G_EK_000233_L",
"image_channel": "VGlut3",
"cell_type": "ihc",
"unet_version": "v5"
}
]
6 changes: 6 additions & 0 deletions reproducibility/label_components/IHC_v4c_fig2.json
@@ -22,5 +22,11 @@
"image_channel": "VGlut3",
"cell_type": "ihc",
"unet_version": "v4c"
},
{
"cochlea": "G_EK_000233_L",
"image_channel": "VGlut3",
"cell_type": "ihc",
"unet_version": "v5"
}
]
22 changes: 18 additions & 4 deletions reproducibility/label_components/repro_label_components.py
@@ -6,6 +6,7 @@
import pandas as pd
from flamingo_tools.s3_utils import get_s3_path
from flamingo_tools.segmentation.postprocessing import label_components_sgn, label_components_ihc
from flamingo_tools.segmentation.cochlea_mapping import tonotopic_mapping


def repro_label_components(
@@ -14,6 +15,7 @@ def repro_label_components(
s3_credentials: Optional[str] = None,
s3_bucket_name: Optional[str] = None,
s3_service_endpoint: Optional[str] = None,
apply_tonotopic_mapping: bool = False,
):
min_size = 1000
default_threshold_erode = None
@@ -23,7 +25,7 @@
default_cell_type = "sgn"
default_component_list = [1]

with open(ddict, 'r') as myfile:
with open(ddict, "r") as myfile:
data = myfile.read()
param_dicts = json.loads(data)

@@ -39,11 +41,17 @@
cell_type = dic["cell_type"] if "cell_type" in dic else default_cell_type
component_list = dic["component_list"] if "component_list" in dic else default_component_list

# The table name sometimes has to be overwritten.
# table_name = "PV_SGN_V2_DA"
# table_name = "CR_SGN_v2"
# table_name = "Ntng1_SGN_v2"

table_name = f"{cell_type.upper()}_{unet_version}"

s3_path = os.path.join(f"{cochlea}", "tables", table_name, "default.tsv")
tsv_path, fs = get_s3_path(s3_path, bucket_name=s3_bucket_name,
service_endpoint=s3_service_endpoint, credential_file=s3_credentials)
with fs.open(tsv_path, 'r') as f:
with fs.open(tsv_path, "r") as f:
table = pd.read_csv(f, sep="\t")

if cell_type == "sgn":
@@ -66,8 +74,12 @@
else:
print(f"Custom component(s) have {largest_comp} {cell_type.upper()}s.")

if apply_tonotopic_mapping:
tsv_table = tonotopic_mapping(tsv_table, cell_type=cell_type)

cochlea_str = "-".join(cochlea.split("_"))
table_str = "-".join(table_name.split("_"))
os.makedirs(output_dir, exist_ok=True)
out_path = os.path.join(output_dir, "_".join([cochlea_str, f"{table_str}.tsv"]))

tsv_table.to_csv(out_path, sep="\t", index=False)
@@ -77,8 +89,9 @@ def main():
parser = argparse.ArgumentParser(
description="Script to label segmentation using a segmentation table and graph connected components.")

parser.add_argument('-i', '--input', type=str, required=True, help="Input JSON dictionary.")
parser.add_argument('-o', "--output", type=str, required=True, help="Output directory.")
parser.add_argument("-i", "--input", type=str, required=True, help="Input JSON dictionary.")
parser.add_argument("-o", "--output", type=str, required=True, help="Output directory.")
parser.add_argument("-t", "--tonotopic_mapping", action="store_true", help="Also compute the tonotopic mapping.")

parser.add_argument("--s3_credentials", type=str, default=None,
help="Input file containing S3 credentials. "
@@ -93,6 +106,7 @@
repro_label_components(
args.input, args.output,
args.s3_credentials, args.s3_bucket_name, args.s3_service_endpoint,
apply_tonotopic_mapping=args.tonotopic_mapping,
)


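
For reference, a hedged sketch of invoking the updated labeling routine with the new tonotopic-mapping option; the output directory is a placeholder, and the call assumes the script's directory is the working directory.

```python
# Hypothetical usage sketch: label components and also compute the tonotopic mapping.
# The output directory is a placeholder.
from repro_label_components import repro_label_components

repro_label_components(
    "IHC_gerbil.json",               # parameter dictionary added in this PR
    "./component_tables",            # placeholder output directory
    apply_tonotopic_mapping=True,    # new option: also compute the tonotopic mapping
)
```

Equivalently, the script's argparse CLI exposes this via the new -t/--tonotopic_mapping flag alongside -i and -o.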
10 changes: 10 additions & 0 deletions reproducibility/templates_processing/REAMDE.md
@@ -12,7 +12,17 @@ For IHC segmentation run:
- apply_unet_IHC_template.sbatch
- segment_unet_IHC_template.sbatch

After this, run the following to add segmentation to MoBIE, create component labelings and upload to S3:
- templates_transfer/mobie_segmentation_template.sbatch
- templates_transfer/sync_mobie.py
- label_components/repro_label_components.py
- templates_transfer/sync_mobie.py

For ribbon synapse detection without associated IHC segmentation run:
- detect_synapse_template.sbatch
For ribbon synapse detection with associated IHC segmentation run:
- detect_synapse_marker_template.sbatch

After this, run the following to add detections to MoBIE and upload to S3:
- templates_transfer/mobie_spots_template.sbatch
- templates_transfer/sync_mobie.py