Merged
24 commits
e3f24f5
Add analysis script for synapse subtypes
constantinpape Aug 18, 2025
49ec4e8
Add support for extracting data from zarr v3
constantinpape Aug 19, 2025
3dd2c3d
Add script for gerbil block extraction
constantinpape Aug 20, 2025
ec8b6f5
Minor update
constantinpape Aug 20, 2025
cadf297
Some plot updates
constantinpape Aug 20, 2025
bb4364e
Add and update some plots
constantinpape Aug 21, 2025
5540d08
More plot update
constantinpape Aug 21, 2025
7a5d742
Merge branch 'master' of https://github.com/computational-cell-analyt…
constantinpape Aug 21, 2025
80be0ee
More plot updates
constantinpape Aug 21, 2025
90edf75
Minor changes
constantinpape Aug 22, 2025
107186b
Merge branch 'syn-coloc-subtypes' of https://github.com/computational…
constantinpape Aug 22, 2025
527e636
Merge branch 'master' into syn-coloc-subtypes
constantinpape Aug 22, 2025
fd17888
Plot updates
constantinpape Aug 22, 2025
a33f265
Update processing and block extraction scripts
constantinpape Aug 23, 2025
0ebea67
Update LaVision processing
constantinpape Aug 25, 2025
2a7b66e
Update SGN subtype measurements
constantinpape Aug 25, 2025
06ac936
Update SGN subtype analysis
constantinpape Aug 25, 2025
71b46ce
Update subtype analysis
constantinpape Aug 25, 2025
ed02fe3
Update La-Vision processing
constantinpape Aug 26, 2025
0b65b19
Add analysis script for LaVision data
constantinpape Aug 26, 2025
336d874
Add new IHC extraction
constantinpape Aug 26, 2025
bbc922a
Merge branch 'syn-coloc-subtypes' of https://github.com/computational…
constantinpape Aug 26, 2025
7921317
Update sgn subtype analysis
constantinpape Aug 27, 2025
98c6db4
Further updates to processing scripts; add function to sync mobie dat…
constantinpape Sep 1, 2025
37 changes: 27 additions & 10 deletions flamingo_tools/extract_block_util.py
@@ -1,9 +1,10 @@
import os
from typing import Optional, List
from typing import Optional, List, Union, Tuple

import imageio.v3 as imageio
import numpy as np
import zarr
from skimage.transform import rescale

import flamingo_tools.s3_utils as s3_utils
from flamingo_tools.file_utils import read_image_data
@@ -15,14 +16,15 @@ def extract_block(
output_dir: Optional[str] = None,
input_key: Optional[str] = None,
output_key: Optional[str] = None,
resolution: float = 0.38,
resolution: Union[float, Tuple[float, float, float]] = 0.38,
roi_halo: List[int] = [128, 128, 64],
tif: bool = False,
s3: Optional[bool] = False,
s3_credentials: Optional[str] = None,
s3_bucket_name: Optional[str] = None,
s3_service_endpoint: Optional[str] = None,
):
scale_factor: Optional[Tuple[float, float, float]] = None,
) -> None:
"""Extract block around coordinate from input data according to a given halo.
Either from a local file or from an S3 bucket.

@@ -38,11 +40,14 @@ def extract_block(
s3_bucket_name: S3 bucket name.
s3_service_endpoint: S3 service endpoint.
s3_credentials: File path to credentials for S3 bucket.
scale_factor: Optional factor for rescaling the extracted data.
"""
coords = [int(round(c)) for c in coords]
coord_string = "-".join([str(c).zfill(4) for c in coords])
coord_string = "-".join([str(int(round(c))).zfill(4) for c in coords])

# Dimensions are reversed relative to the MoBIE view: (x y z) -> (z y x)
# Make sure the coords / roi_halo are not modified in-place.
coords = coords.copy()
roi_halo = roi_halo.copy()
coords.reverse()
roi_halo.reverse()

@@ -61,6 +66,7 @@

if output_dir == "":
output_dir = input_dir
os.makedirs(output_dir, exist_ok=True)

if tif:
if output_key is None:
@@ -73,21 +79,32 @@
output_key = "raw" if output_key is None else output_key
output_file = os.path.join(output_dir, basename + "_crop_" + coord_string + ".n5")

coords = np.array(coords)
coords = np.array(coords).astype("float")
if not isinstance(resolution, float):
assert len(resolution) == 3
resolution = np.array(resolution)[::-1]
coords = coords / resolution
coords = np.round(coords).astype(np.int32)

roi = tuple(slice(co - rh, co + rh) for co, rh in zip(coords, roi_halo))

if s3:
input_path, fs = s3_utils.get_s3_path(input_path, bucket_name=s3_bucket_name,
service_endpoint=s3_service_endpoint, credential_file=s3_credentials)
input_path, fs = s3_utils.get_s3_path(
input_path, bucket_name=s3_bucket_name,
service_endpoint=s3_service_endpoint, credential_file=s3_credentials
)

data_ = read_image_data(input_path, input_key)
data_roi = data_[roi]
if scale_factor is not None:
kwargs = {"preserve_range": True}
# Check if this is a segmentation.
if data_roi.dtype in (np.dtype("int32"), np.dtype("uint32"), np.dtype("int64"), np.dtype("uint64")):
kwargs.update({"order": 0, "anti_aliasing": False})
data_roi = rescale(data_roi, scale_factor, **kwargs).astype(data_roi.dtype)

if tif:
imageio.imwrite(output_file, data_roi, compression="zlib")
else:
with zarr.open(output_file, mode="w") as f_out:
f_out.create_dataset(output_key, data=data_roi, compression="gzip")
f_out = zarr.open(output_file, mode="w")
f_out.create_dataset(output_key, data=data_roi, compression="gzip")
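
Below is a minimal usage sketch of the extended extract_block call with the new per-axis resolution and scale_factor options. All paths, keys, and values are placeholders (not taken from this PR), and the input_path/coords keyword names are assumed from the unchanged part of the signature and the function body.

```python
# Hypothetical usage sketch: extract a downsampled crop around a MoBIE coordinate.
# All paths, keys, and values below are placeholders, not taken from this PR.
from flamingo_tools.extract_block_util import extract_block

extract_block(
    input_path="/path/to/sample.n5",    # placeholder input container (assumed keyword name)
    coords=[512.0, 768.0, 256.0],       # placeholder MoBIE coordinate x y z (assumed keyword name)
    output_dir="./crops",
    input_key="setup0/timepoint0/s0",   # placeholder dataset key
    resolution=(0.38, 0.38, 1.0),       # per-axis resolution, newly supported as a tuple
    roi_halo=[128, 128, 64],
    scale_factor=(0.5, 0.5, 0.5),       # new optional rescaling of the extracted block
)
```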
4 changes: 2 additions & 2 deletions flamingo_tools/file_utils.py
@@ -85,6 +85,6 @@ def read_image_data(input_path: Union[str, Store], input_key: Optional[str]) ->
elif isinstance(input_path, str):
input_ = open_file(input_path, "r")[input_key]
else:
with zarr.open(input_path, mode="r") as f:
input_ = f[input_key]
f = zarr.open(input_path, mode="r")
input_ = f[input_key]
return input_
120 changes: 115 additions & 5 deletions flamingo_tools/s3_utils.py
@@ -1,7 +1,11 @@
"""This file contains utility functions for processing data located on an S3 storage.
The upload of data to the storage system should be performed with 'rclone'.
"""
import json
import os
import warnings
from shutil import which
from subprocess import run
from typing import Optional, Tuple

import s3fs
@@ -115,14 +119,23 @@ def get_s3_path(
bucket_name, service_endpoint, credential_file
)

s3_filesystem = create_s3_target(url=service_endpoint, anon=False, credential_file=credential_file)
zarr_major_version = int(zarr.__version__.split(".")[0])
s3_filesystem = create_s3_target(
url=service_endpoint, anon=False, credential_file=credential_file, asynchronous=zarr_major_version == 3,
)

zarr_path = f"{bucket_name}/{input_path}"

if not s3_filesystem.exists(zarr_path):
if zarr_major_version == 2 and not s3_filesystem.exists(zarr_path):
print(f"Error: S3 path {zarr_path} does not exist!")

s3_path = zarr.storage.FSStore(zarr_path, fs=s3_filesystem)
# The approach for opening a dataset from S3 differs in zarr v2 and zarr v3.
if zarr_major_version == 2:
s3_path = zarr.storage.FSStore(zarr_path, fs=s3_filesystem)
elif zarr_major_version == 3:
s3_path = zarr.storage.FsspecStore(fs=s3_filesystem, path=zarr_path)
else:
raise RuntimeError(f"Unsupported zarr version {zarr_major_version}")

return s3_path, s3_filesystem
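
For context, a short sketch of how the returned store is typically consumed downstream; this mirrors read_image_data above and works for both the zarr v2 FSStore and the zarr v3 FsspecStore. The object path, bucket name, credentials file, and dataset key are placeholders.

```python
# Hypothetical usage sketch: open a dataset through the store returned by get_s3_path.
# Object path, bucket, credentials file, and key are placeholders.
import zarr

from flamingo_tools.s3_utils import get_s3_path

store, fs = get_s3_path(
    "MyCochlea/images/ome-zarr/PV.ome.zarr",     # placeholder object path inside the bucket
    bucket_name="cochlea-lightsheet",            # placeholder bucket name
    credential_file="/path/to/s3_credentials",   # placeholder credentials file
)
# zarr.open accepts both store flavors returned above (FSStore for v2, FsspecStore for v3).
data = zarr.open(store, mode="r")["s0"]          # "s0" is a placeholder dataset key
print(data.shape)
```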

@@ -153,6 +166,7 @@ def create_s3_target(
url: Optional[str] = None,
anon: Optional[str] = False,
credential_file: Optional[str] = None,
asynchronous: bool = False,
) -> s3fs.core.S3FileSystem:
"""Create file system for S3 bucket based on a service endpoint and an optional credential file.
If the credential file is not provided, the s3fs.S3FileSystem function checks the environment variables
@@ -162,14 +176,110 @@
url: Service endpoint for S3 bucket
anon: Option for anon argument of S3FileSystem
credential_file: File path to credentials
asynchronous: Whether to open the file system in async mode.

Returns:
s3_filesystem
"""
client_kwargs = {"endpoint_url": SERVICE_ENDPOINT if url is None else url}
if credential_file is not None:
key, secret = read_s3_credentials(credential_file)
s3_filesystem = s3fs.S3FileSystem(key=key, secret=secret, client_kwargs=client_kwargs)
s3_filesystem = s3fs.S3FileSystem(
key=key, secret=secret, client_kwargs=client_kwargs, asynchronous=asynchronous
)
else:
s3_filesystem = s3fs.S3FileSystem(anon=anon, client_kwargs=client_kwargs)
s3_filesystem = s3fs.S3FileSystem(anon=anon, client_kwargs=client_kwargs, asynchronous=asynchronous)
return s3_filesystem


def _sync_rclone(local_dir, target):
# The rclone alias could also be exposed as parameter.
rclone_alias = "cochlea-lightsheet"
print("Sync", local_dir, "to", target)
run(["rclone", "--progress", "copyto", local_dir, f"{rclone_alias}:{target}"])


def sync_dataset(
mobie_root: str,
dataset_name: str,
bucket_name: Optional[str] = None,
url: Optional[str] = None,
anon: Optional[str] = False,
credential_file: Optional[str] = None,
force_segmentation_update: bool = False,
) -> None:
"""Sync a MoBIE dataset on the s3 bucket using rclone.

Args:
mobie_root: The directory with the local mobie project.
dataset_name: The mobie dataset to sync.
bucket_name: The name of the dataset's bucket on s3.
url: Service endpoint for S3 bucket
anon: Option for anon argument of S3FileSystem
credential_file: File path to credentials
force_segmentation_update: Whether to force segmentation updates.
"""
from mobie.metadata import add_remote_project_metadata

# Make sure that rclone is loaded.
if which("rclone") is None:
raise RuntimeError("rclone is required for synchronization. Try loading it via 'module load rclone'.")

# Make sure the dataset is part of the local version of the project.
with open(os.path.join(mobie_root, "project.json")) as f:
project_metadata = json.load(f)
datasets = project_metadata["datasets"]
assert dataset_name in datasets

# Get the S3 filesystem and bucket name.
s3 = create_s3_target(url, anon, credential_file)
if bucket_name is None:
bucket_name = BUCKET_NAME
if url is None:
url = SERVICE_ENDPOINT

# Add the required remote metadata to the project. Suppress warnings about missing local data.
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=UserWarning)
add_remote_project_metadata(mobie_root, bucket_name, url)

# Get the metadata from the S3 bucket.
project_metadata_path = os.path.join(bucket_name, "project.json")
with s3.open(project_metadata_path, "r") as f:
project_metadata = json.load(f)

# Check if the dataset is part of the remote project already.
local_ds_root = os.path.join(mobie_root, dataset_name)
remote_ds_root = os.path.join(bucket_name, dataset_name)
if dataset_name not in project_metadata["datasets"]:
print("The dataset is not yet synced. Will copy it over.")
_sync_rclone(os.path.join(mobie_root, "project.json"), project_metadata_path)
_sync_rclone(local_ds_root, remote_ds_root)
return

# Otherwise, check which sources are new and add them.
with open(os.path.join(mobie_root, dataset_name, "dataset.json")) as f:
local_dataset_metadata = json.load(f)

dataset_metadata_path = os.path.join(bucket_name, dataset_name, "dataset.json")
with s3.open(dataset_metadata_path, "r") as f:
remote_dataset_metadata = json.load(f)

for source_name, source_data in local_dataset_metadata["sources"].items():
source_type, source_data = next(iter(source_data.items()))
is_segmentation = source_type == "segmentation"
is_spots = source_type == "spots"
data_path = source_data["imageData"]["ome.zarr"]["relativePath"]
source_not_on_remote = (source_name not in remote_dataset_metadata["sources"])
# Only update the image data if the source is not yet on the remote or if we force updates for segmentations.
if source_not_on_remote or (is_segmentation and force_segmentation_update):
_sync_rclone(os.path.join(local_ds_root, data_path), os.path.join(remote_ds_root, data_path))
# We always sync the tables.
if is_segmentation or is_spots:
table_path = source_data["tableData"]["tsv"]["relativePath"]
_sync_rclone(os.path.join(local_ds_root, table_path), os.path.join(remote_ds_root, table_path))

# Sync the dataset metadata.
_sync_rclone(
os.path.join(mobie_root, dataset_name, "dataset.json"), os.path.join(remote_ds_root, "dataset.json")
)
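
A hedged sketch of how the new sync_dataset helper might be called after local processing; the MoBIE project root, dataset name, and credentials file are placeholders, and rclone must be available with the 'cochlea-lightsheet' alias configured.

```python
# Hypothetical usage sketch: sync a locally updated MoBIE dataset to S3.
# The project root, dataset name, and credentials file are placeholders.
from flamingo_tools.s3_utils import sync_dataset

sync_dataset(
    mobie_root="/path/to/mobie-project",        # placeholder local MoBIE project root
    dataset_name="MyCochlea",                   # placeholder dataset name
    credential_file="/path/to/s3_credentials",  # placeholder credentials file
    force_segmentation_update=False,            # set to True to re-upload segmentation image data
)
```

New sources and all segmentation/spot tables are synced; image data of segmentations that are already on the remote is only re-synced when force_segmentation_update is set.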
8 changes: 8 additions & 0 deletions reproducibility/label_components/IHC_gerbil.json
@@ -0,0 +1,8 @@
[
{
"cochlea": "G_EK_000233_L",
"image_channel": "VGlut3",
"cell_type": "ihc",
"unet_version": "v5"
}
]
6 changes: 6 additions & 0 deletions reproducibility/label_components/IHC_v4c_fig2.json
@@ -22,5 +22,11 @@
"image_channel": "VGlut3",
"cell_type": "ihc",
"unet_version": "v4c"
},
{
"cochlea": "G_EK_000233_L",
"image_channel": "VGlut3",
"cell_type": "ihc",
"unet_version": "v5"
}
]
22 changes: 18 additions & 4 deletions reproducibility/label_components/repro_label_components.py
@@ -6,6 +6,7 @@
import pandas as pd
from flamingo_tools.s3_utils import get_s3_path
from flamingo_tools.segmentation.postprocessing import label_components_sgn, label_components_ihc
from flamingo_tools.segmentation.cochlea_mapping import tonotopic_mapping


def repro_label_components(
@@ -14,6 +15,7 @@ def repro_label_components(
s3_credentials: Optional[str] = None,
s3_bucket_name: Optional[str] = None,
s3_service_endpoint: Optional[str] = None,
apply_tonotopic_mapping: bool = False,
):
min_size = 1000
default_threshold_erode = None
@@ -23,7 +25,7 @@
default_cell_type = "sgn"
default_component_list = [1]

with open(ddict, 'r') as myfile:
with open(ddict, "r") as myfile:
data = myfile.read()
param_dicts = json.loads(data)

@@ -39,11 +41,17 @@
cell_type = dic["cell_type"] if "cell_type" in dic else default_cell_type
component_list = dic["component_list"] if "component_list" in dic else default_component_list

# The table name sometimes has to be overwritten.
# table_name = "PV_SGN_V2_DA"
# table_name = "CR_SGN_v2"
# table_name = "Ntng1_SGN_v2"

table_name = f"{cell_type.upper()}_{unet_version}"

s3_path = os.path.join(f"{cochlea}", "tables", table_name, "default.tsv")
tsv_path, fs = get_s3_path(s3_path, bucket_name=s3_bucket_name,
service_endpoint=s3_service_endpoint, credential_file=s3_credentials)
with fs.open(tsv_path, 'r') as f:
with fs.open(tsv_path, "r") as f:
table = pd.read_csv(f, sep="\t")

if cell_type == "sgn":
@@ -66,8 +74,12 @@
else:
print(f"Custom component(s) have {largest_comp} {cell_type.upper()}s.")

if apply_tonotopic_mapping:
tsv_table = tonotopic_mapping(tsv_table, cell_type=cell_type)

cochlea_str = "-".join(cochlea.split("_"))
table_str = "-".join(table_name.split("_"))
os.makedirs(output_dir, exist_ok=True)
out_path = os.path.join(output_dir, "_".join([cochlea_str, f"{table_str}.tsv"]))

tsv_table.to_csv(out_path, sep="\t", index=False)
@@ -77,8 +89,9 @@ def main():
parser = argparse.ArgumentParser(
description="Script to label segmentation using a segmentation table and graph connected components.")

parser.add_argument('-i', '--input', type=str, required=True, help="Input JSON dictionary.")
parser.add_argument('-o', "--output", type=str, required=True, help="Output directory.")
parser.add_argument("-i", "--input", type=str, required=True, help="Input JSON dictionary.")
parser.add_argument("-o", "--output", type=str, required=True, help="Output directory.")
parser.add_argument("-t", "--tonotopic_mapping", action="store_true", help="Also compute the tonotopic mapping.")

parser.add_argument("--s3_credentials", type=str, default=None,
help="Input file containing S3 credentials. "
@@ -93,6 +106,7 @@
repro_label_components(
args.input, args.output,
args.s3_credentials, args.s3_bucket_name, args.s3_service_endpoint,
apply_tonotopic_mapping=args.tonotopic_mapping,
)


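
For reference, a hedged sketch of invoking the updated labeling routine with the new tonotopic-mapping option; the output directory is a placeholder, and the call assumes the script's directory is the working directory.

```python
# Hypothetical usage sketch: label components and also compute the tonotopic mapping.
# The output directory is a placeholder.
from repro_label_components import repro_label_components

repro_label_components(
    "IHC_gerbil.json",               # parameter dictionary added in this PR
    "./component_tables",            # placeholder output directory
    apply_tonotopic_mapping=True,    # new option: also compute the tonotopic mapping
)
```

Equivalently, the script's argparse CLI exposes this via the new -t/--tonotopic_mapping flag alongside -i and -o.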
10 changes: 10 additions & 0 deletions reproducibility/templates_processing/REAMDE.md
@@ -12,7 +12,17 @@ For IHC segmentation run:
- apply_unet_IHC_template.sbatch
- segment_unet_IHC_template.sbatch

After this, run the following to add segmentation to MoBIE, create component labelings and upload to S3:
- templates_transfer/mobie_segmentation_template.sbatch
- templates_transfer/sync_mobie.py
- label_components/repro_label_components.py
- templates_transfer/sync_mobie.py

For ribbon synapse detection without associated IHC segmentation run:
- detect_synapse_template.sbatch
For ribbon synapse detection with associated IHC segmentation run:
- detect_synapse_marker_template.sbatch

After this, run the following to add detections to MoBIE and upload to S3:
- templates_transfer/mobie_spots_template.sbatch
- templates_transfer/sync_mobie.py