
Commit cb554bc

Merge pull request #62 from computational-cell-analytics/syn-coloc-subtypes
The branch includes updates to plots, LaVision processing, SGN subtype analysis, and synchronisation of MoBIE data. It is functional and should be merged before any additional changes are made.
2 parents ede96b9 + 98c6db4 commit cb554bc

Some content is hidden: large commits have part of their diff collapsed by default, so only a subset of the changed files is shown below.

43 files changed: +1977 / -171 lines

flamingo_tools/extract_block_util.py

Lines changed: 27 additions & 10 deletions
@@ -1,9 +1,10 @@
 import os
-from typing import Optional, List
+from typing import Optional, List, Union, Tuple

 import imageio.v3 as imageio
 import numpy as np
 import zarr
+from skimage.transform import rescale

 import flamingo_tools.s3_utils as s3_utils
 from flamingo_tools.file_utils import read_image_data
@@ -15,14 +16,15 @@ def extract_block(
     output_dir: Optional[str] = None,
     input_key: Optional[str] = None,
     output_key: Optional[str] = None,
-    resolution: float = 0.38,
+    resolution: Union[float, Tuple[float, float, float]] = 0.38,
     roi_halo: List[int] = [128, 128, 64],
     tif: bool = False,
     s3: Optional[bool] = False,
     s3_credentials: Optional[str] = None,
     s3_bucket_name: Optional[str] = None,
     s3_service_endpoint: Optional[str] = None,
-):
+    scale_factor: Optional[Tuple[float, float, float]] = None,
+) -> None:
     """Extract block around coordinate from input data according to a given halo.
     Either from a local file or from an S3 bucket.

@@ -38,11 +40,14 @@ def extract_block(
         s3_bucket_name: S3 bucket name.
         s3_service_endpoint: S3 service endpoint.
         s3_credentials: File path to credentials for S3 bucket.
+        scale_factor: Optional factor for rescaling the extracted data.
     """
-    coords = [int(round(c)) for c in coords]
-    coord_string = "-".join([str(c).zfill(4) for c in coords])
+    coord_string = "-".join([str(int(round(c))).zfill(4) for c in coords])

     # Dimensions are inversed to view in MoBIE (x y z) -> (z y x)
+    # Make sure the coords / roi_halo are not modified in-place.
+    coords = coords.copy()
+    roi_halo = roi_halo.copy()
     coords.reverse()
     roi_halo.reverse()

@@ -61,6 +66,7 @@ def extract_block(

     if output_dir == "":
         output_dir = input_dir
+    os.makedirs(output_dir, exist_ok=True)

     if tif:
         if output_key is None:
@@ -73,21 +79,32 @@ def extract_block(
         output_key = "raw" if output_key is None else output_key
         output_file = os.path.join(output_dir, basename + "_crop_" + coord_string + ".n5")

-    coords = np.array(coords)
+    coords = np.array(coords).astype("float")
+    if not isinstance(resolution, float):
+        assert len(resolution) == 3
+        resolution = np.array(resolution)[::-1]
     coords = coords / resolution
     coords = np.round(coords).astype(np.int32)

     roi = tuple(slice(co - rh, co + rh) for co, rh in zip(coords, roi_halo))

     if s3:
-        input_path, fs = s3_utils.get_s3_path(input_path, bucket_name=s3_bucket_name,
-                                              service_endpoint=s3_service_endpoint, credential_file=s3_credentials)
+        input_path, fs = s3_utils.get_s3_path(
+            input_path, bucket_name=s3_bucket_name,
+            service_endpoint=s3_service_endpoint, credential_file=s3_credentials
+        )

     data_ = read_image_data(input_path, input_key)
     data_roi = data_[roi]
+    if scale_factor is not None:
+        kwargs = {"preserve_range": True}
+        # Check if this is a segmentation.
+        if data_roi.dtype in (np.dtype("int32"), np.dtype("uint32"), np.dtype("int64"), np.dtype("uint64")):
+            kwargs.update({"order": 0, "anti_aliasing": False})
+        data_roi = rescale(data_roi, scale_factor, **kwargs).astype(data_roi.dtype)

     if tif:
         imageio.imwrite(output_file, data_roi, compression="zlib")
     else:
-        with zarr.open(output_file, mode="w") as f_out:
-            f_out.create_dataset(output_key, data=data_roi, compression="gzip")
+        f_out = zarr.open(output_file, mode="w")
+        f_out.create_dataset(output_key, data=data_roi, compression="gzip")
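
To illustrate the new options, a minimal usage sketch (not part of the commit): the input path, coordinate, and values are invented, and the names of the leading parameters are assumed, since they sit outside the diff context.

    # Hypothetical call of the updated extract_block; paths, coordinate and values are invented.
    from flamingo_tools.extract_block_util import extract_block

    extract_block(
        "/path/to/cochlea.n5",             # assumed input volume (parameter name not shown in the diff)
        [1200.0, 850.0, 400.0],            # assumed center coordinate in MoBIE (x, y, z) order
        output_dir="crops",
        input_key="raw",
        resolution=(0.38, 0.38, 1.0),      # per-axis (anisotropic) resolution is now supported
        roi_halo=[128, 128, 64],
        scale_factor=(1.0, 1.0, 0.5),      # optional rescaling of the extracted block
    )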

flamingo_tools/file_utils.py

Lines changed: 2 additions & 2 deletions
@@ -85,6 +85,6 @@ def read_image_data(input_path: Union[str, Store], input_key: Optional[str]) ->
     elif isinstance(input_path, str):
         input_ = open_file(input_path, "r")[input_key]
     else:
-        with zarr.open(input_path, mode="r") as f:
-            input_ = f[input_key]
+        f = zarr.open(input_path, mode="r")
+        input_ = f[input_key]
     return input_
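
This mirrors the change in extract_block_util.py: zarr.open is no longer wrapped in a with-statement, presumably because the group objects returned by zarr v3 are not context managers. A minimal sketch of the resulting read path, with an invented local store path and key:

    import zarr

    f = zarr.open("/path/to/example.zarr", mode="r")   # invented store path
    input_ = f["raw"]                                   # "raw" is an assumed key; indexing stays lazy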

flamingo_tools/s3_utils.py

Lines changed: 115 additions & 5 deletions
@@ -1,7 +1,11 @@
 """This file contains utility functions for processing data located on an S3 storage.
 The upload of data to the storage system should be performed with 'rclone'.
 """
+import json
 import os
+import warnings
+from shutil import which
+from subprocess import run
 from typing import Optional, Tuple

 import s3fs
@@ -115,14 +119,23 @@ def get_s3_path(
             bucket_name, service_endpoint, credential_file
         )

-    s3_filesystem = create_s3_target(url=service_endpoint, anon=False, credential_file=credential_file)
+    zarr_major_version = int(zarr.__version__.split(".")[0])
+    s3_filesystem = create_s3_target(
+        url=service_endpoint, anon=False, credential_file=credential_file, asynchronous=zarr_major_version == 3,
+    )

     zarr_path = f"{bucket_name}/{input_path}"

-    if not s3_filesystem.exists(zarr_path):
+    if zarr_major_version == 2 and not s3_filesystem.exists(zarr_path):
         print(f"Error: S3 path {zarr_path} does not exist!")

-    s3_path = zarr.storage.FSStore(zarr_path, fs=s3_filesystem)
+    # The approach for opening a dataset from S3 differs in zarr v2 and zarr v3.
+    if zarr_major_version == 2:
+        s3_path = zarr.storage.FSStore(zarr_path, fs=s3_filesystem)
+    elif zarr_major_version == 3:
+        s3_path = zarr.storage.FsspecStore(fs=s3_filesystem, path=zarr_path)
+    else:
+        raise RuntimeError(f"Unsupported zarr version {zarr_major_version}")

     return s3_path, s3_filesystem
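
get_s3_path now picks the store class from the installed zarr major version: FSStore for zarr v2, FsspecStore (with an asynchronous filesystem) for zarr v3. A hedged usage sketch; the object path, bucket name, and credential file are invented:

    import zarr
    from flamingo_tools.s3_utils import get_s3_path

    s3_store, fs = get_s3_path(
        "G_EK_000233_L/images/ome-zarr/VGlut3.ome.zarr",   # invented object path inside the bucket
        bucket_name="cochlea-lightsheet",                  # invented bucket name
        credential_file="~/.s3_credentials",               # invented credentials location
    )
    # The returned store is opened the same way for both zarr versions.
    data = zarr.open(s3_store, mode="r")["s0"]             # "s0" is an assumed multiscale level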

@@ -153,6 +166,7 @@ def create_s3_target(
     url: Optional[str] = None,
     anon: Optional[str] = False,
     credential_file: Optional[str] = None,
+    asynchronous: bool = False,
 ) -> s3fs.core.S3FileSystem:
     """Create file system for S3 bucket based on a service endpoint and an optional credential file.
     If the credential file is not provided, the s3fs.S3FileSystem function checks the environment variables
@@ -162,14 +176,110 @@ def create_s3_target(
         url: Service endpoint for S3 bucket
         anon: Option for anon argument of S3FileSystem
         credential_file: File path to credentials
+        asynchronous: Whether to open the file system in async mode.

     Returns:
         s3_filesystem
     """
     client_kwargs = {"endpoint_url": SERVICE_ENDPOINT if url is None else url}
     if credential_file is not None:
         key, secret = read_s3_credentials(credential_file)
-        s3_filesystem = s3fs.S3FileSystem(key=key, secret=secret, client_kwargs=client_kwargs)
+        s3_filesystem = s3fs.S3FileSystem(
+            key=key, secret=secret, client_kwargs=client_kwargs, asynchronous=asynchronous
+        )
     else:
-        s3_filesystem = s3fs.S3FileSystem(anon=anon, client_kwargs=client_kwargs)
+        s3_filesystem = s3fs.S3FileSystem(anon=anon, client_kwargs=client_kwargs, asynchronous=asynchronous)
     return s3_filesystem
+
+
+def _sync_rclone(local_dir, target):
+    # The rclone alias could also be exposed as parameter.
+    rclone_alias = "cochlea-lightsheet"
+    print("Sync", local_dir, "to", target)
+    run(["rclone", "--progress", "copyto", local_dir, f"{rclone_alias}:{target}"])
+
+
+def sync_dataset(
+    mobie_root: str,
+    dataset_name: str,
+    bucket_name: Optional[str] = None,
+    url: Optional[str] = None,
+    anon: Optional[str] = False,
+    credential_file: Optional[str] = None,
+    force_segmentation_update: bool = False,
+) -> None:
+    """Sync a MoBIE dataset on the s3 bucket using rclone.
+
+    Args:
+        mobie_root: The directory with the local mobie project.
+        dataset_name: The mobie dataset to sync.
+        bucket_name: The name of the dataset's bucket on s3.
+        url: Service endpoint for S3 bucket
+        anon: Option for anon argument of S3FileSystem
+        credential_file: File path to credentials
+        force_segmentation_update: Whether to force segmentation updates.
+    """
+    from mobie.metadata import add_remote_project_metadata
+
+    # Make sure that rclone is loaded.
+    if which("rclone") is None:
+        raise RuntimeError("rclone is required for synchronization. Try loading it via 'module load rclone'.")
+
+    # Make sure the dataset is in the local version of the dataset.
+    with open(os.path.join(mobie_root, "project.json")) as f:
+        project_metadata = json.load(f)
+    datasets = project_metadata["datasets"]
+    assert dataset_name in datasets
+
+    # Get s3 filsystem and bucket name.
+    s3 = create_s3_target(url, anon, credential_file)
+    if bucket_name is None:
+        bucket_name = BUCKET_NAME
+    if url is None:
+        url = SERVICE_ENDPOINT
+
+    # Add the required remote metadata to the project. Suppress warnings about missing local data.
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore", category=UserWarning)
+        add_remote_project_metadata(mobie_root, bucket_name, url)
+
+    # Get the metadata from the S3 bucket.
+    project_metadata_path = os.path.join(bucket_name, "project.json")
+    with s3.open(project_metadata_path, "r") as f:
+        project_metadata = json.load(f)
+
+    # Check if the dataset is part of the remote project already.
+    local_ds_root = os.path.join(mobie_root, dataset_name)
+    remote_ds_root = os.path.join(bucket_name, dataset_name)
+    if dataset_name not in project_metadata["datasets"]:
+        print("The dataset is not yet synced. Will copy it over.")
+        _sync_rclone(os.path.join(mobie_root, "project.json"), project_metadata_path)
+        _sync_rclone(local_ds_root, remote_ds_root)
+        return
+
+    # Otherwise, check which sources are new and add them.
+    with open(os.path.join(mobie_root, dataset_name, "dataset.json")) as f:
+        local_dataset_metadata = json.load(f)
+
+    dataset_metadata_path = os.path.join(bucket_name, dataset_name, "dataset.json")
+    with s3.open(dataset_metadata_path, "r") as f:
+        remote_dataset_metadata = json.load(f)
+
+    for source_name, source_data in local_dataset_metadata["sources"].items():
+        source_type, source_data = next(iter(source_data.items()))
+        is_segmentation = source_type == "segmentation"
+        is_spots = source_type == "spots"
+        data_path = source_data["imageData"]["ome.zarr"]["relativePath"]
+        source_not_on_remote = (source_name not in remote_dataset_metadata["sources"])
+        # Only update the image data if the source is not updated or if we force updates for segmentations.
+        if source_not_on_remote or (is_segmentation and force_segmentation_update):
+            _sync_rclone(os.path.join(local_ds_root, data_path), os.path.join(remote_ds_root, data_path))
+        # We always sync the tables.
+        if is_segmentation or is_spots:
+            table_path = source_data["tableData"]["tsv"]["relativePath"]
+            _sync_rclone(os.path.join(local_ds_root, table_path), os.path.join(remote_ds_root, table_path))

+    # Sync the dataset metadata.
+    _sync_rclone(
+        os.path.join(mobie_root, dataset_name, "dataset.json"), os.path.join(remote_ds_root, "dataset.json")
+    )
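
The new sync_dataset helper wraps rclone: it registers the remote metadata via MoBIE, copies the whole dataset if it is not on the bucket yet, and otherwise only transfers new sources (plus tables for segmentation and spot sources) before updating dataset.json. A minimal, hypothetical invocation; the project path, dataset name, and credential file are invented, and rclone with the "cochlea-lightsheet" alias must be configured for it to do anything:

    from flamingo_tools.s3_utils import sync_dataset

    sync_dataset(
        mobie_root="/path/to/mobie-project",   # invented local MoBIE project (must contain project.json)
        dataset_name="cochlea",                # invented dataset name listed in project.json
        credential_file="~/.s3_credentials",   # invented credentials location
        force_segmentation_update=False,       # set to True to re-upload already-synced segmentations
    )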
Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
+[
+    {
+        "cochlea": "G_EK_000233_L",
+        "image_channel": "VGlut3",
+        "cell_type": "ihc",
+        "unet_version": "v5"
+    }
+]

reproducibility/label_components/IHC_v4c_fig2.json

Lines changed: 6 additions & 0 deletions
@@ -22,5 +22,11 @@
         "image_channel": "VGlut3",
         "cell_type": "ihc",
         "unet_version": "v4c"
+    },
+    {
+        "cochlea": "G_EK_000233_L",
+        "image_channel": "VGlut3",
+        "cell_type": "ihc",
+        "unet_version": "v5"
     }
 ]

reproducibility/label_components/repro_label_components.py

Lines changed: 18 additions & 4 deletions
@@ -6,6 +6,7 @@
 import pandas as pd
 from flamingo_tools.s3_utils import get_s3_path
 from flamingo_tools.segmentation.postprocessing import label_components_sgn, label_components_ihc
+from flamingo_tools.segmentation.cochlea_mapping import tonotopic_mapping


 def repro_label_components(
@@ -14,6 +15,7 @@ def repro_label_components(
     s3_credentials: Optional[str] = None,
     s3_bucket_name: Optional[str] = None,
     s3_service_endpoint: Optional[str] = None,
+    apply_tonotopic_mapping: bool = False,
 ):
     min_size = 1000
     default_threshold_erode = None
@@ -23,7 +25,7 @@ def repro_label_components(
     default_cell_type = "sgn"
     default_component_list = [1]

-    with open(ddict, 'r') as myfile:
+    with open(ddict, "r") as myfile:
         data = myfile.read()
     param_dicts = json.loads(data)

@@ -39,11 +41,17 @@ def repro_label_components(
         cell_type = dic["cell_type"] if "cell_type" in dic else default_cell_type
         component_list = dic["component_list"] if "component_list" in dic else default_component_list

+        # The table name sometimes has to be over-written.
+        # table_name = "PV_SGN_V2_DA"
+        # table_name = "CR_SGN_v2"
+        # table_name = "Ntng1_SGN_v2"
+
         table_name = f"{cell_type.upper()}_{unet_version}"
+
         s3_path = os.path.join(f"{cochlea}", "tables", table_name, "default.tsv")
         tsv_path, fs = get_s3_path(s3_path, bucket_name=s3_bucket_name,
                                    service_endpoint=s3_service_endpoint, credential_file=s3_credentials)
-        with fs.open(tsv_path, 'r') as f:
+        with fs.open(tsv_path, "r") as f:
             table = pd.read_csv(f, sep="\t")

         if cell_type == "sgn":
@@ -66,8 +74,12 @@ def repro_label_components(
         else:
             print(f"Custom component(s) have {largest_comp} {cell_type.upper()}s.")

+        if apply_tonotopic_mapping:
+            tsv_table = tonotopic_mapping(tsv_table, cell_type=cell_type)
+
         cochlea_str = "-".join(cochlea.split("_"))
         table_str = "-".join(table_name.split("_"))
+        os.makedirs(output_dir, exist_ok=True)
         out_path = os.path.join(output_dir, "_".join([cochlea_str, f"{table_str}.tsv"]))

         tsv_table.to_csv(out_path, sep="\t", index=False)
@@ -77,8 +89,9 @@ def main():
     parser = argparse.ArgumentParser(
         description="Script to label segmentation using a segmentation table and graph connected components.")

-    parser.add_argument('-i', '--input', type=str, required=True, help="Input JSON dictionary.")
-    parser.add_argument('-o', "--output", type=str, required=True, help="Output directory.")
+    parser.add_argument("-i", "--input", type=str, required=True, help="Input JSON dictionary.")
+    parser.add_argument("-o", "--output", type=str, required=True, help="Output directory.")
+    parser.add_argument("-t", "--tonotopic_mapping", action="store_true", help="Also compute the tonotopic mapping.")

     parser.add_argument("--s3_credentials", type=str, default=None,
                         help="Input file containing S3 credentials. "
@@ -93,6 +106,7 @@ def main():
     repro_label_components(
         args.input, args.output,
         args.s3_credentials, args.s3_bucket_name, args.s3_service_endpoint,
+        apply_tonotopic_mapping=args.tonotopic_mapping,
     )
reproducibility/templates_processing/REAMDE.md

Lines changed: 10 additions & 0 deletions
@@ -12,7 +12,17 @@ For IHC segmentation run:
 - apply_unet_IHC_template.sbatch
 - segment_unet_IHC_template.sbatch

+After this, run the following to add segmentation to MoBIE, create component labelings and upload to S3:
+- templates_transfer/mobie_segmentation_template.sbatch
+- templates_transfer/sync_mobie.py
+- label_components/repro_label_components.py
+- templates_transfer/sync_mobie.py
+
 For ribbon synapse detection without associated IHC segmentation run
 - detect_synapse_template.sbatch
 For ribbon synapse detection with associated IHC segmentation run
 - detect_synapse_marker_template.sbatch
+
+After this, run the following to add detections to MoBIE and upload to S3:
+- templates_transfer/mobie_spots_template.sbatch
+- templates_transfer/sync_mobie.py
