
Commit 88af4e3

Add option to cache background mask in object measures and add script for processing all ChReef cochleae
1 parent 15c4003 commit 88af4e3

5 files changed: 126 additions, 1 deletion


flamingo_tools/measurements.py

Lines changed: 18 additions & 1 deletion
@@ -7,6 +7,7 @@
 import numpy as np
 import pandas as pd
 import trimesh
+from elf.io import open_file
 from elf.wrapper.resized_volume import ResizedVolume
 from nifty.tools import blocking
 from skimage.measure import marching_cubes, regionprops_table
@@ -261,6 +262,7 @@ def compute_object_measures_impl(

     # For debugging.
     # measure_function(seg_ids[0])
+    # breakpoint()

     with futures.ThreadPoolExecutor(n_threads) as pool:
         measures = list(tqdm(
@@ -352,6 +354,7 @@ def compute_sgn_background_mask(
     threshold_percentile: float = 35.0,
     scale_factor: Tuple[int, int, int] = (16, 16, 16),
     n_threads: Optional[int] = None,
+    cache_path: Optional[str] = None,
 ) -> np.typing.ArrayLike:
     """Compute the background mask for intensity measurements in the SGN segmentation.

@@ -368,6 +371,7 @@
         threshold_percentile: The percentile threshold for separating foreground and background in the PV signal.
         scale_factor: The scale factor for internally downsampling the mask.
         n_threads: The number of threads for parallelizing the computation.
+        cache_path: Optional path to save the downscaled background mask to zarr.

     Returns:
         The mask for determining the background values.
@@ -376,6 +380,13 @@
     segmentation = read_image_data(segmentation_path, segmentation_key)
     assert image.shape == segmentation.shape

+    if cache_path is not None and os.path.exists(cache_path):
+        with open_file(cache_path, "r") as f:
+            if "mask" in f:
+                low_res_mask = f["mask"][:]
+                mask = ResizedVolume(low_res_mask, shape=image.shape, order=0)
+                return mask
+
     original_shape = image.shape
     downsampled_shape = tuple(int(np.round(sh / sf)) for sh, sf in zip(original_shape, scale_factor))

@@ -406,10 +417,16 @@ def _compute_block(block_id):
         low_res_mask[bb] = this_mask

     n_threads = mp.cpu_count() if n_threads is None else n_threads
+    randomized_blocks = np.arange(0, n_blocks)
+    np.random.shuffle(randomized_blocks)
     with futures.ThreadPoolExecutor(n_threads) as tp:
         list(tqdm(
-            tp.map(_compute_block, range(n_blocks)), total=n_blocks, desc="Compute background mask"
+            tp.map(_compute_block, randomized_blocks), total=n_blocks, desc="Compute background mask"
         ))

+    if cache_path is not None:
+        with open_file(cache_path, "a") as f:
+            f.create_dataset("mask", data=low_res_mask, chunks=(64, 64, 64))
+
     mask = ResizedVolume(low_res_mask, shape=original_shape, order=0)
     return mask
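
For orientation, a minimal usage sketch of the new caching behavior: the first call to compute_sgn_background_mask computes the mask block-wise and writes the downscaled version to the zarr cache, and any later call with the same cache_path loads it instead of recomputing. The paths and dataset keys below are illustrative placeholders, not values from this commit.

from flamingo_tools.measurements import compute_sgn_background_mask

# Illustrative placeholders for the image / segmentation locations and dataset keys.
bg_mask = compute_sgn_background_mask(
    image_path="GFP.ome.zarr",
    segmentation_path="SGN_v2.ome.zarr",
    image_key="s0",
    segmentation_key="s0",
    n_threads=16,
    cache_path="GFP_SGN-v2_bg-mask.zarr",  # the downscaled mask is stored here under the "mask" key
)
# Re-running with the same cache_path skips the block-wise computation and
# upsamples the cached low-resolution mask via ResizedVolume instead.
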
Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+[
+    {
+        "cochlea": "M_LR_000143_R",
+        "image_channel": [
+            "GFP"
+        ],
+        "segmentation_channel": "SGN_v2",
+        "background_mask": "yes",
+        "component_list": [
+            1
+        ]
+    }
+]
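
Judging from the processing scripts in this commit, the template's fields map as follows: "cochlea" names the dataset, "image_channel" lists the intensity channels to measure (here GFP), "segmentation_channel" selects the SGN_v2 segmentation, "background_mask": "yes" appears to enable the background-mask based measurement, and "component_list" presumably restricts the measurement to the listed segmentation components; the TODO in the processing script notes that this list may need adjusting for some cochleae.
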
Lines changed: 74 additions & 0 deletions
@@ -0,0 +1,74 @@
+import json
+import os
+import subprocess
+import zarr
+
+import flamingo_tools.s3_utils as s3_utils
+
+OUTPUT_ROOT = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/mobie_project/cochlea-lightsheet/tables/measurements/"  # noqa
+JSON_ROOT = "/user/pape41/u12086/Work/my_projects/flamingo-tools/reproducibility/object_measures"
+COCHLEAE = [
+    "M_LR_000143_L",
+    "M_LR_000144_L",
+    "M_LR_000145_L",
+    "M_LR_000153_L",
+    "M_LR_000155_L",
+    "M_LR_000189_L",
+    "M_LR_000143_R",
+    "M_LR_000144_R",
+    "M_LR_000145_R",
+    "M_LR_000153_R",
+    "M_LR_000155_R",
+    "M_LR_000189_R",
+]
+
+
+def process_cochlea(cochlea, start_slurm):
+    short_name = cochlea.replace("_", "").replace("0", "")
+
+    # Check if this cochlea has been processed already.
+    output_name = cochlea.replace("_", "-")
+    output_path = os.path.join(OUTPUT_ROOT, f"{output_name}_GFP_SGN-v2_object-measures.tsv")
+    if os.path.exists(output_path):
+        print(cochlea, "has been processed already.")
+        return
+
+    # Check if the raw data for this cochlea is accessible.
+    img_name = f"{cochlea}/images/ome-zarr/GFP.ome.zarr"
+    img_path, _ = s3_utils.get_s3_path(img_name)
+    try:
+        zarr.open(img_path, mode="r")
+    except Exception:
+        print("The data for", cochlea, "at", img_name, "does not exist.")
+        return
+
+    # Then generate the json file if it does not yet exist.
+    template_path = os.path.join(JSON_ROOT, "ChReef_MLR143L.json")
+    with open(template_path, "r") as f:
+        json_template = json.load(f)
+
+    json_path = os.path.join(JSON_ROOT, f"ChReef_{short_name}.json")
+    if not os.path.exists(json_path):
+        print("Write json to", json_path)
+        # TODO: We may need to replace the component list for some.
+        json_template[0]["cochlea"] = cochlea
+        with open(json_path, "w") as f:
+            json.dump(json_template, f, indent=4)
+
+    print(cochlea, "is not yet processed")
+    # Then start the slurm job.
+    if not start_slurm:
+        return
+
+    print("Submit slurm job for", cochlea)
+    subprocess.run(["sbatch", "slurm_template.sbatch", json_path, OUTPUT_ROOT])
+
+
+def main():
+    start_slurm = False
+    for cochlea in COCHLEAE:
+        process_cochlea(cochlea, start_slurm)
+
+
+if __name__ == "__main__":
+    main()
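
As committed, main() runs with start_slurm = False, so the script only checks which cochleae have already been processed and writes the per-cochlea JSON configs; flipping start_slurm to True additionally submits one slurm job per unprocessed cochlea via slurm_template.sbatch. A hypothetical convenience, not part of this commit, would be to expose this switch as a command-line flag:

# Hypothetical addition (not in the commit): toggle job submission from the command line.
import argparse


def main():
    parser = argparse.ArgumentParser(description="Process all ChReef cochleae.")
    parser.add_argument("--submit", action="store_true",
                        help="Submit slurm jobs instead of only writing the JSON configs.")
    args = parser.parse_args()
    for cochlea in COCHLEAE:
        process_cochlea(cochlea, start_slurm=args.submit)
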

reproducibility/object_measures/repro_object_measures.py

Lines changed: 6 additions & 0 deletions
@@ -1,6 +1,7 @@
 import argparse
 import json
 import os
+from multiprocessing import cpu_count
 from typing import Optional

 import flamingo_tools.s3_utils as s3_utils
@@ -49,6 +50,7 @@ def repro_object_measures(
     seg_path, fs = s3_utils.get_s3_path(seg_s3, bucket_name=s3_bucket_name,
                                         service_endpoint=s3_service_endpoint, credential_file=s3_credentials)

+    n_threads = int(os.environ.get("SLURM_CPUS_ON_NODE", cpu_count()))
     if os.path.isfile(output_table_path) and not force_overwrite:
         print(f"Skipping creation of {output_table_path}. File already exists.")

@@ -62,11 +64,14 @@ def repro_object_measures(
         feature_set = "default_background_subtract"
         dilation = 4
         median_only = True
+        mask_cache_path = os.path.join(output_dir, f"{cochlea_str}_{img_str}_{seg_str}_bg-mask.zarr")
         bg_mask = compute_sgn_background_mask(
             image_path=img_path,
             segmentation_path=seg_path,
             image_key=input_key,
             segmentation_key=input_key,
+            n_threads=n_threads,
+            cache_path=mask_cache_path,
         )

     compute_object_measures(
@@ -82,6 +87,7 @@ def repro_object_measures(
         dilation=dilation,
        median_only=median_only,
        background_mask=bg_mask,
+        n_threads=n_threads,
    )


Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
+#!/bin/bash
+#SBATCH -t 24:00:00                  # estimated time, adapt to your needs
+#SBATCH [email protected]  # change this to your mail address
+#SBATCH --mail-type=FAIL             # send mail when the job fails
+
+#SBATCH -p standard96s:shared        # the partition
+#SBATCH -A nim00007
+
+#SBATCH -c 32
+#SBATCH --mem 256G
+
+PYTHON=/scratch-grete/usr/nimcpape/software/mamba/envs/sam/bin/python
+SCRIPT=/mnt/vast-nhr/home/pape41/u12086/Work/my_projects/flamingo-tools/reproducibility/object_measures/repro_object_measures.py
+
+$PYTHON $SCRIPT --input $1 --output $2
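
The template takes the JSON config as $1 and the output directory as $2, matching the subprocess call in the processing script above; a manual submission therefore looks like: sbatch slurm_template.sbatch <config.json> <output-dir>.
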
