Skip to content

Commit 4566e99

Browse files
Update SGN subtype scripts
1 parent 4473d9d commit 4566e99

File tree

3 files changed

+82
-15
lines changed

3 files changed

+82
-15
lines changed

flamingo_tools/file_utils.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
import zarr
88
from elf.io import open_file
99

10+
from .s3_utils import get_s3_path
11+
1012
try:
1113
from zarr.abc.store import Store
1214
except ImportError:
@@ -67,7 +69,9 @@ def read_tif(file_path: str) -> Union[np.ndarray, np.memmap]:
6769
return x
6870

6971

70-
def read_image_data(input_path: Union[str, Store], input_key: Optional[str]) -> np.typing.ArrayLike:
72+
def read_image_data(
73+
input_path: Union[str, Store], input_key: Optional[str], from_s3: bool = False
74+
) -> np.typing.ArrayLike:
7175
"""Read flamingo image data, stored in various formats.
7276
7377
Args:
@@ -76,10 +80,16 @@ def read_image_data(input_path: Union[str, Store], input_key: Optional[str]) ->
7680
Access via S3 is only supported for a zarr container.
7781
input_key: The key (= internal path) for a zarr or n5 container.
7882
Set it to None if the data is stored in a tif file.
83+
from_s3: Whether to read the data from S3.
7984
8085
Returns:
8186
The data, loaded either as a numpy mem-map, a numpy array, or a zarr / n5 array.
8287
"""
88+
if from_s3:
89+
assert input_key is not None
90+
s3_store, fs = get_s3_path(input_path)
91+
return zarr.open(s3_store, mode="r")[input_key]
92+
8393
if input_key is None:
8494
input_ = read_tif(input_path)
8595
elif isinstance(input_path, str):

flamingo_tools/measurements.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import warnings
44
from concurrent import futures
55
from functools import partial
6-
from typing import List, Optional, Tuple
6+
from typing import List, Optional, Tuple, Union
77

88
import numpy as np
99
import pandas as pd
@@ -60,10 +60,13 @@ def _get_bounding_box_and_center(table, seg_id, resolution, shape, dilation):
6060
for bmin, bmax, sh in zip(bb_min, bb_max, shape)
6161
)
6262

63+
if isinstance(resolution, float):
64+
resolution = (resolution,) * 3
65+
6366
center = (
64-
int(row.anchor_z.item() / resolution),
65-
int(row.anchor_y.item() / resolution),
66-
int(row.anchor_x.item() / resolution),
67+
int(row.anchor_z.item() / resolution[0]),
68+
int(row.anchor_y.item() / resolution[1]),
69+
int(row.anchor_x.item() / resolution[2]),
6770
)
6871

6972
return bb, center
@@ -307,7 +310,7 @@ def compute_object_measures(
307310
image_key: Optional[str] = None,
308311
segmentation_key: Optional[str] = None,
309312
n_threads: Optional[int] = None,
310-
resolution: float = 0.38,
313+
resolution: Union[float, Tuple[float, ...]] = 0.38,
311314
force: bool = False,
312315
feature_set: str = "default",
313316
s3_flag: bool = False,
@@ -359,8 +362,8 @@ def compute_object_measures(
359362
table = table[table["component_labels"].isin(component_list)]
360363

361364
# Then, open the volumes.
362-
image = read_image_data(image_path, image_key)
363-
segmentation = read_image_data(segmentation_path, segmentation_key)
365+
image = read_image_data(image_path, image_key, from_s3=s3_flag)
366+
segmentation = read_image_data(segmentation_path, segmentation_key, from_s3=s3_flag)
364367

365368
measures = compute_object_measures_impl(
366369
image, segmentation, n_threads, resolution, table=table, feature_set=feature_set,

scripts/measurements/sgn_subtypes.py

Lines changed: 61 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -173,17 +173,17 @@ def require_missing_tables(missing_tables):
173173
img_s3 = f"{cochlea}/images/ome-zarr/{channel}.ome.zarr"
174174
seg_s3 = f"{cochlea}/images/ome-zarr/{seg_name}.ome.zarr"
175175
seg_table_s3 = f"{cochlea}/tables/{seg_name}/default.tsv"
176-
img_path, _ = get_s3_path(img_s3)
177-
seg_path, _ = get_s3_path(seg_s3)
176+
# img_path, _ = get_s3_path(img_s3)
177+
# seg_path, _ = get_s3_path(seg_s3)
178178

179179
output_folder = os.path.join(output_root, cochlea)
180180
os.makedirs(output_folder, exist_ok=True)
181181
output_table_path = os.path.join(
182182
output_folder, f"{channel}_{seg_name.replace('_', '-')}_object-measures.tsv"
183183
)
184184
compute_object_measures(
185-
image_path=img_path,
186-
segmentation_path=seg_path,
185+
image_path=img_s3,
186+
segmentation_path=seg_s3,
187187
segmentation_table_path=seg_table_s3,
188188
output_table_path=output_table_path,
189189
image_key="s0",
@@ -215,6 +215,7 @@ def compile_data_for_subtype_analysis():
215215
assert "CR" in channels
216216
reference_channel = "CR"
217217
seg_name = "CR_SGN_v2"
218+
print(cochlea)
218219

219220
content = s3.open(f"{BUCKET_NAME}/{cochlea}/dataset.json", mode="r", encoding="utf-8")
220221
info = json.loads(content.read())
@@ -537,16 +538,69 @@ def analyze_subtype_data_regular(show_plots=True):
537538
combined_analysis(results, show_plots=show_plots)
538539

539540

541+
def export_for_annotation():
542+
files = sorted(glob("./subtype_analysis/*.tsv"))
543+
out_folder = "./subtype_analysis/for_mobie_annotation"
544+
os.makedirs(out_folder, exist_ok=True)
545+
546+
all_thresholds = {}
547+
for ff in files:
548+
cochlea = os.path.basename(ff)[:-len("_subtype_analysis.tsv")]
549+
if cochlea not in REGULAR_COCHLEAE:
550+
continue
551+
552+
print(cochlea)
553+
channels = COCHLEAE_FOR_SUBTYPES[cochlea]
554+
reference_channel = "PV"
555+
assert channels[0] == reference_channel
556+
tab = pd.read_csv(ff, sep="\t")
557+
558+
tab_for_export = {"label_id": tab.label_id.values}
559+
classification = []
560+
thresholds = {}
561+
562+
for chan in channels[1:]:
563+
data = tab[f"{chan}_ratio_PV"].values
564+
tab_for_export[f"{chan}_ratio_PV"] = data
565+
threshold = threshold_otsu(data)
566+
thresholds[chan] = threshold
567+
568+
c0, c1 = f"{chan}-", f"{chan}+"
569+
classification.append([c0 if datum < threshold else c1 for datum in data])
570+
571+
all_thresholds[cochlea] = thresholds
572+
573+
if len(classification) == 2:
574+
cls1, cls2 = classification
575+
classification = [f"{c1} / {c2}" for c1, c2 in zip(cls1, cls2)]
576+
else:
577+
classification = classification[0]
578+
classification = [stain_to_type(cls) for cls in classification]
579+
classification = [f"{stype} ({stain})" for stype, stain in classification]
580+
581+
tab_for_export["classification"] = classification
582+
tab_for_export = pd.DataFrame(tab_for_export)
583+
tab_for_export.to_csv(os.path.join(out_folder, f"{cochlea}.tsv"), sep="\t", index=False)
584+
585+
with open(os.path.join(out_folder, "thresholds.json"), "w") as f:
586+
json.dump(all_thresholds, f, indent=2, sort_keys=True)
587+
588+
540589
# More TODO:
541590
# > It's good to see that for the N mice the Ntng1C and Lypd1 separate from CR so well on the thresholds.
542591
# Can I visualize these samples once segmentation masks are done to verify the Ntng1C thresholds?
543592
# As this is a quite clear signal I'm not sure if taking the middle of the histogram would be the best choice.
544593
# The segmentations are in MoBIE already. I need to send you the tables for analyzing the signals. Will send them later.
545594
def main():
546-
# missing_tables = check_processing_status()
547-
# require_missing_tables(missing_tables)
548-
# compile_data_for_subtype_analysis()
595+
# These scripts are for computing the intensity tables etc.
596+
missing_tables = check_processing_status()
597+
require_missing_tables(missing_tables)
598+
compile_data_for_subtype_analysis()
599+
600+
# This script is for exporting the tables for annotation in MoBIE.
601+
export_for_annotation()
549602

603+
# This script is for running the analysis and creating the plots.
550604
analyze_subtype_data_regular(show_plots=False)
551605

552606
# TODO

0 commit comments

Comments
 (0)