diff --git a/flamingo_tools/extract_block_util.py b/flamingo_tools/extract_block_util.py new file mode 100644 index 0000000..09851da --- /dev/null +++ b/flamingo_tools/extract_block_util.py @@ -0,0 +1,93 @@ +import os +from typing import Optional, List + +import imageio.v3 as imageio +import numpy as np +import zarr + +import flamingo_tools.s3_utils as s3_utils +from flamingo_tools.file_utils import read_image_data + + +def extract_block( + input_path: str, + coords: List[int], + output_dir: Optional[str] = None, + input_key: Optional[str] = None, + output_key: Optional[str] = None, + resolution: float = 0.38, + roi_halo: List[int] = [128, 128, 64], + tif: bool = False, + s3: Optional[bool] = False, + s3_credentials: Optional[str] = None, + s3_bucket_name: Optional[str] = None, + s3_service_endpoint: Optional[str] = None, +): + """Extract block around coordinate from input data according to a given halo. + Either from a local file or from an S3 bucket. + + Args: + input_path: Input folder in n5 / ome-zarr format. + coords: Center coordinates of extracted 3D volume. + output_dir: Output directory for saving output as _crop.n5. Default: input directory. + input_key: Input key for data in input file. + output_key: Output key for data in n5 output or used as suffix for tif output. + roi_halo: ROI halo of extracted 3D volume. + tif: Flag for tif output + s3: Flag for considering input_path for S3 bucket. + s3_bucket_name: S3 bucket name. + s3_service_endpoint: S3 service endpoint. + s3_credentials: File path to credentials for S3 bucket. + """ + coords = [int(round(c)) for c in coords] + coord_string = "-".join([str(c).zfill(4) for c in coords]) + + # Dimensions are inversed to view in MoBIE (x y z) -> (z y x) + coords.reverse() + roi_halo.reverse() + + input_content = list(filter(None, input_path.split("/"))) + + if s3: + image_name = input_content[-1].split(".")[0] + image_prefix = image_name + basename = input_content[0] + else: + basename = "".join(input_content[-1].split(".")[:-1]) + image_prefix = basename.split("_")[-1] + + input_dir = input_path.split(basename)[0] + input_dir = os.path.abspath(input_dir) + + if output_dir == "": + output_dir = input_dir + + if tif: + if output_key is None: + output_name = basename + "_crop_" + coord_string + "_" + image_prefix + ".tif" + else: + output_name = basename + "_" + image_prefix + "_crop_" + coord_string + "_" + output_key + ".tif" + + output_file = os.path.join(output_dir, output_name) + else: + output_key = "raw" if output_key is None else output_key + output_file = os.path.join(output_dir, basename + "_crop_" + coord_string + ".n5") + + coords = np.array(coords) + coords = coords / resolution + coords = np.round(coords).astype(np.int32) + + roi = tuple(slice(co - rh, co + rh) for co, rh in zip(coords, roi_halo)) + + if s3: + input_path, fs = s3_utils.get_s3_path(input_path, bucket_name=s3_bucket_name, + service_endpoint=s3_service_endpoint, credential_file=s3_credentials) + + data_ = read_image_data(input_path, input_key) + data_roi = data_[roi] + + if tif: + imageio.imwrite(output_file, data_roi, compression="zlib") + else: + with zarr.open(output_file, mode="w") as f_out: + f_out.create_dataset(output_key, data=data_roi, compression="gzip") diff --git a/reproducibility/README.md b/reproducibility/README.md new file mode 100644 index 0000000..c8f9b1c --- /dev/null +++ b/reproducibility/README.md @@ -0,0 +1,23 @@ +# Scripts to ensure/facilitate the reproducbility of pre-/post-processing + +The scripts within these folders document steps involved in the pre-/post-processing or analysis of data. +The folders contain a script for reproducibility as well as dictionaries in JSON format, which contain script parameters and additional information. + +## Extraction of blocks from a 3D volume + +The extraction of blocks from a 3D volume is required for the creation of annotations, empty crops, or others regions of interest. + +Usage: +``` +python repro_block_extraction.py --input --output +``` + +## Post-processing of SGN segmentation + +The post-processing of the SGN segmentation may involve the erosion of the segmentation to exclude artifacts, the variation of the minimal number of nodes within a component, or the minimal distance between nodes to consider them the same component. + +Usage: + ``` +python repro_postprocess_sgn_v1.py --input --output +``` + diff --git a/reproducibility/block_extraction/2025-05-SGN_annotated_crops.json b/reproducibility/block_extraction/2025-05-SGN_annotated_crops.json new file mode 100644 index 0000000..e7e9e79 --- /dev/null +++ b/reproducibility/block_extraction/2025-05-SGN_annotated_crops.json @@ -0,0 +1,147 @@ +[ + { + "cochlea": "M_LR_000144_L", + "image_channel": "PV_resized", + "crop_centers": [ + [ + 1040, + 700, + 1390 + ], + [ + 1300, + 690, + 1191 + ], + [ + 1455, + 814, + 1191 + ] + ], + "halo_size": [ + 128, + 128, + 32 + ] + }, + { + "cochlea": "M_LR_000145_L", + "image_channel": "PV_resized", + "crop_centers": [ + [ + 972, + 862, + 610 + ], + [ + 1301, + 812, + 740 + ] + ], + "halo_size": [ + 128, + 128, + 32 + ] + }, + { + "cochlea": "M_LR_000167_R", + "image_channel": "PV", + "crop_centers": [ + [ + 909, + 813, + 810 + ], + [ + 916, + 800, + 722 + ], + [ + 951, + 742, + 652 + ], + [ + 951, + 742, + 602 + ], + [ + 1251, + 266, + 575 + ], + [ + 1210, + 148, + 606 + ], + [ + 1305, + 341, + 526 + ], + [ + 1137, + 669, + 1044 + ] + ], + "halo_size": [ + 128, + 128, + 32 + ] + }, + { + "cochlea": "M_LR_000151_R", + "image_channel": "PV_resized", + "crop_centers": [ + [ + 1275, + 806, + 578 + ], + [ + 875, + 539, + 747 + ], + [ + 1300, + 535, + 747 + ] + ], + "halo_size": [ + 128, + 128, + 32 + ] + }, + { + "cochlea": "M_LR_000184_L", + "image_channel": "PV_resized", + "crop_centers": [ + [ + 1326, + 844, + 516 + ], + [ + 885, + 913, + 1023 + ] + ], + "halo_size": [ + 128, + 128, + 32 + ] + } +] \ No newline at end of file diff --git a/reproducibility/block_extraction/2025-05-SGN_domain_crops.json b/reproducibility/block_extraction/2025-05-SGN_domain_crops.json new file mode 100644 index 0000000..27fa856 --- /dev/null +++ b/reproducibility/block_extraction/2025-05-SGN_domain_crops.json @@ -0,0 +1,102 @@ +[ + { + "cochlea": "M_LR_000144_L", + "image_channel": "PV_resized", + "crop_centers": [ + [ + 1490, + 600, + 735 + ], + [ + 1540, + 930, + 865 + ] + ], + "halo_size": [ + 256, + 256, + 128 + ] + }, + { + "cochlea": "M_LR_000145_L", + "image_channel": "PV_resized", + "crop_centers": [ + [ + 1300, + 700, + 700 + ] + ], + "halo_size": [ + 256, + 256, + 128 + ] + }, + { + "cochlea": "M_LR_000151_R", + "image_channel": "PV_resized", + "crop_centers": [ + [ + 1350, + 690, + 1000 + ], + [ + 1350, + 570, + 734 + ] + ], + "halo_size": [ + 256, + 256, + 128 + ] + }, + { + "cochlea": "M_LR_000155_L", + "image_channel": "PV_resized", + "crop_centers": [ + [ + 900, + 1090, + 700 + ], + [ + 1464, + 830, + 420 + ] + ], + "halo_size": [ + 256, + 256, + 128 + ] + }, + { + "cochlea": "M_LR_000167_R", + "image_channel": "PV", + "crop_centers": [ + [ + 1360, + 1040, + 840 + ], + [ + 900, + 780, + 780 + ] + ], + "halo_size": [ + 256, + 256, + 128 + ] + } +] \ No newline at end of file diff --git a/reproducibility/block_extraction/2025-05-SGN_empty_crops_vicinity_cochlea.json b/reproducibility/block_extraction/2025-05-SGN_empty_crops_vicinity_cochlea.json new file mode 100644 index 0000000..46e9db5 --- /dev/null +++ b/reproducibility/block_extraction/2025-05-SGN_empty_crops_vicinity_cochlea.json @@ -0,0 +1,70 @@ +[ + { + "cochlea": "M_AMD_000058_L", + "image_channel": "PV", + "crop_centers": [ + [ + 200, + 660, + 180 + ], + [ + 270, + 370, + 70 + ], + [ + 160, + 180, + 70 + ] + ], + "halo_size": [ + 128, + 128, + 64 + ] + }, + { + "cochlea": "M_LR_000145_L", + "image_channel": "PV_resized", + "crop_centers": [ + [ + 503, + 970, + 1220 + ], + [ + 572, + 482, + 1135 + ], + [ + 352, + 759, + 1100 + ] + ], + "halo_size": [ + 128, + 128, + 64 + ] + }, + { + "cochlea": "M_LR_000151_R", + "image_channel": "PV_resized", + "crop_centers": [ + [ + 1600, + 1070, + 660 + ] + ], + "halo_size": [ + 128, + 128, + 64 + ] + } +] \ No newline at end of file diff --git a/reproducibility/block_extraction/repro_block_extraction.py b/reproducibility/block_extraction/repro_block_extraction.py new file mode 100644 index 0000000..5b1e2f2 --- /dev/null +++ b/reproducibility/block_extraction/repro_block_extraction.py @@ -0,0 +1,60 @@ +import argparse +import json +import os +from typing import Optional + +from flamingo_tools.extract_block_util import extract_block + + +def repro_block_extraction( + ddict: dict, + output_dir: str, + s3_credentials: Optional[str] = None, + s3_bucket_name: Optional[str] = None, + s3_service_endpoint: Optional[str] = None, +): + s3_flag = True + tif_flag = True + input_key = "s0" + + with open(ddict, 'r') as myfile: + data = myfile.read() + param_dicts = json.loads(data) + + for dic in param_dicts: + if "image_channel" in dic: + input_path = os.path.join(dic["cochlea"], "images", "ome-zarr", dic["image_channel"] + ".ome.zarr") + roi_halo = dic["halo_size"] + crop_centers = dic["crop_centers"] + for coord in crop_centers: + extract_block(input_path, coord, output_dir, input_key=input_key, roi_halo=roi_halo, tif=tif_flag, + s3=s3_flag, s3_credentials=s3_credentials, s3_bucket_name=s3_bucket_name, + s3_service_endpoint=s3_service_endpoint) + + +def main(): + parser = argparse.ArgumentParser( + description="Script to extract region of interest (ROI) block around center coordinate.") + + parser.add_argument('-i', '--input', type=str, required=True, help="Input JSON dictionary.") + parser.add_argument('-o', "--output", type=str, required=True, help="Output directory.") + + parser.add_argument("--s3_credentials", type=str, default=None, + help="Input file containing S3 credentials. " + "Optional if AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY were exported.") + parser.add_argument("--s3_bucket_name", type=str, default=None, + help="S3 bucket name. Optional if BUCKET_NAME was exported.") + parser.add_argument("--s3_service_endpoint", type=str, default=None, + help="S3 service endpoint. Optional if SERVICE_ENDPOINT was exported.") + + args = parser.parse_args() + + repro_block_extraction( + args.input, args.output, + args.s3_credentials, args.s3_bucket_name, args.s3_service_endpoint, + ) + + +if __name__ == "__main__": + + main() diff --git a/reproducibility/postprocess_sgn/SGN_v1_postprocess.json b/reproducibility/postprocess_sgn/SGN_v1_postprocess.json new file mode 100644 index 0000000..9a21835 --- /dev/null +++ b/reproducibility/postprocess_sgn/SGN_v1_postprocess.json @@ -0,0 +1,47 @@ +[ + { + "cochlea": "M_AMD_000058_L", + "image_channel": "PV", + "unet_version": "v1" + }, + { + "cochlea": "M_LR_000167_R", + "image_channel": "PV", + "unet_version": "v1" + }, + { + "cochlea": "M_LR_000144_L", + "image_channel": "PV_resized", + "min_edge_distance": 70, + "iterations_erode": 1, + "unet_version": "v1" + }, + { + "cochlea": "M_LR_000145_L", + "image_channel": "PV_resized", + "threshold_erode": 90, + "iterations_erode": 1, + "unet_version": "v1" + }, + { + "cochlea": "M_LR_000151_R", + "image_channel": "PV_resized", + "threshold_erode": 45, + "unet_version": "v1" + }, + { + "cochlea": "M_LR_000155_L", + "image_channel": "PV_resized", + "unet_version": "v1" + }, + { + "cochlea": "M_LR_000167_R", + "image_channel": "PV_resized", + "unet_version": "v1" + }, + { + "cochlea": "M_LR_000184_L", + "image_channel": "PV_resized", + "unet_version": "v1" + } +] \ No newline at end of file diff --git a/reproducibility/postprocess_sgn/repro_postprocess_sgn_v1.py b/reproducibility/postprocess_sgn/repro_postprocess_sgn_v1.py new file mode 100644 index 0000000..ee44f77 --- /dev/null +++ b/reproducibility/postprocess_sgn/repro_postprocess_sgn_v1.py @@ -0,0 +1,86 @@ +import argparse +import json +import os +from typing import Optional + +import pandas as pd +from flamingo_tools.s3_utils import get_s3_path +from flamingo_tools.segmentation.postprocessing import postprocess_sgn_seg + + +def repro_postprocess_sgn_v1( + ddict: dict, + output_dir: str, + s3_credentials: Optional[str] = None, + s3_bucket_name: Optional[str] = None, + s3_service_endpoint: Optional[str] = None, +): + min_size = 1000 + default_threshold_erode = None + default_min_length = 50 + default_min_edge_distance = 30 + default_iterations_erode = None + + with open(ddict, 'r') as myfile: + data = myfile.read() + param_dicts = json.loads(data) + + for dic in param_dicts[2:4]: + cochlea = dic["cochlea"] + print(f"Creating components for {cochlea}.") + suffix = dic["suffix"] + tsv_path, fs = get_s3_path(dic["s3_path"], bucket_name=s3_bucket_name, + service_endpoint=s3_service_endpoint, credential_file=s3_credentials) + with fs.open(tsv_path, 'r') as f: + table = pd.read_csv(f, sep="\t") + + threshold_erode = dic["threshold_erode"] if "threshold_erode" in dic else default_threshold_erode + min_component_length = dic["min_component_length"] if "min_component_length" in dic else default_min_length + min_edge_distance = dic["min_edge_distance"] if "min_edge_distance" in dic else default_min_edge_distance + iterations_erode = dic["iterations_erode"] if "iterations_erode" in dic else default_iterations_erode + + print("threshold_erode", threshold_erode) + print("min_component_length", min_component_length) + print("min_edge", min_edge_distance) + print("iterations_erode", iterations_erode) + + tsv_table = postprocess_sgn_seg(table, min_size=min_size, + threshold_erode=threshold_erode, + min_component_length=min_component_length, + min_edge_distance=min_edge_distance, + iterations_erode=iterations_erode) + + largest_comp = len(tsv_table[tsv_table["component_labels"] == 1]) + print(f"Largest component has {largest_comp} SGNs.") + + out_path = os.path.join(output_dir, "".join([cochlea, suffix, ".tsv"])) + + tsv_table.to_csv(out_path, sep="\t", index=False) + + +def main(): + parser = argparse.ArgumentParser( + description="Script to extract region of interest (ROI) block around center coordinate.") + + parser.add_argument('-i', '--input', type=str, required=True, help="Input JSON dictionary.") + parser.add_argument('-o', "--output", type=str, required=True, help="Output directory.") + + parser.add_argument("--s3_credentials", type=str, default=None, + help="Input file containing S3 credentials. " + "Optional if AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY were exported.") + parser.add_argument("--s3_bucket_name", type=str, default=None, + help="S3 bucket name. Optional if BUCKET_NAME was exported.") + parser.add_argument("--s3_service_endpoint", type=str, default=None, + help="S3 service endpoint. Optional if SERVICE_ENDPOINT was exported.") + + args = parser.parse_args() + + repro_postprocess_sgn_v1( + args.input, args.output, + args.s3_credentials, args.s3_bucket_name, args.s3_service_endpoint, + ) + + +if __name__ == "__main__": + + main() diff --git a/scripts/extract_block.py b/scripts/extract_block.py index df1e5c5..4f4af72 100644 --- a/scripts/extract_block.py +++ b/scripts/extract_block.py @@ -2,108 +2,11 @@ """ import argparse import json -import os -from typing import Optional, List -import imageio.v3 as imageio -import numpy as np -import zarr +from flamingo_tools.extract_block_util import extract_block -import flamingo_tools.s3_utils as s3_utils -from flamingo_tools.file_utils import read_image_data - - -def main( - input_path: str, - coords: List[int], - output_dir: Optional[str] = None, - input_key: Optional[str] = None, - output_key: Optional[str] = None, - resolution: float = 0.38, - roi_halo: List[int] = [128, 128, 64], - tif: bool = False, - s3: Optional[bool] = False, - s3_credentials: Optional[str] = None, - s3_bucket_name: Optional[str] = None, - s3_service_endpoint: Optional[str] = None, -): - """Extract block around coordinate from input data according to a given halo. - Either from a local file or from an S3 bucket. - - Args: - input_path: Input folder in n5 / ome-zarr format. - coords: Center coordinates of extracted 3D volume. - output_dir: Output directory for saving output as _crop.n5. Default: input directory. - input_key: Input key for data in input file. - output_key: Output key for data in n5 output or used as suffix for tif output. - roi_halo: ROI halo of extracted 3D volume. - tif: Flag for tif output - s3: Flag for considering input_path for S3 bucket. - s3_bucket_name: S3 bucket name. - s3_service_endpoint: S3 service endpoint. - s3_credentials: File path to credentials for S3 bucket. - """ - coords = [int(round(c)) for c in coords] - coord_string = "-".join([str(c).zfill(4) for c in coords]) - - # Dimensions are inversed to view in MoBIE (x y z) -> (z y x) - coords.reverse() - roi_halo.reverse() - - input_content = list(filter(None, input_path.split("/"))) - - if s3: - image_name = input_content[-1].split(".")[0] - if len(image_name.split("_")) > 1: - resized_suffix = "_resized" - image_prefix = image_name.split("_")[0] - else: - resized_suffix = "" - image_prefix = image_name - basename = input_content[0] + resized_suffix - else: - basename = "".join(input_content[-1].split(".")[:-1]) - image_prefix = basename.split("_")[-1] - - input_dir = input_path.split(basename)[0] - input_dir = os.path.abspath(input_dir) - - if output_dir == "": - output_dir = input_dir - - if tif: - if output_key is None: - output_name = basename + "_crop_" + coord_string + "_" + image_prefix + ".tif" - else: - output_name = basename + "_" + image_prefix + "_crop_" + coord_string + "_" + output_key + ".tif" - - output_file = os.path.join(output_dir, output_name) - else: - output_key = "raw" if output_key is None else output_key - output_file = os.path.join(output_dir, basename + "_crop_" + coord_string + ".n5") - - coords = np.array(coords) - coords = coords / resolution - coords = np.round(coords).astype(np.int32) - - roi = tuple(slice(co - rh, co + rh) for co, rh in zip(coords, roi_halo)) - - if s3: - input_path, fs = s3_utils.get_s3_path(input_path, bucket_name=s3_bucket_name, - service_endpoint=s3_service_endpoint, credential_file=s3_credentials) - - data_ = read_image_data(input_path, input_key) - data_roi = data_[roi] - - if tif: - imageio.imwrite(output_file, data_roi, compression="zlib") - else: - with zarr.open(output_file, mode="w") as f_out: - f_out.create_dataset(output_key, data=data_roi, compression="gzip") - - -if __name__ == "__main__": +def main(): parser = argparse.ArgumentParser( description="Script to extract region of interest (ROI) block around center coordinate.") @@ -135,7 +38,12 @@ def main( coords = json.loads(args.coord) roi_halo = json.loads(args.roi_halo) - main( + extract_block( args.input, coords, args.output, args.input_key, args.output_key, args.resolution, roi_halo, args.tif, args.s3, args.s3_credentials, args.s3_bucket_name, args.s3_service_endpoint, ) + + +if __name__ == "__main__": + + main()