diff --git a/flamingo_tools/validation.py b/flamingo_tools/validation.py
index 87494ad..ff393ba 100644
--- a/flamingo_tools/validation.py
+++ b/flamingo_tools/validation.py
@@ -43,6 +43,8 @@ def fetch_data_for_evaluation(
    seg_name: str = "SGN_v2",
    z_extent: int = 0,
    components_for_postprocessing: Optional[List[int]] = None,
+    cochlea: Optional[str] = None,
+    extra_data: Optional[str] = None,
 ) -> Tuple[np.ndarray, pd.DataFrame]:
    """Fetch segmentation from S3 matching the annotation path for evaluation.

@@ -53,6 +55,8 @@
        z_extent: Additional z-slices to load from the segmentation.
        components_for_postprocessing: The component ids for restricting the segmentation to.
            Choose [1] for the default component containing the helix.
+        cochlea: Optional name of the cochlea (the MoBIE dataset); if not given, it is parsed from the annotation path.
+        extra_data: Optional name of an extra image channel to fetch for the same region and return as a third value.

    Returns:
        The segmentation downloaded from the S3 bucket.
@@ -60,13 +64,11 @@
    """
    # Load the annotations and normalize them for the given z-extent.
    annotations = pd.read_csv(annotation_path)
-    annotations = annotations.drop(columns="index")
+    if "index" in annotations.columns:
+        annotations = annotations.drop(columns="index")
    if z_extent == 0:
        # If we don't have a z-extent then we just drop the first axis and rename the other two.
        annotations = annotations.drop(columns="axis-0")
        annotations = annotations.rename(columns={"axis-1": "axis-0", "axis-2": "axis-1"})
-    else:  # Otherwise we have to center the first axis.
-        # TODO
-        raise NotImplementedError

    # Load the segmentation from cache path if it is given and if it is already cached.
    if cache_path is not None and os.path.exists(cache_path):
        return segmentation, annotations

    # Parse which ID and which cochlea from the name.
-    cochlea, slice_id = _parse_annotation_path(annotation_path)
+    if cochlea is None:
+        cochlea, slice_id = _parse_annotation_path(annotation_path)
+    else:
+        _, slice_id = _parse_annotation_path(annotation_path)

    # Open the S3 connection, get the path to the SGN segmentation in S3.
    internal_path = os.path.join(cochlea, "images", "ome-zarr", f"{seg_name}.ome.zarr")
@@ -111,6 +116,14 @@
    if cache_path is not None:
        imageio.imwrite(cache_path, segmentation, compression="zlib")

+    if extra_data is not None:
+        internal_path = os.path.join(cochlea, "images", "ome-zarr", f"{extra_data}.ome.zarr")
+        s3_store, fs = get_s3_path(internal_path, bucket_name=BUCKET_NAME, service_endpoint=SERVICE_ENDPOINT)
+        input_key = "s0"
+        with zarr.open(s3_store, mode="r") as f:
+            extra_im_data = f[input_key][roi]
+        return segmentation, annotations, extra_im_data
+
    return segmentation, annotations


@@ -347,6 +360,62 @@ def union(a, b):
    return consensus_df, unmatched_df


+def match_detections(
+    detections: np.ndarray,
+    annotations: np.ndarray,
+    max_dist: float
+):
+    """One-to-one matching between 3-D detections and ground-truth points.
+
+    Args:
+        detections: N x 3 candidate detections.
+        annotations: M x 3 ground-truth annotations for the reference points.
+        max_dist: Maximum Euclidean distance allowed for a match.
+
+    Returns:
+        Indices in `detections` that were matched (true positives).
+        Indices in `annotations` that were matched (true positives).
+        Unmatched detection indices (false positives).
+        Unmatched annotation indices (false negatives).
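+
+    Example (a minimal sketch; `pred_points` and `gt_points` stand for hypothetical
+    N x 3 and M x 3 coordinate arrays):
+
+        tp_det, tp_ann, fp, fn = match_detections(pred_points, gt_points, max_dist=8.0)
+        precision = len(tp_det) / max(len(tp_det) + len(fp), 1)
+        recall = len(tp_det) / max(len(tp_det) + len(fn), 1)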
+ """ + det = np.asarray(detections, dtype=float) + ann = np.asarray(annotations, dtype=float) + N, M = len(det), len(ann) + + # trivial corner cases -------------------------------------------------------- + if N == 0: + return np.empty(0, int), np.empty(0, int), np.empty(0, int), np.arange(M) + if M == 0: + return np.empty(0, int), np.empty(0, int), np.arange(N), np.empty(0, int) + + # 1. build sparse radius-filtered distance matrix ----------------------------- + tree_det = cKDTree(det) + tree_ann = cKDTree(ann) + coo = tree_det.sparse_distance_matrix(tree_ann, max_dist, output_type="coo_matrix") + + if coo.nnz == 0: # nothing is close enough + return np.empty(0, int), np.empty(0, int), np.arange(N), np.arange(M) + + cost = np.full((N, M), 5 * max_dist, dtype=float) + cost[coo.row, coo.col] = coo.data # fill only existing edges + + # 2. optimal one-to-one assignment (Hungarian) -------------------------------- + row_ind, col_ind = linear_sum_assignment(cost) + + # Filter assignments that were padded with +∞ cost for non-existent edges + # (linear_sum_assignment automatically does that padding internally). + valid_mask = cost[row_ind, col_ind] <= max_dist + tp_det_ids = row_ind[valid_mask] + tp_ann_ids = col_ind[valid_mask] + assert len(tp_det_ids) == len(tp_ann_ids) + + # 3. derive FP / FN ----------------------------------------------------------- + fp_det_ids = np.setdiff1d(np.arange(N), tp_det_ids, assume_unique=True) + fn_ann_ids = np.setdiff1d(np.arange(M), tp_ann_ids, assume_unique=True) + + return tp_det_ids, tp_ann_ids, fp_det_ids, fn_ann_ids + + def for_visualization(segmentation, annotations, matches): green_red = ["#00FF00", "#FF0000"] diff --git a/scripts/export_lower_resolution.py b/scripts/export_lower_resolution.py index fdde906..abce61d 100644 --- a/scripts/export_lower_resolution.py +++ b/scripts/export_lower_resolution.py @@ -7,22 +7,22 @@ import zarr from flamingo_tools.s3_utils import get_s3_path, BUCKET_NAME, SERVICE_ENDPOINT -from skimage.segmentation import relabel_sequential +# from skimage.segmentation import relabel_sequential -def filter_component(fs, segmentation, cochlea, seg_name): +def filter_component(fs, segmentation, cochlea, seg_name, components): # First, we download the MoBIE table for this segmentation. internal_path = os.path.join(BUCKET_NAME, cochlea, "tables", seg_name, "default.tsv") with fs.open(internal_path, "r") as f: table = pd.read_csv(f, sep="\t") # Then we get the ids for the components and us them to filter the segmentation. 
- component_mask = np.isin(table.component_labels.values, [1]) + component_mask = np.isin(table.component_labels.values, components) keep_label_ids = table.label_id.values[component_mask].astype("int64") filter_mask = ~np.isin(segmentation, keep_label_ids) segmentation[filter_mask] = 0 - segmentation, _, _ = relabel_sequential(segmentation) + # segmentation, _, _ = relabel_sequential(segmentation) return segmentation @@ -42,8 +42,10 @@ def export_lower_resolution(args): with zarr.open(s3_store, mode="r") as f: data = f[input_key][:] print(data.shape) - if args.filter_by_component: - data = filter_component(fs, data, args.cochlea, channel) + if args.filter_by_components is not None: + data = filter_component(fs, data, args.cochlea, channel, args.filter_by_components) + if args.binarize: + data = (data > 0).astype("uint8") tifffile.imwrite(out_path, data, bigtiff=True, compression="zlib") @@ -53,7 +55,8 @@ def main(): parser.add_argument("--scale", "-s", type=int, required=True) parser.add_argument("--output_folder", "-o", required=True) parser.add_argument("--channels", nargs="+", default=["PV", "VGlut3", "CTBP2"]) - parser.add_argument("--filter_by_component", action="store_true") + parser.add_argument("--filter_by_components", nargs="+", type=int, default=None) + parser.add_argument("--binarize", action="store_true") args = parser.parse_args() export_lower_resolution(args) diff --git a/scripts/measurements/evaluate_otof_therapy.py b/scripts/measurements/evaluate_otof_therapy.py new file mode 100644 index 0000000..463898a --- /dev/null +++ b/scripts/measurements/evaluate_otof_therapy.py @@ -0,0 +1,80 @@ +import os +import json + +import pandas as pd + +from flamingo_tools.s3_utils import BUCKET_NAME, create_s3_target + +OUTPUT_FOLDER = "./results/otof-measurements" + + +def check_project(save=False): + s3 = create_s3_target() + + # content = s3.open(f"{BUCKET_NAME}/project.json", mode="r", encoding="utf-8") + # x = json.loads(content.read()) + # print(x) + # return + + cochleae = ["M_AMD_OTOF1_L", "M_AMD_OTOF2_L"] + ihc_name = "IHC_v2" + + for cochlea in cochleae: + content = s3.open(f"{BUCKET_NAME}/{cochlea}/dataset.json", mode="r", encoding="utf-8") + info = json.loads(content.read()) + sources = info["sources"] + + source_names = list(sources.keys()) + assert ihc_name in source_names + + # Get the ihc table folder. + ihc = sources[ihc_name]["segmentation"] + table_folder = os.path.join(BUCKET_NAME, cochlea, ihc["tableData"]["tsv"]["relativePath"]) + + # For debugging. 
+ # print(s3.ls(table_folder)) + + default_table = s3.open(os.path.join(table_folder, "default.tsv"), mode="rb") + default_table = pd.read_csv(default_table, sep="\t") + + measurement_table = s3.open(os.path.join(table_folder, "Apha_IHC-v2_object-measures.tsv"), mode="rb") + measurement_table = pd.read_csv(measurement_table, sep="\t") + if save: + os.makedirs(OUTPUT_FOLDER, exist_ok=True) + measurement_table.to_csv(os.path.join(OUTPUT_FOLDER, f"{cochlea}.csv"), index=False) + + print("Cochlea:", cochlea) + print("AlphaTag measurements for:", len(measurement_table), "IHCs:") + print(measurement_table.columns) + print() + + +def plot_distribution(): + import seaborn as sns + import matplotlib.pyplot as plt + + table1 = "./results/otof-measurements/M_AMD_OTOF1_L.csv" + table2 = "./results/otof-measurements/M_AMD_OTOF2_L.csv" + + table1 = pd.read_csv(table1) + table2 = pd.read_csv(table2) + + fig, axes = plt.subplots(1, 2, figsize=(10, 4)) + sns.histplot(data=table1, x="mean", bins=32, ax=axes[0]) + axes[0].set_title("Dual AAV") + sns.histplot(data=table2, x="mean", bins=32, ax=axes[1]) + axes[1].set_title("Overloaded AAV") + + fig.suptitle("OTOF Gene Therapy - Mean AlphaTag Intensity of IHCs") + plt.tight_layout() + + plt.show() + + +def main(): + # check_project(save=True) + plot_distribution() + + +if __name__ == "__main__": + main() diff --git a/scripts/measurements/evaluate_sgn_therapy.py b/scripts/measurements/evaluate_sgn_therapy.py new file mode 100644 index 0000000..7410d58 --- /dev/null +++ b/scripts/measurements/evaluate_sgn_therapy.py @@ -0,0 +1,100 @@ +import os +import json + +import pandas as pd + +from flamingo_tools.s3_utils import BUCKET_NAME, create_s3_target + +OUTPUT_FOLDER = "./results/sgn-measurements" + + +def check_project(save=False): + s3 = create_s3_target() + + content = s3.open(f"{BUCKET_NAME}/project.json", mode="r", encoding="utf-8") + project_info = json.loads(content.read()) + + cochleae = [ + "M_LR_000144_L", "M_LR_000145_L", "M_LR_000151_R", "M_LR_000155_L", + ] + + sgn_name = "SGN_resized_v2" + for cochlea in cochleae: + assert cochlea in project_info["datasets"] + + content = s3.open(f"{BUCKET_NAME}/{cochlea}/dataset.json", mode="r", encoding="utf-8") + info = json.loads(content.read()) + sources = info["sources"] + + source_names = list(sources.keys()) + if sgn_name not in source_names: + continue + + # Get the ihc table folder. + sgn = sources[sgn_name]["segmentation"] + table_folder = os.path.join(BUCKET_NAME, cochlea, sgn["tableData"]["tsv"]["relativePath"]) + + # For debugging. 
+ x = s3.ls(table_folder) + if len(x) == 1: + continue + + default_table = s3.open(os.path.join(table_folder, "default.tsv"), mode="rb") + default_table = pd.read_csv(default_table, sep="\t") + main_ids = default_table[default_table.component_labels == 1].label_id + + measurement_table = s3.open( + os.path.join(table_folder, "GFP-resized_SGN-resized-v2_object-measures.tsv"), mode="rb" + ) + measurement_table = pd.read_csv(measurement_table, sep="\t") + measurement_table = measurement_table[measurement_table.label_id.isin(main_ids)] + assert len(measurement_table) == len(main_ids) + + if save: + os.makedirs(OUTPUT_FOLDER, exist_ok=True) + measurement_table.to_csv(os.path.join(OUTPUT_FOLDER, f"{cochlea}.csv"), index=False) + + print("Cochlea:", cochlea) + print("GFP measurements for:", len(measurement_table), "SGNs:") + print(measurement_table.columns) + print() + + +def plot_distribution(): + import seaborn as sns + import matplotlib.pyplot as plt + + table1 = "./results/sgn-measurements/M_LR_000145_L.csv" + table2 = "./results/sgn-measurements/M_LR_000151_R.csv" + table3 = "./results/sgn-measurements/M_LR_000155_L.csv" + + table1 = pd.read_csv(table1) + table2 = pd.read_csv(table2) + table3 = pd.read_csv(table3) + + print(len(table1)) + print(len(table3)) + + fig, axes = plt.subplots(1, 2, figsize=(12, 4)) + + sns.histplot(data=table1, x="mean", bins=32, ax=axes[0]) + axes[0].set_title("M145_L") + # Something is wrong here, the values are normalized. + # sns.histplot(data=table2, x="mean", bins=32, ax=axes[1]) + # axes[1].set_title("M151_R") + sns.histplot(data=table3, x="mean", bins=32, ax=axes[1]) + axes[1].set_title("M155_L") + + fig.suptitle("SGN Gene Therapy - Mean GFP Intensity of SGNs") + plt.tight_layout() + + plt.show() + + +def main(): + # check_project(save=True) + plot_distribution() + + +if __name__ == "__main__": + main() diff --git a/scripts/measurements/measure_sgns.py b/scripts/measurements/measure_sgns.py index f94730a..c42d179 100644 --- a/scripts/measurements/measure_sgns.py +++ b/scripts/measurements/measure_sgns.py @@ -21,10 +21,22 @@ def open_tsv(fs, path): def main(): + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("--cochleae", "-c", nargs="+") + args = parser.parse_args() + fs = create_s3_target() project_info = open_json(fs, "project.json") - for dataset in project_info["datasets"]: - if dataset == "fens": + + if args.cochleae is None: + cochleae = project_info["datasets"] + else: + cochleae = args.cochleae + + for dataset in cochleae: + if dataset not in project_info["datasets"]: + print("Could not find cochleae", dataset) continue print(dataset) dataset_info = open_json(fs, os.path.join(dataset, "dataset.json")) @@ -36,12 +48,22 @@ def main(): source_info = source_info["segmentation"] table_path = source_info["tableData"]["tsv"]["relativePath"] table = open_tsv(fs, os.path.join(dataset, table_path, "default.tsv")) - component_labels = table.component_labels.values - remaining_sgns = component_labels[component_labels != 0] - print(source) - print("Number of SGNs (all components) :", len(remaining_sgns)) - _, n_per_component = np.unique(remaining_sgns, return_counts=True) - print("Number of SGNs (largest component):", max(n_per_component)) + + if hasattr(table, "component_labels"): + component_labels = table.component_labels.values + remaining_sgns = component_labels[component_labels != 0] + print(source) + print( + "Number of SGNs (all components) :", len(remaining_sgns), "/", len(table), + "(total number of segmented objects)" + ) + 
component_ids, n_per_component = np.unique( + remaining_sgns, return_counts=True + ) + print("Number of SGNs (largest component):", max(n_per_component)) + else: + print(source) + print("Number of SGNs (no postprocessing):", len(table)) if __name__ == "__main__": diff --git a/scripts/measurements/measure_synapses.py b/scripts/measurements/measure_synapses.py new file mode 100644 index 0000000..11a2afb --- /dev/null +++ b/scripts/measurements/measure_synapses.py @@ -0,0 +1,87 @@ +import os +import json + +import numpy as np +import pandas as pd + +from flamingo_tools.s3_utils import BUCKET_NAME, create_s3_target + +OUTPUT_FOLDER = "./ihc_counts" + + +def check_project(plot=False, save_ihc_table=False): + s3 = create_s3_target() + cochleae = ['M_LR_000226_L', 'M_LR_000226_R', 'M_LR_000227_L', 'M_LR_000227_R'] + + results = {} + for cochlea in cochleae: + content = s3.open(f"{BUCKET_NAME}/{cochlea}/dataset.json", mode="r", encoding="utf-8") + info = json.loads(content.read()) + sources = info["sources"] + + # Load the synapse table. + syn = sources["synapse_v3"]["spots"] + rel_path = syn["tableData"]["tsv"]["relativePath"] + table_content = s3.open(os.path.join(BUCKET_NAME, cochlea, rel_path, "default.tsv"), mode="rb") + syn_table = pd.read_csv(table_content, sep="\t") + max_dist = syn_table.distance_to_ihc.max() + + # Load the corresponding ihc table. + ihc = sources["IHC_v2"]["segmentation"] + rel_path = ihc["tableData"]["tsv"]["relativePath"] + table_content = s3.open(os.path.join(BUCKET_NAME, cochlea, rel_path, "default.tsv"), mode="rb") + ihc_table = pd.read_csv(table_content, sep="\t") + + # Keep only the synapses that were matched to a valid IHC. + component_id = 2 if cochlea == "M_LR_000226_R" else 1 + valid_ihcs = ihc_table.label_id[ihc_table.component_labels == component_id].values.astype("int") + + valid_syn_table = syn_table[syn_table.matched_ihc.isin(valid_ihcs)] + n_synapses = len(valid_syn_table) + + ihc_ids, syn_per_ihc = np.unique(valid_syn_table.matched_ihc.values, return_counts=True) + ihc_ids = ihc_ids.astype("int") + results[cochlea] = syn_per_ihc + + print("Cochlea:", cochlea) + print("N-Synapses:", n_synapses) + print("Average Syn per IHC:", np.mean(syn_per_ihc)) + print("STDEV Syn per IHC:", np.std(syn_per_ihc)) + print("@ max dist:", max_dist) + print() + + if save_ihc_table: + ihc_to_count = {ihc_id: count for ihc_id, count in zip(ihc_ids, syn_per_ihc)} + unmatched_ihcs = np.setdiff1d(valid_ihcs, ihc_ids) + ihc_to_count.update({ihc_id: 0 for ihc_id in unmatched_ihcs}) + ihc_count_table = pd.DataFrame({ + "label_id": list(ihc_to_count.keys()), + "synapse_count": list(ihc_to_count.values()), + }) + os.makedirs(OUTPUT_FOLDER, exist_ok=True) + output_path = os.path.join(OUTPUT_FOLDER, f"ihc_count_{cochlea}.tsv") + ihc_count_table.to_csv(output_path, sep="\t", index=False) + + if plot: + import matplotlib.pyplot as plt + import seaborn as sns + + cap = 30 + + fig, axes = plt.subplots(1, 4, figsize=(16, 4), sharex=True, sharey=True) + for i, (cochlea, res) in enumerate(results.items()): + sns.histplot(data=np.clip(res, 0, cap), bins=16, ax=axes[i]) + axes[i].set_title(cochlea) + + fig.suptitle(f"Ribbon Synapses per IHC @ {np.round(max_dist)} micron") + + plt.tight_layout() + plt.show() + + +def main(): + check_project(plot=False, save_ihc_table=True) + + +if __name__ == "__main__": + main() diff --git a/scripts/synapse_marker_detection/export_model.py b/scripts/synapse_marker_detection/export_model.py new file mode 100644 index 0000000..a467d97 --- /dev/null +++ 
b/scripts/synapse_marker_detection/export_model.py @@ -0,0 +1,25 @@ +import argparse +import sys + +import torch +from torch_em.util import load_model + +sys.path.append("/home/pape/Work/my_projects/czii-protein-challenge") +sys.path.append("/user/pape41/u12086/Work/my_projects/czii-protein-challenge") + + +def export_model(input_, output): + model = load_model(input_, device="cpu") + torch.save(model, output) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("-i", "--input", required=True) + parser.add_argument("-o", "--output", required=True) + args = parser.parse_args() + export_model(args.input, args.output) + + +if __name__ == "__main__": + main() diff --git a/scripts/synapse_marker_detection/extract_training_data.py b/scripts/synapse_marker_detection/extract_training_data.py index 68090f7..2baf27c 100644 --- a/scripts/synapse_marker_detection/extract_training_data.py +++ b/scripts/synapse_marker_detection/extract_training_data.py @@ -191,10 +191,32 @@ def process_training_data_v3(visualize=True): extract_training_data(imaris_file, output_folder, tif_file=None, crop=True) +def process_eval_data(visualize=False): + input_root = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/AnnotatedImageCrops/SynapseValidation" # noqa + test_folders = ["M227R_IHC-synapsecrops", "MLR227L_IHC-synapsecrops"] + test_output = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/training_data/synapses/test_data/v3" # noqa + exclude_names = [] + + for folder in test_folders: + + if visualize: + output_folder = None + else: + output_folder = test_output + + imaris_files = sorted(glob(os.path.join(input_root, folder, "*.ims"))) + for imaris_file in imaris_files: + if os.path.basename(imaris_file) in exclude_names: + print("Skipping", imaris_file) + continue + extract_training_data(imaris_file, output_folder, tif_file=None, crop=True) + + def main(): # process_training_data_v1() # process_training_data_v2(visualize=True) - process_training_data_v3(visualize=False) + # process_training_data_v3(visualize=False) + process_eval_data(visualize=False) if __name__ == "__main__": diff --git a/scripts/validation/IHCs/consensus_annotations.py b/scripts/validation/IHCs/consensus_annotations.py new file mode 100644 index 0000000..9229442 --- /dev/null +++ b/scripts/validation/IHCs/consensus_annotations.py @@ -0,0 +1,91 @@ +import os +from glob import glob + +import pandas as pd +from flamingo_tools.validation import create_consensus_annotations + +ROOT = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/AnnotatedImageCrops/F1ValidationIHCs" +ANNOTATION_FOLDERS = ["Annotations_AMD", "Annotations_EK", "Annotations_LR"] + +OUTPUT_FOLDER = os.path.join(ROOT, "consensus_annotation") +COLOR = ["blue", "yellow", "orange"] + + +def match_annotations(image_path): + annotations = {} + prefix = os.path.basename(image_path).split("_")[:3] + prefix = "_".join(prefix) + + annotations = {} + for annotation_folder in ANNOTATION_FOLDERS: + all_annotations = glob(os.path.join(ROOT, annotation_folder, "*.csv")) + matches = [ann for ann in all_annotations if os.path.basename(ann).startswith(prefix)] + assert len(matches) == 1 + annotation_path = matches[0] + annotations[annotation_folder] = annotation_path + + return annotations + + +def consensus_annotations(image_path, check): + annotation_paths = match_annotations(image_path) + assert len(annotation_paths) == len(ANNOTATION_FOLDERS) + + # I tried first with a matching distnce of 8, but that is too conservative. 
+ # A mathing distance of 16 seems better, but might still need to refine this. + matching_distance = 16.0 + consensus_annotations, unmatched_annotations = create_consensus_annotations( + annotation_paths, matching_distance=matching_distance, min_matches_for_consensus=2, + ) + fname = os.path.basename(image_path) + + if check: + import napari + import tifffile + + consensus_annotations = consensus_annotations[["axis-0", "axis-1", "axis-2"]].values + unmatched_annotators = unmatched_annotations.annotator.values + unmatched_annotations = unmatched_annotations[["axis-0", "axis-1", "axis-2"]].values + + image = tifffile.imread(image_path) + v = napari.Viewer() + v.add_image(image) + v.add_points(consensus_annotations, face_color="green") + v.add_points( + unmatched_annotations, + properties={"annotator": unmatched_annotators}, + face_color="annotator", + face_color_cycle=COLOR, # TODO reorder + ) + v.title = os.path.basename(fname) + napari.run() + else: + os.makedirs(OUTPUT_FOLDER, exist_ok=True) + consensus_annotations = consensus_annotations[["axis-0", "axis-1", "axis-2"]] + consensus_annotations.insert(0, "annotator", ["consensus"] * len(consensus_annotations)) + unmatched_annotations = unmatched_annotations[["axis-0", "axis-1", "axis-2", "annotator"]] + annotations = pd.concat([consensus_annotations, unmatched_annotations]) + output_path = os.path.join(OUTPUT_FOLDER, fname.replace(".tif", ".csv")) + annotations.to_csv(output_path, index=False) + print("Saved to", output_path) + + +def main(): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("--images", nargs="+") + parser.add_argument("--check", action="store_true") + args = parser.parse_args() + + if args.images is None: + image_paths = sorted(glob(os.path.join(ROOT, "*.tif"))) + else: + image_paths = args.images + + for image_path in image_paths: + consensus_annotations(image_path, args.check) + + +if __name__ == "__main__": + main() diff --git a/scripts/validation/IHCs/evaluate_consensus.py b/scripts/validation/IHCs/evaluate_consensus.py new file mode 100644 index 0000000..725469d --- /dev/null +++ b/scripts/validation/IHCs/evaluate_consensus.py @@ -0,0 +1,56 @@ +import os +from glob import glob + +import pandas as pd +from flamingo_tools.validation import match_detections + +ROOT = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/AnnotatedImageCrops/F1ValidationIHCs" +ANNOTATION_FOLDERS = ["Annotations_AMD", "Annotations_EK", "Annotations_LR"] +CONSENSUS_FOLDER = "consensus_annotation" + + +def match_annotations(consensus_path): + annotations = {} + prefix = os.path.basename(consensus_path).split("_")[:3] + prefix = "_".join(prefix) + + annotations = {} + for annotation_folder in ANNOTATION_FOLDERS: + all_annotations = glob(os.path.join(ROOT, annotation_folder, "*.csv")) + matches = [ann for ann in all_annotations if os.path.basename(ann).startswith(prefix)] + assert len(matches) == 1 + annotation_path = matches[0] + annotations[annotation_folder] = annotation_path + + return annotations + + +def main(): + consensus_files = sorted(glob(os.path.join(ROOT, CONSENSUS_FOLDER, "*.csv"))) + + results = { + "annotator": [], + "tps": [], + "fps": [], + "fns": [], + } + for consensus_file in consensus_files: + consensus = pd.read_csv(consensus_file) + consensus = consensus[consensus.annotator == "consensus"][["axis-0", "axis-1", "axis-2"]] + + annotations = match_annotations(consensus_file) + for name, annotation_path in annotations.items(): + annotation = pd.read_csv(annotation_path)[["axis-0", 
"axis-1", "axis-2"]] + tp, _, fp, fn = match_detections(annotation, consensus, max_dist=12.0) + results["annotator"].append(name) + results["tps"].append(len(tp)) + results["fps"].append(len(fp)) + results["fns"].append(len(fn)) + + results = pd.DataFrame(results) + print(results) + results.to_csv("consensus_evaluation.csv", index=False) + + +if __name__ == "__main__": + main() diff --git a/scripts/validation/IHCs/run_evaluation.py b/scripts/validation/IHCs/run_evaluation.py index d87e5ae..936f825 100644 --- a/scripts/validation/IHCs/run_evaluation.py +++ b/scripts/validation/IHCs/run_evaluation.py @@ -8,10 +8,11 @@ ROOT = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/AnnotatedImageCrops/F1ValidationIHCs" # ANNOTATION_FOLDERS = ["AnnotationsEK", "AnnotationsAMD", "AnnotationsLR"] -ANNOTATION_FOLDERS = ["Annotations_AMD", "Annotations_LR"] +# ANNOTATION_FOLDERS = ["Annotations_AMD", "Annotations_LR"] +ANNOTATION_FOLDERS = ["consensus_annotation"] -def run_evaluation(root, annotation_folders, result_file, cache_folder): +def run_evaluation(root, annotation_folders, result_file, cache_folder, segmentation_name): results = { "annotator": [], "cochlea": [], @@ -25,18 +26,18 @@ def run_evaluation(root, annotation_folders, result_file, cache_folder): os.makedirs(cache_folder, exist_ok=True) for folder in annotation_folders: - annotator = folder[len("Annotations"):] + annotator = "consensus" if folder == "consensus_annotation" else folder[len("Annotations"):] annotations = sorted(glob(os.path.join(root, folder, "*.csv"))) for annotation_path in annotations: print(annotation_path) cochlea, slice_id = _parse_annotation_path(annotation_path) - # For the cochlea M_LR_000226_R the actual component is 2, not 1 - component = 2 if "226_R" in cochlea else 1 + # For the cochlea M_LR_000226_R the actual component is 2, not 1. (Only for IHC_v2). 
+ component = 2 if ("226_R" in cochlea and segmentation_name == "IHC_v2") else 1 print("Run evaluation for", annotator, cochlea, "z=", slice_id) segmentation, annotations = fetch_data_for_evaluation( annotation_path, components_for_postprocessing=[component], - seg_name="IHC_v2", + seg_name=segmentation_name, cache_path=None if cache_folder is None else os.path.join(cache_folder, f"{cochlea}_{slice_id}.tif") ) scores = compute_scores_for_annotated_slice(segmentation, annotations, matching_tolerance=5) @@ -58,9 +59,10 @@ def main(): parser.add_argument("-i", "--input", default=ROOT) parser.add_argument("--folders", default=ANNOTATION_FOLDERS) parser.add_argument("--result_file", default="results.csv") + parser.add_argument("--segmentation_name", default="IHC_v2") parser.add_argument("--cache_folder") args = parser.parse_args() - run_evaluation(args.input, args.folders, args.result_file, args.cache_folder) + run_evaluation(args.input, args.folders, args.result_file, args.cache_folder, args.segmentation_name) if __name__ == "__main__": diff --git a/scripts/validation/SGNs/consensus_annotations.py b/scripts/validation/SGNs/consensus_annotations.py index 8316bf2..9329b80 100644 --- a/scripts/validation/SGNs/consensus_annotations.py +++ b/scripts/validation/SGNs/consensus_annotations.py @@ -1,14 +1,22 @@ import os from glob import glob -import numpy as np -import pandas as pd from flamingo_tools.validation import create_consensus_annotations ROOT = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/AnnotatedImageCrops/F1ValidationSGNs" -ANNOTATION_FOLDERS = ["AnnotationsAMD", "AnnotationsEK", "AnnotationsLR"] + COLOR = ["blue", "yellow", "orange"] -OUTPUT_FOLDER = os.path.join(ROOT, "Consensus") + +# First iteration of consensus annotations. +OUTPUT_FOLDER = os.path.join(ROOT, "for_consensus_annotation") +ANNOTATION_FOLDERS = ["AnnotationsAMD", "AnnotationsEK", "AnnotationsLR"] + +# Second iteration of consensus annotations for the two images of rescaled cochlea. +# OUTPUT_FOLDER = os.path.join(ROOT, "for_consensus_annotation2") +# ANNOTATION_FOLDERS = [ +# "for_consensus_annotation2/AnnotationsAMD", "for_consensus_annotation2/AnnotationsEK", +# "for_consensus_annotation2/AnnotationsLR" +# ] def match_annotations(image_path): @@ -30,8 +38,9 @@ def match_annotations(image_path): def consensus_annotations(image_path, check): print("Compute consensus annotations for", image_path) annotation_paths = match_annotations(image_path) + matching_distance = 8 consensus_annotations, unmatched_annotations = create_consensus_annotations( - annotation_paths, matching_distance=8.0, min_matches_for_consensus=2, + annotation_paths, matching_distance=matching_distance, min_matches_for_consensus=2, ) fname = os.path.basename(image_path) @@ -57,22 +66,24 @@ def consensus_annotations(image_path, check): napari.run() else: - # Save the combined consensus and unmatched annotation. - combined_annotations = consensus_annotations[["axis-0", "axis-1", "axis-2", "annotators"]] - combined_annotations["annotators"] = "consensus" - combined_annotations = combined_annotations.rename(columns={"annotators": "annotator"}) - combined_annotations = pd.concat([combined_annotations, unmatched_annotations]) + # Save the consensus and unmatched annotation. 
+ consensus_annotations = consensus_annotations[["axis-0", "axis-1", "axis-2"]] + unmatched_annotations = unmatched_annotations[["axis-0", "axis-1", "axis-2"]] + + consensus_out = os.path.join(OUTPUT_FOLDER, "consensus_annotations") + os.makedirs(consensus_out, exist_ok=True) + + unmatched_out = os.path.join(OUTPUT_FOLDER, "unmatched_annotations") + os.makedirs(unmatched_out, exist_ok=True) - print("Saving consensus annotations for", fname, ":") - for name, count in zip(*np.unique(combined_annotations.annotator.values, return_counts=True)): - print(name, count) + out_name = fname.replace(".tif", ".csv") + consensus_out_path = os.path.join(consensus_out, out_name) + unmatched_out_path = os.path.join(unmatched_out, out_name) - os.makedirs(OUTPUT_FOLDER, exist_ok=True) - output_path = os.path.join(OUTPUT_FOLDER, fname.replace(".tif", ".csv")) - combined_annotations.to_csv(output_path, index=False) + consensus_annotations.to_csv(consensus_out_path, index=False) + unmatched_annotations.to_csv(unmatched_out_path, index=False) -# NOTE: we need to treat the rescaled ones differently. def main(): import argparse parser = argparse.ArgumentParser() @@ -81,7 +92,7 @@ def main(): args = parser.parse_args() if args.images is None: - image_paths = sorted(glob(os.path.join(ROOT, "*.tif"))) + image_paths = sorted(glob(os.path.join(OUTPUT_FOLDER, "*.tif"))) else: image_paths = args.images diff --git a/scripts/validation/SGNs/consensus_step2.py b/scripts/validation/SGNs/consensus_step2.py new file mode 100644 index 0000000..e32683b --- /dev/null +++ b/scripts/validation/SGNs/consensus_step2.py @@ -0,0 +1,117 @@ +import os +from glob import glob + +import pandas as pd +from flamingo_tools.validation import create_consensus_annotations + +ROOT = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/AnnotatedImageCrops/F1ValidationSGNs/for_consensus_annotation" # noqa + +ANNOTATION_FOLDERS = ["AnnotationsAMD", "AnnotationsEK", "AnnotationsLR"] +COLOR = ["blue", "yellow", "orange"] +CONSENSUS_ANNOTATIONS = "consensus_annotations" +OUTPUT_FOLDER = os.path.join(ROOT, "final_consensus_annotations") + + +def match_annotations(image_path, annotation_folders): + annotations = {} + prefix = os.path.basename(image_path).split("_")[:3] + prefix = "_".join(prefix) + + annotations = {} + for annotation_folder in annotation_folders: + all_annotations = glob(os.path.join(ROOT, annotation_folder, "*.csv")) + matches = [ + ann for ann in all_annotations if (os.path.basename(ann).startswith(prefix) and "negative" not in ann) + ] + if len(matches) != 1: + breakpoint() + assert len(matches) == 1 + annotation_path = matches[0] + annotations[annotation_folder] = annotation_path + + assert len(annotations) == len(annotation_folders) + return annotations + + +def create_consensus_step2(image_path, check): + print("Compute consensus annotations for", image_path) + annotation_paths = match_annotations(image_path, ANNOTATION_FOLDERS) + matching_distance = 8 + consensus_annotations, unmatched_annotations = create_consensus_annotations( + annotation_paths, matching_distance=matching_distance, min_matches_for_consensus=2, + ) + fname = os.path.basename(image_path) + + # We exclude the two images for which we don't have previous consensus annotations yet. + # (This was due to resizing problems.) 
+ if fname in ( + "MLR169R_PV_z1913_base_full_rescaled.tif", + "MLR169R_PV_z2594_mid_full_rescaled.tif", + ): + prev_consensus = None + else: + prev_consensus = match_annotations(image_path, [CONSENSUS_ANNOTATIONS])[CONSENSUS_ANNOTATIONS] + prev_consensus = pd.read_csv(prev_consensus)[["axis-0", "axis-1", "axis-2"]] + + if check: + import napari + import tifffile + + consensus_annotations = consensus_annotations[["axis-0", "axis-1", "axis-2"]].values + unmatched_annotators = unmatched_annotations.annotator.values + unmatched_annotations = unmatched_annotations[["axis-0", "axis-1", "axis-2"]].values + + image = tifffile.imread(image_path) + v = napari.Viewer() + v.add_image(image) + if prev_consensus is not None: + v.add_points(prev_consensus.values, face_color="gray", name="previous-consensus-annotations") + v.add_points(consensus_annotations, face_color="green") + v.add_points( + unmatched_annotations, + properties={"annotator": unmatched_annotators}, + face_color="annotator", + face_color_cycle=COLOR, # TODO reorder + ) + v.title = os.path.basename(fname) + napari.run() + + else: + # Combine consensus and previous annotations. + consensus_annotations = consensus_annotations[["axis-0", "axis-1", "axis-2"]] + n_consensus = len(consensus_annotations) + n_unmatched = len(unmatched_annotations) + if prev_consensus is not None: + n_prev = len(prev_consensus) + print("Number of previous consensus annotations:", n_prev) + + print("Number of new consensus annotations:", n_consensus) + print("Number of unmatched annotations:", n_unmatched) + + consensus_annotations = pd.concat([consensus_annotations, prev_consensus]) + + out_name = fname.replace(".tif", ".csv") + out_path = os.path.join(OUTPUT_FOLDER, out_name) + + os.makedirs(OUTPUT_FOLDER, exist_ok=True) + consensus_annotations.to_csv(out_path, index=False) + + +def main(): + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("--images", nargs="+") + parser.add_argument("--check", action="store_true") + args = parser.parse_args() + + if args.images is None: + image_paths = sorted(glob(os.path.join(ROOT, "*.tif"))) + else: + image_paths = args.images + + for image_path in image_paths: + create_consensus_step2(image_path, args.check) + + +if __name__ == "__main__": + main() diff --git a/scripts/validation/SGNs/consensus_step3.py b/scripts/validation/SGNs/consensus_step3.py new file mode 100644 index 0000000..d3ebfae --- /dev/null +++ b/scripts/validation/SGNs/consensus_step3.py @@ -0,0 +1,110 @@ +import os +from glob import glob + +import pandas as pd +from flamingo_tools.validation import create_consensus_annotations + +IMAGE_ROOT = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/AnnotatedImageCrops/F1ValidationSGNs/for_consensus_annotation" # noqa +ROOT = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/AnnotatedImageCrops/F1ValidationSGNs/final_annotations" # noqa + +ANNOTATION_FOLDERS = ["AnnotationsAMD", "AnnotationsEK", "AnnotationsLR"] +COLOR = ["blue", "yellow", "orange"] +CONSENSUS_ANNOTATIONS = "consensus_annotations" +OUTPUT_FOLDER = os.path.join(ROOT, "final_consensus_annotations") + + +def match_annotations(image_path, annotation_folders): + annotations = {} + prefix = os.path.basename(image_path).split("_")[:3] + prefix = "_".join(prefix) + + annotations = {} + for annotation_folder in annotation_folders: + all_annotations = glob(os.path.join(ROOT, annotation_folder, "*.csv")) + matches = [ + ann for ann in all_annotations if (os.path.basename(ann).startswith(prefix) and "negative" 
not in ann) + ] + if len(matches) != 1: + breakpoint() + assert len(matches) == 1 + annotation_path = matches[0] + annotations[annotation_folder] = annotation_path + + assert len(annotations) == len(annotation_folders) + return annotations + + +def create_consensus_step3(image_path, check): + print("Compute consensus annotations for", image_path) + annotation_paths = match_annotations(image_path, ANNOTATION_FOLDERS) + matching_distance = 8 + consensus_annotations, unmatched_annotations = create_consensus_annotations( + annotation_paths, matching_distance=matching_distance, min_matches_for_consensus=2, + ) + fname = os.path.basename(image_path) + + prev_consensus = match_annotations(image_path, [CONSENSUS_ANNOTATIONS])[CONSENSUS_ANNOTATIONS] + prev_consensus = pd.read_csv(prev_consensus)[["axis-0", "axis-1", "axis-2"]] + + if check: + import napari + import tifffile + + consensus_annotations = consensus_annotations[["axis-0", "axis-1", "axis-2"]].values + unmatched_annotators = unmatched_annotations.annotator.values + unmatched_annotations = unmatched_annotations[["axis-0", "axis-1", "axis-2"]].values + + image = tifffile.imread(image_path) + v = napari.Viewer() + v.add_image(image) + if prev_consensus is not None: + v.add_points(prev_consensus.values, face_color="gray", name="previous-consensus-annotations") + v.add_points(consensus_annotations, face_color="green") + v.add_points( + unmatched_annotations, + properties={"annotator": unmatched_annotators}, + face_color="annotator", + face_color_cycle=COLOR, # TODO reorder + ) + v.title = os.path.basename(fname) + napari.run() + + else: + # Combine consensus and previous annotations. + consensus_annotations = consensus_annotations[["axis-0", "axis-1", "axis-2"]] + n_consensus = len(consensus_annotations) + n_unmatched = len(unmatched_annotations) + if prev_consensus is not None: + n_prev = len(prev_consensus) + print("Number of previous consensus annotations:", n_prev) + + print("Number of new consensus annotations:", n_consensus) + print("Number of unmatched annotations:", n_unmatched) + + consensus_annotations = pd.concat([consensus_annotations, prev_consensus]) + + out_name = fname.replace(".tif", ".csv") + out_path = os.path.join(OUTPUT_FOLDER, out_name) + + os.makedirs(OUTPUT_FOLDER, exist_ok=True) + consensus_annotations.to_csv(out_path, index=False) + + +def main(): + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("--images", nargs="+") + parser.add_argument("--check", action="store_true") + args = parser.parse_args() + + if args.images is None: + image_paths = sorted(glob(os.path.join(IMAGE_ROOT, "*.tif"))) + else: + image_paths = args.images + + for image_path in image_paths: + create_consensus_step3(image_path, args.check) + + +if __name__ == "__main__": + main() diff --git a/scripts/validation/SGNs/evaluate_consensus.py b/scripts/validation/SGNs/evaluate_consensus.py new file mode 100644 index 0000000..50dad28 --- /dev/null +++ b/scripts/validation/SGNs/evaluate_consensus.py @@ -0,0 +1,75 @@ +import os +from glob import glob +from pathlib import Path + +import pandas as pd +from flamingo_tools.validation import match_detections + +# The regular root folder. +ROOT = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/AnnotatedImageCrops/F1ValidationSGNs" +# The root folder for the new annotations for data with scaling issues. 
+ROOT2 = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/AnnotatedImageCrops/F1ValidationSGNs/for_consensus_annotation" # noqa + +ANNOTATION_FOLDERS = ["AnnotationsAMD", "AnnotationsEK", "AnnotationsLR"] +CONSENSUS_FOLDER = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/AnnotatedImageCrops/F1ValidationSGNs/final_annotations/final_consensus_annotations" # noqa + + +def match_annotations(consensus_path, sample_name): + annotations = {} + prefix = os.path.basename(consensus_path).split("_")[:3] + prefix = "_".join(prefix) + + if sample_name in ("MLR169R_PV_z1913_base_full_rescaled", "MLR169R_PV_z2594_mid_full_rescaled"): + root = ROOT2 + else: + root = ROOT + + annotations = {} + for annotation_folder in ANNOTATION_FOLDERS: + all_annotations = glob(os.path.join(root, annotation_folder, "*.csv")) + matches = [ + ann for ann in all_annotations if (os.path.basename(ann).startswith(prefix) and "negative" not in ann) + ] + # TODO continue debugging + if len(matches) != 1: + breakpoint() + assert len(matches) == 1 + annotation_path = matches[0] + annotations[annotation_folder] = annotation_path + + return annotations + + +def main(): + consensus_files = sorted(glob(os.path.join(CONSENSUS_FOLDER, "*.csv"))) + assert len(consensus_files) > 0 + + results = { + "annotator": [], + "sample": [], + "tps": [], + "fps": [], + "fns": [], + } + for consensus_file in consensus_files: + consensus = pd.read_csv(consensus_file) + consensus = consensus[["axis-0", "axis-1", "axis-2"]] + sample_name = Path(consensus_file).stem + + annotations = match_annotations(consensus_file, sample_name) + for name, annotation_path in annotations.items(): + annotation = pd.read_csv(annotation_path)[["axis-0", "axis-1", "axis-2"]] + tp, _, fp, fn = match_detections(annotation, consensus, max_dist=8.0) + results["annotator"].append(name) + results["tps"].append(len(tp)) + results["fps"].append(len(fp)) + results["fns"].append(len(fn)) + results["sample"].append(sample_name) + + results = pd.DataFrame(results) + print(results) + results.to_csv("results/consensus_evaluation.csv", index=False) + + +if __name__ == "__main__": + main() diff --git a/scripts/validation/SGNs/rescale_annotations.py b/scripts/validation/SGNs/rescale_annotations.py index d0d22b1..b447daa 100644 --- a/scripts/validation/SGNs/rescale_annotations.py +++ b/scripts/validation/SGNs/rescale_annotations.py @@ -1,13 +1,15 @@ +import json import os import shutil from glob import glob +import imageio.v3 as imageio import numpy as np import pandas as pd import tifffile import zarr -from flamingo_tools.s3_utils import get_s3_path, BUCKET_NAME, SERVICE_ENDPOINT +from flamingo_tools.s3_utils import get_s3_path, BUCKET_NAME, SERVICE_ENDPOINT, create_s3_target def get_scale_factor(): @@ -28,54 +30,146 @@ def get_scale_factor(): return scale_factor -def rescale_annotations(input_path, scale_factor, bkp_folder): - annotations = pd.read_csv(input_path) +def get_shape(): + cochlea = "M_LR_000169_R_fused" + s3 = create_s3_target() + content = s3.open(f"{BUCKET_NAME}/{cochlea}/dataset.json", mode="r", encoding="utf-8") + info = json.loads(content.read()) + print("Available sources:") + for source in info["sources"].keys(): + print(source) + internal_path = os.path.join(cochlea, "images", "ome-zarr", "PV.ome.zarr") + s3_store, fs = get_s3_path(internal_path, bucket_name=BUCKET_NAME, service_endpoint=SERVICE_ENDPOINT) + + input_key = "s0" + with zarr.open(s3_store, mode="r") as f: + new_shape = f[input_key].shape + return new_shape + + +def 
rescale_annotations(annotation_file, output_folder, new_shape, original_shape): + # 0.) Split the name into its parts. + fname = os.path.basename(annotation_file) + name_components = fname.split("_") + z = int(name_components[2][1:]) + + # 1.) Find the matching raw file and get its shape. + root = os.path.split(os.path.split(annotation_file)[0])[0] + tif_name = "_".join(name_components[:-1]) + image_file = os.path.join(root, f"{tif_name}.tif") + assert os.path.exists(image_file), image_file + this_shape = tifffile.memmap(image_file).shape + + # 2.) Determine if the annotations have to be reshaped, + if this_shape[1:] == new_shape[1:]: # No, they don't have to be reshaped. + # In this case we copy the annotations and that's it. + print(annotation_file, "does not need to be rescaled") + output_path = os.path.join(output_folder, fname) + shutil.copyfile(annotation_file, output_path) + return + elif this_shape[1:] == original_shape[1:]: # Yes, they have to be reshaped + pass + else: + raise ValueError(f"Unexpected shape: {this_shape}") + + # 3.) Rescale the annotations. + scale_factor = [float(ns) / float(os) for ns, os in zip(new_shape, original_shape)] + + annotations = pd.read_csv(annotation_file) annotations_rescaled = annotations.copy() annotations_rescaled["axis-1"] = annotations["axis-1"] * scale_factor[1] annotations_rescaled["axis-2"] = annotations["axis-2"] * scale_factor[2] - fname = os.path.basename(input_path) - name_components = fname.split("_") - z = int(name_components[2][1:]) new_z = int(np.round(z * scale_factor[0])) - name_components[2] = f"z{new_z}" name_components = name_components[:-1] + ["rescaled"] + name_components[-1:] new_fname = "_".join(name_components) - input_folder = os.path.split(input_path)[0] - out_path = os.path.join(input_folder, new_fname) - bkp_path = os.path.join(bkp_folder, fname) + output_path = os.path.join(output_folder, new_fname) + annotations_rescaled.to_csv(output_path, index=False) - # print(input_path) - # print(out_path) - # print(bkp_path) - # print() - # return - shutil.move(input_path, bkp_path) - annotations_rescaled.to_csv(out_path, index=False) - - -def main(): - # scale_factor = get_scale_factor() - # print(scale_factor) - scale_factor = (2.6314,) * 3 +def rescale_all_annotations(): + prefix = "MLR169R_PV" + # shape = get_shape() + original_shape = (1921, 1479, 2157) + new_shape = (5089, 3915, 5665) root = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/AnnotatedImageCrops/F1ValidationSGNs" annotation_folders = ["AnnotationsEK", "AnnotationsAMD", "AnnotationsLR"] for folder in annotation_folders: - bkp_folder = os.path.join(root, folder, "rescaled_bkp") - os.makedirs(bkp_folder, exist_ok=True) + output_folder = os.path.join(root, folder, "rescaled") + os.makedirs(output_folder, exist_ok=True) files = glob(os.path.join(root, folder, "*.csv")) for annotation_file in files: fname = os.path.basename(annotation_file) - if not fname.startswith(("MLR169R_PV_z722", "MLR169R_PV_z979")): + if not fname.startswith(prefix): continue print("Rescaling", annotation_file) - rescale_annotations(annotation_file, scale_factor, bkp_folder) + rescale_annotations(annotation_file, output_folder, new_shape, original_shape) + + +# Download the two new slices. 
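+# These are the MLR169R crops (z1913 base and z2594 mid), fetched from the fused dataset
+# via fetch_data_for_evaluation with extra_data="PV" (see download_new_data below).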
+def download_new_data(): + from flamingo_tools.validation import fetch_data_for_evaluation + + root = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/AnnotatedImageCrops/F1ValidationSGNs" + output_folder = os.path.join(root, "for_consensus_annotation") + os.makedirs(output_folder, exist_ok=True) + + cochlea = "M_LR_000169_R_fused" + files = [ + "AnnotationsEK/rescaled/MLR169R_PV_z1913_base_full_rescaled_annotationsEK.csv", + "AnnotationsEK/rescaled/MLR169R_PV_z2594_mid_full_rescaled_annotationsEK.csv" + ] + for ff in files: + annotation_path = os.path.join(root, ff) + + fname = os.path.basename(annotation_path) + name_components = fname.split("_") + + tif_name = "_".join(name_components[:-1]) + image_file = os.path.join(output_folder, f"{tif_name}.tif") + + _, _, image = fetch_data_for_evaluation( + annotation_path, cache_path=None, cochlea=cochlea, extra_data="PV", z_extent=10 + ) + print(image.shape) + print("Writing to:", image_file) + imageio.imwrite(image_file, image) + + +def check_rescaled_annotations(): + import napari + from flamingo_tools.validation import fetch_data_for_evaluation + + root = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/AnnotatedImageCrops/F1ValidationSGNs" + annotation_folders = ["AnnotationsEK/rescaled", "AnnotationsAMD/rescaled", "AnnotationsLR/rescaled"] + cochlea = "M_LR_000169_R_fused" + + for folder in annotation_folders: + annotation_paths = sorted(glob(os.path.join(root, folder, "*.csv"))) + for annotation_path in annotation_paths: + segmentation, annotations, image = fetch_data_for_evaluation( + annotation_path, cache_path=None, components_for_postprocessing=[1], cochlea=cochlea, extra_data="PV", + ) + v = napari.Viewer() + v.add_image(image) + v.add_labels(segmentation) + v.add_points(annotations) + v.title = annotation_path + napari.run() + + +def main(): + # rescale_all_annotations() + # check_rescaled_annotations() + + # MLR169R_PV_z1913_base_full_rescaled.tif + # MLR169R_PV_z2594_mid_full_rescaled.tif + download_new_data() # Rescale the point annotations for the cochlea MLR169R, which was diff --git a/scripts/validation/SGNs/run_evaluation.py b/scripts/validation/SGNs/run_evaluation.py index fa65017..1859803 100644 --- a/scripts/validation/SGNs/run_evaluation.py +++ b/scripts/validation/SGNs/run_evaluation.py @@ -3,11 +3,16 @@ import pandas as pd from flamingo_tools.validation import ( - fetch_data_for_evaluation, parse_annotation_path, compute_scores_for_annotated_slice + fetch_data_for_evaluation, _parse_annotation_path, compute_scores_for_annotated_slice ) -ROOT = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/AnnotatedImageCrops/F1ValidationSGNs" -ANNOTATION_FOLDERS = ["AnnotationsEK", "AnnotationsAMD", "AnnotationsLR"] +# OLD EVALUATION FOR INITIAL ANNOTATIONS +# ROOT = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/AnnotatedImageCrops/F1ValidationSGNs" +# ANNOTATION_FOLDERS = ["AnnotationsEK", "AnnotationsAMD", "AnnotationsLR"] + +# NEW EVALUATION FOR FINAL ANNOTATIONS +ROOT = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/AnnotatedImageCrops/F1ValidationSGNs/final_annotations" # noqa +ANNOTATION_FOLDERS = ["final_consensus_annotations"] def run_evaluation(root, annotation_folders, result_file, cache_folder): @@ -27,11 +32,16 @@ def run_evaluation(root, annotation_folders, result_file, cache_folder): annotator = folder[len("Annotations"):] annotations = sorted(glob(os.path.join(root, folder, "*.csv"))) for annotation_path in annotations: - cochlea, slice_id = 
parse_annotation_path(annotation_path) + cochlea, slice_id = _parse_annotation_path(annotation_path) print("Run evaluation for", annotator, cochlea, "z=", slice_id) + if cochlea == "M_LR_000169_R": + mobie_name = f"{cochlea}_fused" + else: + mobie_name = None + segmentation, annotations = fetch_data_for_evaluation( - annotation_path, components_for_postprocessing=[1], + annotation_path, components_for_postprocessing=[1], cochlea=mobie_name, cache_path=None if cache_folder is None else os.path.join(cache_folder, f"{cochlea}_{slice_id}.tif") ) scores = compute_scores_for_annotated_slice(segmentation, annotations, matching_tolerance=5) @@ -51,7 +61,7 @@ def main(): import argparse parser = argparse.ArgumentParser() parser.add_argument("-i", "--input", default=ROOT) - parser.add_argument("--folders", default=ANNOTATION_FOLDERS) + parser.add_argument("--folders", default=ANNOTATION_FOLDERS, nargs="+") parser.add_argument("--result_file", default="results.csv") parser.add_argument("--cache_folder") args = parser.parse_args() diff --git a/scripts/validation/synapses/prediction.py b/scripts/validation/synapses/prediction.py index 2ea3273..3aa5868 100644 --- a/scripts/validation/synapses/prediction.py +++ b/scripts/validation/synapses/prediction.py @@ -51,8 +51,12 @@ def pred_synapse_impl(input_path, output_folder): def predict_synapses(): files = sorted(glob(os.path.join(INPUT_ROOT, "*.zarr"))) for ff in files: - print("Segmenting", ff) output_folder = os.path.join(OUTPUT_ROOT, Path(ff).stem) + if os.path.exists(os.path.join(output_folder, "predictions.zarr", "prediction")): + print("Synapse prediction in", ff, "already done") + continue + else: + print("Predicting synapses in", ff) pred_synapse_impl(ff, output_folder) @@ -68,8 +72,12 @@ def pred_ihc_impl(input_path, output_folder): def predict_ihcs(): files = sorted(glob(os.path.join(INPUT_ROOT, "*.zarr"))) for ff in files: - print("Segmenting", ff) output_folder = os.path.join(OUTPUT_ROOT, f"{Path(ff).stem}_ihc") + if os.path.exists(os.path.join(output_folder, "predictions.zarr", "prediction")): + print("IHC segmentation in", ff, "already done") + continue + else: + print("Segmenting IHCs in", ff) pred_ihc_impl(ff, output_folder) @@ -147,8 +155,8 @@ def process_everything(): def main(): - process_everything() - # check_predictions() + # process_everything() + check_predictions() if __name__ == "__main__": diff --git a/scripts/validation/synapses/run_evaluation.py b/scripts/validation/synapses/run_evaluation.py index 3407d1a..f26c005 100644 --- a/scripts/validation/synapses/run_evaluation.py +++ b/scripts/validation/synapses/run_evaluation.py @@ -1,68 +1,11 @@ import os +from glob import glob +from pathlib import Path -import numpy as np import pandas as pd - from elf.io import open_file -from scipy.spatial import cKDTree -from scipy.optimize import linear_sum_assignment - - -# TODO refactor -def match_detections( - detections: np.ndarray, - annotations: np.ndarray, - max_dist: float -): - """One-to-one matching between 3-D detections and ground-truth points. - - Args: - detections: (N, 3) array-like Candidate points produced by the model. - annotations: (M, 3) array-like Ground-truth reference points. - max_dist: Maximum Euclidean distance allowed for a match. - - Returns: - tp_det_ids : 1-D ndarray. Indices in `detections` that were matched (true positives). - tp_ann_ids : 1-D ndarray. Indices in `annotations` that were matched (true positives). - fp_det_ids : 1-D ndarray. Unmatched detection indices (false positives). 
- fn_ann_ids : 1-D ndarray, Unmatched annotation indices (false negatives). - """ - det = np.asarray(detections, dtype=float) - ann = np.asarray(annotations, dtype=float) - N, M = len(det), len(ann) - - # trivial corner cases -------------------------------------------------------- - if N == 0: - return np.empty(0, int), np.empty(0, int), np.empty(0, int), np.arange(M) - if M == 0: - return np.empty(0, int), np.empty(0, int), np.arange(N), np.empty(0, int) - - # 1. build sparse radius-filtered distance matrix ----------------------------- - tree_det = cKDTree(det) - tree_ann = cKDTree(ann) - coo = tree_det.sparse_distance_matrix(tree_ann, max_dist, output_type="coo_matrix") - - if coo.nnz == 0: # nothing is close enough - return np.empty(0, int), np.empty(0, int), np.arange(N), np.arange(M) - - cost = np.full((N, M), 5 * max_dist, dtype=float) - cost[coo.row, coo.col] = coo.data # fill only existing edges - - # 2. optimal one-to-one assignment (Hungarian) -------------------------------- - row_ind, col_ind = linear_sum_assignment(cost) - - # Filter assignments that were padded with +∞ cost for non-existent edges - # (linear_sum_assignment automatically does that padding internally). - valid_mask = cost[row_ind, col_ind] <= max_dist - tp_det_ids = row_ind[valid_mask] - tp_ann_ids = col_ind[valid_mask] - assert len(tp_det_ids) == len(tp_ann_ids) - - # 3. derive FP / FN ----------------------------------------------------------- - fp_det_ids = np.setdiff1d(np.arange(N), tp_det_ids, assume_unique=True) - fn_ann_ids = np.setdiff1d(np.arange(M), tp_ann_ids, assume_unique=True) - - return tp_det_ids, tp_ann_ids, fp_det_ids, fn_ann_ids + +from flamingo_tools.validation import match_detections def evaluate_synapse_detections(pred, gt): @@ -107,7 +50,7 @@ def visualize_synapse_detections(pred, gt, heatmap_path=None, ctbp2_path=None): pred = pd.read_csv(pred, sep="\t")[["z", "y", "x"]].values gt = pd.read_csv(gt, sep="\t")[["z", "y", "x"]].values - tps_pred, tps_gt, fps, fns = match_detections(pred, gt, max_dist=5) + tps_pred, tps_gt, fps, fns = match_detections(pred, gt, max_dist=4) tps = pred[tps_pred] fps = pred[fps] @@ -150,15 +93,20 @@ def main(): parser.add_argument("--visualize", action="store_true") args = parser.parse_args() - pred_files = [ - "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/AnnotatedImageCrops/SynapseValidation/m226l_midp330_vglut3-ctbp2/filtered_synapse_detection.tsv", # noqa - ] - gt_files = [ - "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/training_data/synapses/test_data/v3/labels/m226l_midp330_vglut3-ctbp2_filtered.tsv", # noqa - ] - ctbp2_files = [ - "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/training_data/synapses/test_data/v3/images/m226l_midp330_vglut3-ctbp2.zarr", # noqa - ] + pred_root = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/AnnotatedImageCrops/SynapseValidation" + gt_root = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/training_data/synapses/test_data/v3/labels" + ctbp2_root = "/mnt/vast-nhr/projects/nim00007/data/moser/cochlea-lightsheet/training_data/synapses/test_data/v3/images" # noqa + + ctbp2_files = sorted(glob(os.path.join(ctbp2_root, "*.zarr"))) + gt_files = sorted(glob(os.path.join(gt_root, "*_filtered.tsv"))) + assert len(ctbp2_files) == len(gt_files) + + pred_files = [] + for ff in ctbp2_files: + fname = Path(ff).stem + pred_file = os.path.join(pred_root, fname, "filtered_synapse_detection.tsv") + assert os.path.exists(pred_file), pred_file + 
pred_files.append(pred_file) if args.visualize: visualize_evaluation(pred_files, gt_files, ctbp2_files)