computational-cell-analytics
diff --git a/scripts/cooper/revision/updated_data_analysis/analysis_segmentations.py b/scripts/cooper/revision/updated_data_analysis/analysis_segmentations.py
Lines changed: 259 additions & 0 deletions
Lines changed: 259 additions & 0 deletions
diff --git a/scripts/cooper/revision/updated_data_analysis/data_analysis.py b/scripts/cooper/revision/updated_data_analysis/data_analysis.py
Lines changed: 92 additions & 0 deletions
Lines changed: 92 additions & 0 deletions
@@ -0,0 +1,259 @@
+import os
+import numpy as np
+import h5py
+
+from skimage.measure import regionprops
+from skimage.morphology import remove_small_holes
+from skimage.segmentation import relabel_sequential
+
+from synapse_net.inference.vesicles import segment_vesicles
+from synapse_net.inference.compartments import segment_compartments
+from synapse_net.inference.active_zone import segment_active_zone
+from synapse_net.inference.inference import get_model_path
+
+
+def fill_and_filter_vesicles(
+    vesicles: np.ndarray, min_size: int = 2500, hole_area_threshold: int = 1000
+) -> np.ndarray:
+    """
+    Apply a size filter and fill small holes in vesicle segments.
+
+    Args:
+        vesicles (np.ndarray): Label image with vesicle segments; 0 is treated as background.
+        min_size (int): Segments with fewer voxels than this are set to 0.
+        hole_area_threshold (int): Forwarded to `remove_small_holes` as `area_threshold`.
+
+    Returns:
+        np.ndarray: Processed copy of the segmentation; the input array is not modified.
+    """
+    ids, sizes = np.unique(vesicles, return_counts=True)
+    # Drop the background by value, not by position: if the volume contains no
+    # 0 voxel, the first entry is a real segment and must still be size-checked.
+    foreground = ids != 0
+    ids, sizes = ids[foreground], sizes[foreground]
+
+    vesicles_pp = vesicles.copy()
+    vesicles_pp[np.isin(vesicles, ids[sizes < min_size])] = 0
+
+    for prop in regionprops(vesicles_pp):
+        # `prop.slice` is the bounding-box slice of the region, so this works for
+        # any dimensionality instead of hard-coding a 3D bbox layout.
+        # Basic slicing returns a view, hence the assignment below writes
+        # straight into `vesicles_pp`.
+        region = vesicles_pp[prop.slice]
+        filled = remove_small_holes(region == prop.label, area_threshold=hole_area_threshold)
+        region[filled] = prop.label
+
+    return vesicles_pp
+
+
+def SV_pred(raw: np.ndarray, SV_model: str, output_path: str = None, store: bool = False) -> np.ndarray:
+    """
+    Segment synaptic vesicles and, on request, write the results to an HDF5 file.
+
+    The raw network prediction and the segmentation are written to
+    "predictions/SV/pred" and "predictions/SV/seg". A dataset that already
+    exists in the file is reported and left as it is.
+
+    Args:
+        raw (np.ndarray): Raw EM image volume.
+        SV_model (str): Path to vesicle model.
+        output_path (str): HDF5 file to store predictions (opened in append mode).
+        store (bool): Whether to store predictions.
+
+    Returns:
+        np.ndarray: Segmentation result.
+    """
+    seg, pred = segment_vesicles(input_volume=raw, model_path=SV_model, verbose=False, return_predictions=True)
+
+    # Handle the two "nothing to write" situations first.
+    if not store:
+        print("Not storing SV predictions")
+        return seg
+    if not output_path:
+        print("Output path is missing, not storing SV predictions")
+        return seg
+
+    to_write = (("predictions/SV/pred", pred), ("predictions/SV/seg", seg))
+    with h5py.File(output_path, "a") as f:
+        for key, data in to_write:
+            if key in f:
+                print(f"{key} already saved")
+                continue
+            f.create_dataset(key, data=data, compression="lzf")
+
+    return seg
+
+
+def compartment_pred(raw: np.ndarray, compartment_model: str, output_path: str = None, store: bool = False) -> np.ndarray:
+    """
+    Segment compartments and, on request, write the results to an HDF5 file.
+
+    The raw network prediction and the segmentation are written to
+    "predictions/compartment/pred" and "predictions/compartment/seg". A dataset
+    that already exists in the file is reported and left as it is.
+
+    Args:
+        raw (np.ndarray): Raw EM image volume.
+        compartment_model (str): Path to compartment model.
+        output_path (str): HDF5 file to store predictions (opened in append mode).
+        store (bool): Whether to store predictions.
+
+    Returns:
+        np.ndarray: Segmentation result.
+    """
+    seg, pred = segment_compartments(
+        input_volume=raw, model_path=compartment_model, verbose=False, return_predictions=True
+    )
+
+    if store:
+        if output_path:
+            outputs = {
+                "predictions/compartment/pred": pred,
+                "predictions/compartment/seg": seg,
+            }
+            with h5py.File(output_path, "a") as f:
+                for dataset_key, values in outputs.items():
+                    if dataset_key in f:
+                        print(f"{dataset_key} already saved")
+                    else:
+                        f.create_dataset(dataset_key, data=values, compression="lzf")
+        else:
+            print("Output path is missing, not storing compartment predictions")
+    else:
+        print("Not storing compartment predictions")
+
+    return seg
+
+
+def AZ_pred(raw: np.ndarray, AZ_model: str, output_path: str = None, store: bool = False) -> np.ndarray:
+    """
+    Segment the active zone and, on request, write the results to an HDF5 file.
+
+    The raw network prediction and the segmentation are written to
+    "predictions/az/pred" and "predictions/az/seg". A dataset that already
+    exists in the file is reported and left as it is.
+
+    Args:
+        raw (np.ndarray): Raw EM image volume.
+        AZ_model (str): Path to AZ model.
+        output_path (str): HDF5 file to store predictions (opened in append mode).
+        store (bool): Whether to store predictions.
+
+    Returns:
+        np.ndarray: Segmentation result.
+    """
+    seg, pred = segment_active_zone(raw, model_path=AZ_model, verbose=False, return_predictions=True)
+
+    want_store = bool(store)
+    have_path = bool(output_path)
+
+    if want_store and have_path:
+        with h5py.File(output_path, "a") as f:
+            for name, array in [("predictions/az/pred", pred), ("predictions/az/seg", seg)]:
+                if name not in f:
+                    f.create_dataset(name, data=array, compression="lzf")
+                else:
+                    print(f"{name} already saved")
+    elif want_store:
+        print("Output path is missing, not storing AZ predictions")
+    else:
+        print("Not storing AZ predictions")
+
+    return seg
+
+
+def filter_presynaptic_SV(sv_seg: np.ndarray, compartment_seg: np.ndarray, output_path: str = None,
+                          store: bool = False, input_path: str = None) -> np.ndarray:
+    """
+    Filters synaptic vesicle segmentation to retain only vesicles in the presynaptic region.
+
+    The presynaptic region is taken to be the compartment that contains the
+    largest number of distinct vesicles. Vesicles that have no voxel inside that
+    compartment are removed, unless the input file is on the hard-coded
+    exception list below.
+
+    Args:
+        sv_seg (np.ndarray): Vesicle segmentation.
+        compartment_seg (np.ndarray): Compartment segmentation.
+        output_path (str): Optional HDF5 file to store outputs.
+        store (bool): Whether to store outputs.
+        input_path (str): Path to input file (for filename-based filtering).
+
+    Returns:
+        np.ndarray: Filtered presynaptic vesicle segmentation, relabeled to 1..n.
+    """
+    # Apply the size filter and fill small holes in the vesicles.
+    vesicles_pp = fill_and_filter_vesicles(sv_seg)
+
+    def n_vesicles(mask, ves):
+        # Count distinct non-zero labels. `len(unique) - 1` would under-count by
+        # one whenever the compartment contains no background voxel.
+        return int(np.count_nonzero(np.unique(ves[mask])))
+
+    # Find the compartment with the most vesicles.
+    props = regionprops(compartment_seg, intensity_image=vesicles_pp, extra_properties=[n_vesicles])
+    if props:
+        vesicle_counts = [prop.n_vesicles for prop in props]
+        presynapse_id = props[int(np.argmax(vesicle_counts))].label
+        mask = (compartment_seg == presynapse_id).astype("uint8")
+    else:
+        # No compartment found: keep everything. Same dtype as the branch above so
+        # the stored "presynapse" dataset always has one type.
+        mask = np.ones(compartment_seg.shape, dtype="uint8")
+
+    name = os.path.basename(input_path) if input_path else "unknown"
+    print(name)
+
+    # Files for which the compartment mask was judged wrong (visual inspection);
+    # vesicles of these files are not filtered by the mask.
+    no_filter = ("C_M13DKO_080212_CTRL6.7B_crop.h5", "E_M13DKO_080212_DKO1.2_crop.h5",
+                 "G_M13DKO_080212_CTRL6.7B_crop.h5", "A_SNAP25_120812_CTRL2.3_14_crop.h5",
+                 "A_SNAP25_12082_KO2.1_6_crop.h5", "B_SNAP25_120812_CTRL2.3_14_crop.h5",
+                 "B_SNAP25_12082_CTRL2.3_5_crop.h5", "D_SNAP25_120812_CTRL2.3_14_crop.h5",
+                 "G_SNAP25_12.08.12_KO1.1_3_crop.h5")
+    if name not in no_filter:
+        # A vesicle whose maximum over the mask is 0 has no voxel inside the
+        # presynaptic compartment.
+        outside_ids = [prop.label for prop in regionprops(vesicles_pp, mask) if prop.max_intensity == 0]
+        vesicles_pp[np.isin(vesicles_pp, outside_ids)] = 0
+
+    if store and output_path:
+        seg_presynapse = "predictions/compartment/presynapse"
+        seg_presynaptic_SV = "predictions/SV/presynaptic"
+
+        # NOTE(review): the vesicles are stored *before* the sequential relabeling
+        # below, so label ids in the file can differ from the returned array.
+        # Confirm this is intended.
+        with h5py.File(output_path, "a") as f:
+            if seg_presynapse in f:
+                print(f"{seg_presynapse} already saved")
+            else:
+                f.create_dataset(seg_presynapse, data=mask, compression="lzf")
+            if seg_presynaptic_SV in f:
+                print(f"{seg_presynaptic_SV} already saved")
+            else:
+                f.create_dataset(seg_presynaptic_SV, data=vesicles_pp, compression="lzf")
+    elif store and not output_path:
+        print("Output path is missing, not storing presynapse seg and presynaptic SV seg")
+    else:
+        print("Not storing presynapse seg and presynaptic SV seg")
+
+    # Relabel all non-zero labels sequentially (1, 2, 3, ..., n). This makes the
+    # analysis easier because distances and diameters can be matched by id.
+    vesicles_pp, _, _ = relabel_sequential(vesicles_pp)
+
+    return vesicles_pp
+
+
+def run_predictions(input_path: str, output_path: str = None, store: bool = False, AZ_model: str = None) -> tuple:
+    """
+    Run full inference pipeline: vesicles, compartments, active zone, and presynaptic SV filtering.
+
+    Args:
+        input_path (str): Path to input HDF5 file with 'raw' dataset.
+        output_path (str): Path to output HDF5 file to store predictions.
+        store (bool): Whether to store intermediate and final results.
+        AZ_model (str): Optional path to the active zone model. When omitted, the
+            hard-coded cluster checkpoint below is used, so existing callers are unaffected.
+
+    Returns:
+        Tuple[np.ndarray, np.ndarray]: (Filtered vesicle segmentation, AZ segmentation)
+    """
+    with h5py.File(input_path, "r") as f:
+        raw = f["raw"][:]
+
+    SV_model = get_model_path("vesicles_3d")
+    compartment_model = get_model_path("compartments")
+    if AZ_model is None:
+        # TODO upload better AZ model
+        # This default only exists on one specific file system; pass `AZ_model`
+        # explicitly when running anywhere else.
+        AZ_model = "/mnt/lustre-emmy-hdd/usr/u12095/synapse_net/models/ConstantinAZ/checkpoints/v7/"
+
+    print("Running SV prediction")
+    sv_seg = SV_pred(raw, SV_model, output_path, store)
+
+    print("Running compartment prediction")
+    comp_seg = compartment_pred(raw, compartment_model, output_path, store)
+
+    print("Running AZ prediction")
+    az_seg = AZ_pred(raw, AZ_model, output_path, store)
+
+    print("Filtering the presynaptic SV")
+    presyn_SV_seg = filter_presynaptic_SV(sv_seg, comp_seg, output_path, store, input_path)
+
+    print("Done with predictions")
+
+    return presyn_SV_seg, az_seg
@@ -0,0 +1,92 @@
+from synapse_net.distance_measurements import measure_segmentation_to_object_distances
+from synapse_net.imod.to_imod import convert_segmentation_to_spheres
+
+
+def calc_AZ_SV_distance(vesicles, az, resolution):
+    """
+    Measure how far each synaptic vesicle (SV) is from the active zone (AZ).
+
+    Args:
+        vesicles (np.ndarray): Segmentation of synaptic vesicles.
+        az (np.ndarray): Segmentation of the active zone.
+        resolution (tuple): Voxel resolution in nanometers (z, y, x).
+
+    Returns:
+        list of dict: One {'seg_id', 'distance'} dict per vesicle, ordered by seg_id.
+    """
+    # Only the distances and the segment ids are needed; the two middle return
+    # values are ignored.
+    distances, _unused_a, _unused_b, seg_ids = measure_segmentation_to_object_distances(
+        vesicles, az, resolution=resolution
+    )
+
+    entries = []
+    for sid, dist in zip(seg_ids, distances):
+        entries.append({"seg_id": sid, "distance": dist})
+
+    return sorted(entries, key=lambda entry: entry["seg_id"])
+
+
+def sort_by_distances(input_list):
+    """
+    Return the entries ordered by their 'distance' value, smallest first.
+
+    The input is not modified; entries with equal distance keep their relative order.
+
+    Args:
+        input_list (list of dict): Dictionaries that each have a 'distance' key.
+
+    Returns:
+        list of dict: New list in ascending order of distance.
+    """
+    ordered = list(input_list)
+    ordered.sort(key=lambda entry: entry["distance"])
+    return ordered
+
+
+def combine_lists(list1, list2):
+    """
+    Merge two lists of dictionaries on their common 'seg_id' key.
+
+    Entries of `list1` are copied first. An entry of `list2` whose 'seg_id' is
+    already present has its other keys written into the existing entry
+    (overwriting on conflict); otherwise it is added as a new entry.
+
+    Args:
+        list1 (list of dict): First list with 'seg_id' key.
+        list2 (list of dict): Second list with 'seg_id' key.
+
+    Returns:
+        list of dict: One merged dictionary per distinct 'seg_id', in first-seen order.
+    """
+    merged = {entry["seg_id"]: entry.copy() for entry in list1}
+
+    for entry in list2:
+        key = entry["seg_id"]
+        if key not in merged:
+            merged[key] = entry.copy()
+            continue
+        # Keep the 'seg_id' already stored; take every other field from list2.
+        merged[key].update((field, value) for field, value in entry.items() if field != "seg_id")
+
+    return list(merged.values())
+
+
+def calc_SV_diameters(vesicles, resolution):
+    """
+    Calculate diameters of synaptic vesicles from segmentation data.
+
+    Args:
+        vesicles (np.ndarray): Segmentation of synaptic vesicles.
+        resolution (tuple): Voxel resolution in nanometers (z, y, x).
+
+    Returns:
+        list of dict: Each dict contains 'seg_id' and 'diameter', sorted by seg_id.
+            'seg_id' is the vesicle's label in `vesicles` whenever the number of
+            returned radii equals the number of non-zero labels; otherwise it is
+            the 0-based position in the radii array and a warning is emitted.
+    """
+    # Imported locally because this module has no top-level numpy/warnings import.
+    import warnings
+
+    import numpy as np
+
+    _, radii = convert_segmentation_to_spheres(
+        vesicles, resolution=resolution, radius_factor=0.7, estimate_radius_2d=True
+    )
+
+    # Use the real label ids instead of 0-based positions, so that the ids can be
+    # matched against other per-vesicle measurements (a sequentially relabeled
+    # segmentation starts at 1, not 0).
+    # NOTE(review): assumes the radii come back in ascending label order, one per
+    # non-zero label - confirm against convert_segmentation_to_spheres.
+    labels = np.unique(vesicles)
+    labels = labels[labels != 0]
+    if len(labels) == len(radii):
+        seg_ids = labels.tolist()
+    else:
+        warnings.warn(
+            f"Got {len(radii)} radii for {len(labels)} vesicle labels; "
+            "falling back to positional seg_ids, which may not match the label ids."
+        )
+        seg_ids = list(range(len(radii)))
+
+    # NOTE(review): if convert_segmentation_to_spheres already applies `resolution`
+    # to the radii, this multiplication scales them twice - confirm.
+    radii_nm = radii * resolution[0]
+    diameters = radii_nm * 2
+
+    diam_list = [{"seg_id": sid, "diameter": diam} for sid, diam in zip(seg_ids, diameters)]
+    diam_list.sort(key=lambda x: x["seg_id"])
+
+    return diam_list