Equidistant centers of path

schilling40 · schilling40 · commit 7942d56f92ce · 2025-07-16T18:04:32.000+02:00
diff --git a/flamingo_tools/segmentation/cochlea_mapping.py b/flamingo_tools/segmentation/cochlea_mapping.py
@@ -1,26 +1,25 @@
 import math
-import warnings
 from typing import List, Tuple
 
 import networkx as nx
 import numpy as np
 import pandas as pd
 from networkx.algorithms.approximation import steiner_tree
 from scipy.ndimage import distance_transform_edt, binary_dilation, binary_closing
+from scipy.interpolate import interp1d
 
-import flamingo_tools.segmentation.postprocessing as postprocessing
-from flamingo_tools.segmentation.postprocessing import graph_connected_components
+from flamingo_tools.segmentation.postprocessing import downscaled_centroids
 
 
 def find_most_distant_nodes(G: nx.classes.graph.Graph, weight: str = 'weight') -> Tuple[float, float]:
     """Find the most distant nodes in a graph.
 
     Args:
-        G: Input graph
+        G: Input graph.
 
     Returns:
-        Node 1
-        Node 2
+        Node 1.
+        Node 2.
     """
     all_lengths = dict(nx.all_pairs_dijkstra_path_length(G, weight=weight))
     max_dist = 0
@@ -40,9 +39,12 @@ def central_path_edt_graph(mask: np.ndarray, start: Tuple[int], end: Tuple[int])
     """Find the central path within a binary mask between a start and an end coordinate.
 
     Args:
-        mask: Binary mask of volume
-        start: Starting coordinate
-        end: End coordinate
+        mask: Binary mask of volume.
+        start: Starting coordinate.
+        end: End coordinate.
+
+    Returns:
+        Coordinates of central path.
     """
     dt = distance_transform_edt(mask)
     G = nx.Graph()
@@ -74,11 +76,11 @@ def moving_average_3d(path: np.ndarray, window: int = 5) -> np.ndarray:
     """Smooth a 3D path with a simple moving average filter.
 
     Args:
-        path: ndarray of shape (N, 3)
-        window: half-window size; actual window = 2*window + 1
+        path: ndarray of shape (N, 3).
+        window: half-window size; actual window = 2*window + 1.
 
     Returns:
-        smoothed path: ndarray of same shape
+        smoothed path: ndarray of same shape.
     """
     kernel_size = 2 * window + 1
     kernel = np.ones(kernel_size) / kernel_size
@@ -102,11 +104,15 @@ def measure_run_length_sgns(centroids: np.ndarray, scale_factor=10):
     6) The points of the path are fed into a dictionary along with the fractional length.
 
     Args:
-        centroids: Centroids of the SGN segmentation, ndarray of shape (N, 3)
+        centroids: Centroids of the SGN segmentation, ndarray of shape (N, 3).
         scale_factor: Downscaling factor for finding the central path.
 
+    Returns:
+        Total distance of the path.
+        Path as an np.ndarray of positions.
+        A dictionary containing the position and the length fraction of each point in the path.
     """
-    mask = postprocessing.downscaled_centroids(centroids, scale_factor=scale_factor, downsample_mode="capped")
+    mask = downscaled_centroids(centroids, scale_factor=scale_factor, downsample_mode="capped")
     mask = binary_dilation(mask, np.ones((3, 3, 3)), iterations=1)
     mask = binary_closing(mask, np.ones((3, 3, 3)), iterations=1)
     pts = np.argwhere(mask == 1)
@@ -137,23 +143,31 @@ def measure_run_length_sgns(centroids: np.ndarray, scale_factor=10):
         path_dict[num + 1] = {"pos": p, "length_fraction": rel_dist}
     path_dict[len(path)] = {"pos": path[-1], "length_fraction": 1}
 
-    return total_distance, path_dict
+    return total_distance, path, path_dict
 
 
-def measure_run_length_ihcs(graph, weight="weight"):
+def measure_run_length_ihcs(centroids):
     """Measure the run lengths of the IHC segmentation
     by finding the shortest path between the most distant nodes in a Steiner Tree.
 
     Args:
-        graph: Input graph.
+        centroids: Centroids of IHC segmentation.
+
+    Returns:
+        Total distance of the path.
+        Path as an nd.array of positions.
+        A dictionary containing the position and the length fraction of each point in the path.
     """
-    u, v = find_most_distant_nodes(graph)
+    graph = nx.Graph()
+    for num, pos in enumerate(centroids):
+        graph.add_node(num, pos=pos)
     # approximate Steiner tree and find shortest path between the two most distant nodes
     terminals = set(graph.nodes())  # All nodes are required
     # Approximate Steiner Tree over all nodes
-    T = steiner_tree(graph, terminals, weight=weight)
-    path = nx.shortest_path(T, source=u, target=v, weight=weight)
-    total_distance = nx.path_weight(T, path, weight=weight)
+    T = steiner_tree(graph, terminals)
+    u, v = find_most_distant_nodes(T)
+    path = nx.shortest_path(T, source=u, target=v)
+    total_distance = nx.path_weight(T, path, weight="weight")
 
     # assign relative distance to points on path
     path_dict = {}
@@ -166,7 +180,7 @@ def measure_run_length_ihcs(graph, weight="weight"):
         path_dict[num + 1] = {"pos": graph.nodes[p]["pos"], "length_fraction": rel_dist}
     path_dict[len(path)] = {"pos": graph.nodes[path[-1]]["pos"], "length_fraction": 1}
 
-    return total_distance, path_dict
+    return total_distance, path, path_dict
 
 
 def map_frequency(table: pd.DataFrame):
@@ -176,7 +190,10 @@ def map_frequency(table: pd.DataFrame):
     For mice: fit values between minimal (1kHz) and maximal (80kHz) values
 
     Args:
-        table:
+        table: Dataframe containing the segmentation.
+
+    Returns:
+        Dataframe containing frequency in an additional column 'frequency[kHz]'.
     """
     var_k = 0.88
     fmin = 1
@@ -189,11 +206,51 @@ def map_frequency(table: pd.DataFrame):
     return table
 
 
+def equidistant_centers(
+    table: pd.DataFrame,
+    component_label: List[int] = [1],
+    cell_type: str = "sgn",
+    n_blocks: int = 10,
+    offset_blocks: bool = True,
+) -> np.ndarray:
+    """Find equidistant centers along the central path of a segmentation.
+
+    The centroids of the selected components are reduced to a central path
+    (`measure_run_length_ihcs` for cell type "ihc", `measure_run_length_sgns` otherwise).
+    The path is parametrized by its cumulative arc length and sampled at evenly spaced positions.
+
+    Args:
+        table: Dataframe with the columns "component_labels", "anchor_x", "anchor_y" and "anchor_z".
+        component_label: List of components for centroid subset. The default list is only read, never modified.
+        cell_type: Cell type of the segmentation. "ihc" selects the IHC measurement, any other value the SGN one.
+        n_blocks: Number of equidistant centers for block creation.
+        offset_blocks: If True, the path is split into n_blocks equal segments and the segment midpoints
+            are returned, which avoids centers at the start/end of the path.
+            If False, the centers are spread evenly from the start to the end of the path (both included).
+
+    Returns:
+        Equidistant centers as float values, one row per center.
+
+    Raises:
+        ValueError: If the central path is not a sequence of at least two positions.
+    """
+    # subset of centroids for given component label(s)
+    subset = table[table["component_labels"].isin(component_label)]
+    centroids = list(zip(subset["anchor_x"], subset["anchor_y"], subset["anchor_z"]))
+
+    if cell_type == "ihc":
+        _, path, _ = measure_run_length_ihcs(centroids)
+    else:
+        _, path, _ = measure_run_length_sgns(centroids)
+
+    # The interpolation below needs coordinates, not e.g. a flat list of graph node ids.
+    path = np.asarray(path, dtype=float)
+    if path.ndim != 2 or path.shape[0] < 2:
+        raise ValueError(
+            "Expected the central path as an array of at least two positions with shape (N, D), "
+            f"got shape {path.shape}."
+        )
+
+    seg_lens = np.linalg.norm(np.diff(path, axis=0), axis=1)
+    cum_len = np.insert(np.cumsum(seg_lens), 0, 0)
+
+    # Sample on the arc length of the path itself. interp1d rejects values outside
+    # [cum_len[0], cum_len[-1]], so a separately computed total distance that differs
+    # (even by floating point round-off) could push the last sample out of bounds.
+    path_length = cum_len[-1]
+    if offset_blocks:
+        # Odd-indexed samples of a grid with 2 * n_blocks intervals are the segment midpoints.
+        target_s = np.linspace(0, path_length, n_blocks * 2 + 1)[1::2]
+    else:
+        target_s = np.linspace(0, path_length, n_blocks)
+
+    return interp1d(cum_len, path, axis=0)(target_s)
+
+
 def tonotopic_mapping(
     table: pd.DataFrame,
     component_label: List[int] = [1],
-    max_edge_distance: float = 30,
-    min_component_length: int = 50,
     cell_type: str = "ihc"
 ) -> pd.DataFrame:
     """Tonotopic mapping of IHCs by supplying a table with component labels.
@@ -202,10 +259,7 @@ def tonotopic_mapping(
     Args:
         table: Dataframe of segmentation table.
         component_label: List of component labels to evaluate.
-        max_edge_distance: Maximal edge distance to connect nodes.
-        min_component_length: Minimal number of nodes in component.
         cell_type: Cell type of segmentation.
-        Filter factor: Fraction of nodes to remove before mapping.
 
     Returns:
         Table with tonotopic label for cells.
@@ -215,33 +269,20 @@ def tonotopic_mapping(
     centroids = list(zip(new_subset["anchor_x"], new_subset["anchor_y"], new_subset["anchor_z"]))
     label_ids = [int(i) for i in list(new_subset["label_id"])]
 
-    # create graph with connected components
-    coords = {}
-    for index, element in zip(label_ids, centroids):
-        coords[index] = element
-
-    components, graph = graph_connected_components(coords, max_edge_distance, min_component_length)
-    if len(components) > 1:
-        warnings.warn(f"There are {len(components)} connected components, expected 1. "
-                      "Check parameters for post-processing (max_edge_distance, min_component_length).")
-
-    unfiltered_graph = graph.copy()
-
     if cell_type == "ihc":
-        total_distance, path_dict = measure_run_length_ihcs(graph)
+        total_distance, _, path_dict = measure_run_length_ihcs(centroids)
 
     else:
-        total_distance, path_dict = measure_run_length_sgns(centroids)
+        total_distance, _, path_dict = measure_run_length_sgns(centroids)
 
     # add missing nodes from component and compute distance to path
-    pos = nx.get_node_attributes(unfiltered_graph, 'pos')
     node_dict = {}
-    for c in label_ids:
+    for num, c in enumerate(label_ids):
         min_dist = float('inf')
         nearest_node = None
 
         for key in path_dict.keys():
-            dist = math.dist(pos[c], path_dict[key]["pos"])
+            dist = math.dist(centroids[num], path_dict[key]["pos"])
             if dist < min_dist:
                 min_dist = dist
                 nearest_node = key
diff --git a/reproducibility/block_extraction/repro_equidistant_centers.py b/reproducibility/block_extraction/repro_equidistant_centers.py
@@ -0,0 +1,90 @@
+import argparse
+import json
+import os
+from typing import Optional
+
+import pandas as pd
+from flamingo_tools.s3_utils import get_s3_path
+from flamingo_tools.segmentation.cochlea_mapping import equidistant_centers
+
+
+def repro_equidistant_centers(
+    ddict: str,
+    output_path: str,
+    s3_credentials: Optional[str] = None,
+    s3_bucket_name: Optional[str] = None,
+    s3_service_endpoint: Optional[str] = None,
+    force_overwrite: Optional[bool] = None,
+) -> None:
+    """Compute equidistant crop centers for a list of cochleae and store them as JSON.
+
+    For every entry of the input JSON file, the segmentation table
+    "<cochlea>/tables/<segmentation_channel>/default.tsv" is read through `get_s3_path`,
+    the centers are computed with `equidistant_centers`, truncated to integers,
+    and collected together with the image channel and halo size.
+
+    Args:
+        ddict: Path to the input JSON file. It holds a list of dictionaries with the required keys
+            "cochlea", "image_channel" and "segmentation_channel" and the optional keys
+            "type", "component_list", "halo_size" and "n_blocks".
+        output_path: Path of the output JSON file.
+        s3_credentials: Passed to `get_s3_path` as credential file.
+        s3_bucket_name: Passed to `get_s3_path` as bucket name.
+        s3_service_endpoint: Passed to `get_s3_path` as service endpoint.
+        force_overwrite: If truthy, an existing output file is overwritten.
+            Otherwise an existing output file is kept and nothing is computed.
+    """
+    default_cell_type = "ihc"
+    default_component_list = [1]
+    default_halo_size = [256, 256, 100]
+    default_n_blocks = 6
+
+    with open(ddict, 'r') as myfile:
+        param_dicts = json.load(myfile)
+
+    if os.path.isfile(output_path) and not force_overwrite:
+        print(f"Skipping {output_path}. File already exists.")
+        # Without this return the existing file would be recomputed and overwritten anyway.
+        return
+
+    out_dict = []
+    for dic in param_dicts:
+        cochlea = dic["cochlea"]
+        img_channel = dic["image_channel"]
+        seg_channel = dic["segmentation_channel"]
+
+        s3_path = os.path.join(f"{cochlea}", "tables", f"{seg_channel}", "default.tsv")
+        print(f"Finding equidistant centers for {cochlea}.")
+
+        tsv_path, fs = get_s3_path(s3_path, bucket_name=s3_bucket_name,
+                                   service_endpoint=s3_service_endpoint, credential_file=s3_credentials)
+        with fs.open(tsv_path, 'r') as f:
+            table = pd.read_csv(f, sep="\t")
+
+        # Optional per-cochlea settings fall back to the defaults above.
+        cell_type = dic.get("type", default_cell_type)
+        component_list = dic.get("component_list", default_component_list)
+        halo_size = dic.get("halo_size", default_halo_size)
+        n_blocks = dic.get("n_blocks", default_n_blocks)
+
+        centers = equidistant_centers(table, component_label=component_list, cell_type=cell_type, n_blocks=n_blocks)
+        out_dict.append({
+            "cochlea": cochlea,
+            "image_channel": img_channel,
+            # JSON output uses integer coordinates; int() truncates the float centers.
+            "crop_centers": [[int(c) for c in center] for center in centers],
+            "halo_size": halo_size,
+        })
+
+    with open(output_path, "w") as f:
+        json.dump(out_dict, f, indent='\t', separators=(',', ': '))
+
+
+def main():
+    """Parse the command line arguments and run `repro_equidistant_centers`."""
+    parser = argparse.ArgumentParser(
+        description="Script to compute equidistant crop centers along the central path of a cochlea segmentation.")
+
+    parser.add_argument('-i', '--input', type=str, required=True, help="Input JSON dictionary.")
+    parser.add_argument('-o', "--output", type=str, required=True, help="Output JSON dictionary.")
+
+    parser.add_argument("--force", action="store_true", help="Forcefully overwrite output.")
+    parser.add_argument("--s3_credentials", type=str, default=None,
+                        help="Input file containing S3 credentials. "
+                        "Optional if AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY were exported.")
+    parser.add_argument("--s3_bucket_name", type=str, default=None,
+                        help="S3 bucket name. Optional if BUCKET_NAME was exported.")
+    parser.add_argument("--s3_service_endpoint", type=str, default=None,
+                        help="S3 service endpoint. Optional if SERVICE_ENDPOINT was exported.")
+
+    args = parser.parse_args()
+
+    # Keyword arguments keep the call correct even if the parameter order changes.
+    repro_equidistant_centers(
+        args.input, args.output,
+        s3_credentials=args.s3_credentials,
+        s3_bucket_name=args.s3_bucket_name,
+        s3_service_endpoint=args.s3_service_endpoint,
+        force_overwrite=args.force,
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/reproducibility/tonotopic_mapping/repro_tonotopic_mapping.py b/reproducibility/tonotopic_mapping/repro_tonotopic_mapping.py
@@ -17,8 +17,6 @@ def repro_tonotopic_mapping(
     force_overwrite: Optional[bool] = None,
 ):
     default_cell_type = "ihc"
-    default_max_edge_distance = 30
-    default_min_length = 50
     default_component_list = [1]
 
     remove_columns = ["tonotopic_label",
@@ -49,17 +47,14 @@ def repro_tonotopic_mapping(
             table = pd.read_csv(f, sep="\t")
 
         cell_type = dic["type"] if "type" in dic else default_cell_type
-        max_edge_distance = dic["max_edge_distance"] if "max_edge_distance" in dic else default_max_edge_distance
-        min_component_length = dic["min_component_length"] if "min_component_length" in dic else default_min_length
         component_list = dic["component_list"] if "component_list" in dic else default_component_list
 
         for column in remove_columns:
             if column in list(table.columns):
                 table = table.drop(column, axis=1)
 
         if not os.path.isfile(output_table_path) or force_overwrite:
-            table = tonotopic_mapping(table, component_label=component_list, max_edge_distance=max_edge_distance,
-                                      min_component_length=min_component_length, cell_type=cell_type)
+            table = tonotopic_mapping(table, component_label=component_list, cell_type=cell_type)
 
             table.to_csv(output_table_path, sep="\t", index=False)
 
diff --git a/scripts/prediction/tonotopic_mapping.py b/scripts/prediction/tonotopic_mapping.py
@@ -13,11 +13,9 @@ def main():
         "Either locally or on an S3 bucket.")
 
     parser.add_argument("-i", "--input", required=True, help="Input table with IHC segmentation.")
-    parser.add_argument("-o", "--output", required=True, help="Output path for post-processed table.")
+    parser.add_argument("-o", "--output", required=True, help="Output path for json file with cropping parameters.")
 
-    parser.add_argument("-t", "--type", type=str, default="ihc", help="Cell type of segmentation.")
-    parser.add_argument("--edge_distance", type=float, default=30, help="Maximal edge distance between nodes.")
-    parser.add_argument("--component_length", type=int, default=50, help="Minimal number of nodes in component.")
+    parser.add_argument("-t", "--type", type=str, default="sgn", help="Cell type of segmentation.")
 
     parser.add_argument("--s3", action="store_true", help="Flag for using S3 bucket.")
     parser.add_argument("--s3_credentials", type=str, default=None,
@@ -41,8 +39,7 @@ def main():
             tsv_table = pd.read_csv(f, sep="\t")
 
     table = tonotopic_mapping(
-        tsv_table, max_edge_distance=args.edge_distance, min_component_length=args.component_length,
-        cell_type=args.type,
+        tsv_table, cell_type=args.type,
     )
 
     table.to_csv(args.output, sep="\t", index=False)