 import os
 import re
+from collections import defaultdict
 from typing import Dict, List, Optional, Tuple

 import imageio.v3 as imageio
 import numpy as np
 import pandas as pd
 from scipy.ndimage import distance_transform_edt
 from scipy.optimize import linear_sum_assignment
+from scipy.spatial import cKDTree
 from skimage.measure import regionprops_table
 from skimage.segmentation import relabel_sequential
 from tqdm import tqdm
@@ -27,7 +29,7 @@ def _normalize_cochlea_name(name):
     return f"{prefix}_{number:06d}_{postfix}"


-def parse_annotation_path(annotation_path):
+def _parse_annotation_path(annotation_path):
     fname = os.path.basename(annotation_path)
     name_parts = fname.split("_")
     cochlea = _normalize_cochlea_name(name_parts[0])
@@ -42,7 +44,19 @@ def fetch_data_for_evaluation(
     z_extent: int = 0,
     components_for_postprocessing: Optional[List[int]] = None,
 ) -> Tuple[np.ndarray, pd.DataFrame]:
-    """
+    """Fetch the segmentation matching an annotation path from S3 for evaluation.
+
+    Args:
+        annotation_path: The path to the manual annotations.
+        cache_path: An optional path for caching the downloaded segmentation.
+        seg_name: The name of the segmentation in the bucket.
+        z_extent: Additional z-slices to load from the segmentation.
+        components_for_postprocessing: The component ids to restrict the segmentation to.
+            Choose [1] for the default component containing the helix.
+
+    Returns:
+        The segmentation downloaded from the S3 bucket.
+        The annotations loaded with pandas and matched to the segmentation.
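+
+    Example (illustrative; the annotation file name and cache path are hypothetical):
+
+        >>> segmentation, annotations = fetch_data_for_evaluation(
+        ...     "annotations/M01_slice10_alice.csv", cache_path="/tmp/seg.tif"
+        ... )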
     """
     # Load the annotations and normalize them for the given z-extent.
     annotations = pd.read_csv(annotation_path)
@@ -60,7 +74,7 @@ def fetch_data_for_evaluation(
         return segmentation, annotations

     # Parse the cochlea name and the slice ID from the file name.
-    cochlea, slice_id = parse_annotation_path(annotation_path)
+    cochlea, slice_id = _parse_annotation_path(annotation_path)

     # Open the S3 connection, get the path to the SGN segmentation in S3.
     internal_path = os.path.join(cochlea, "images", "ome-zarr", f"{seg_name}.ome.zarr")
@@ -239,9 +253,98 @@ def compute_scores_for_annotated_slice(
     return {"tp": tp, "fp": fp, "fn": fn}


-# TODO
-def create_consensus_annotations():
-    pass
+def create_consensus_annotations(
+    annotation_paths: Dict[str, str],
+    matching_distance: float = 5.0,
+    min_matches_for_consensus: int = 2,
+) -> Tuple[pd.DataFrame, pd.DataFrame]:
+    """Create consensus annotations from multiple manual annotations.
+
+    Args:
+        annotation_paths: A dictionary that maps annotator names to the paths of their manual annotations.
+        matching_distance: The maximum distance for matching annotations from different annotators.
+        min_matches_for_consensus: The minimum number of matching annotations required for a consensus annotation.
+
+    Returns:
+        A dataframe with the consensus annotations.
+        A dataframe with the unmatched annotations.
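+
+    Example (illustrative; the annotator names and file paths are hypothetical):
+
+        >>> consensus, unmatched = create_consensus_annotations(
+        ...     {"anno1": "anno1.csv", "anno2": "anno2.csv", "anno3": "anno3.csv"},
+        ...     matching_distance=5.0, min_matches_for_consensus=2,
+        ... )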
+    """
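+    # Load all annotations into a single dataframe, tagging each point with its annotator.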
+    dfs = []
+    for name, path in annotation_paths.items():
+        df = pd.read_csv(path, usecols=["axis-0", "axis-1", "axis-2"])
+        df["annotator"] = name
+        dfs.append(df)
+    big = pd.concat(dfs, ignore_index=True)
+    coords = big[["axis-0", "axis-1", "axis-2"]].values
+    ann_id = big["annotator"].values
+
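+    # Build one KD-tree per annotator to enable fast nearest-neighbor queries between annotators.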
+    trees, idx_by_ann = {}, {}
+    for ann in np.unique(ann_id):
+        idx = np.where(ann_id == ann)[0]
+        idx_by_ann[ann] = idx
+        trees[ann] = cKDTree(coords[idx])
+
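+    # Match points between each pair of annotators. Two points are linked only if
+    # they are mutual nearest neighbors within matching_distance.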
+    edges = []
+    for i, annA in enumerate(trees):
+        idxA, treeA = idx_by_ann[annA], trees[annA]
+        for annB in list(trees)[i + 1:]:
+            idxB, treeB = idx_by_ann[annB], trees[annB]
+
+            # A -> B: nearest point in B for each point of A.
+            dAB, jB = treeB.query(coords[idxA], distance_upper_bound=matching_distance)
+            # B -> A: nearest point in A for each point of B.
+            dBA, jA = treeA.query(coords[idxB], distance_upper_bound=matching_distance)
+
+            for k, (d, j) in enumerate(zip(dAB, jB)):
+                if np.isfinite(d):
+                    a_idx = idxA[k]
+                    b_idx = idxB[j]
+                    # Keep the match only if it is reciprocal.
+                    if np.isfinite(dBA[j]) and jA[j] == k:
+                        edges.append((a_idx, b_idx))
+
+    # --- union-find to group matched points -------------------
+    parent = np.arange(len(coords))
+
+    def find(x):
+        while parent[x] != x:
+            parent[x] = parent[parent[x]]
+            x = parent[x]
+        return x
+
+    def union(a, b):
+        ra, rb = find(a), find(b)
+        if ra != rb:
+            parent[rb] = ra
+
+    for a, b in edges:
+        union(a, b)
+
+    # --- collect clusters --------------------------------------
+    cluster = defaultdict(list)
+    for i in range(len(coords)):
+        cluster[find(i)].append(i)
+
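+    # Clusters with enough members become consensus annotations, represented by
+    # their centroid; all other points are reported as unmatched.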
+    consensus_rows, unmatched = [], []
+    for members in cluster.values():
+        if len(members) >= min_matches_for_consensus:
+            # The set of annotators that contributed to this cluster.
+            anns = {ann_id[m] for m in members}
+            subset = coords[members]
+            rep_pt = subset.mean(axis=0)
+            consensus_rows.append({
+                "axis-0": rep_pt[0],
+                "axis-1": rep_pt[1],
+                "axis-2": rep_pt[2],
+                "annotators": anns,
+                "member_indices": members,
+            })
+        else:
+            unmatched.extend(members)
+
+    consensus_df = pd.DataFrame(consensus_rows)
+    unmatched_df = big.iloc[unmatched].reset_index(drop=True)
+    return consensus_df, unmatched_df


 def for_visualization(segmentation, annotations, matches):