@@ -77,6 +77,7 @@ def _load_json_as_list(ddict_path: str) -> List[dict]:
7777def label_components_single (
7878 table_path : str ,
7979 out_path : str ,
80+ force_overwrite : bool = False ,
8081 cell_type : str = "sgn" ,
8182 component_list : List [int ] = [1 ],
8283 max_edge_distance : float = 30 ,
@@ -89,53 +90,89 @@ def label_components_single(
8990 custom_dic : Optional [dict ] = None ,
9091 ** _
9192):
92- """Process a single cochlea using one set of parameters or a custom_dic.
93+ """Process a single cochlea using one set of parameters or a custom dictionary.
94+ The cochlea is analyzed using graph-connected components
95+ to label segmentation instances that are closer than a given maximal edge distance.
96+ This process acts on an input segmentation table to which a "component_labels" column is added.
97+ Each entry in this column refers to the index of a connected component.
98+ The largest connected component has an index of 1; the others follow in decreasing order.
99+
100+ Args:
101+ table_path: File path to segmentation table.
102+ out_path: Output path to segmentation table with new column "component_labels".
103+ force_overwrite: Forcefully overwrite existing output path.
104+ cell_type: Cell type of the segmentation. Currently supports "sgn" and "ihc".
105+ component_list: List of components. Can be passed to obtain the number of instances within the component list.
106+ max_edge_distance: Maximal edge distance between graph nodes to create an edge between nodes.
107+ min_component_length: Minimal length of nodes of connected component. Filtered out if lower.
108+ min_size: Minimal number of pixels for filtering small instances.
109+ s3: Use S3 bucket.
110+ s3_credentials:
111+ s3_bucket_name:
112+ s3_service_endpoint:
113+ custom_dic: Custom dictionary which allows multiple post-processing configurations and combines the
114+ results into final components.
93115 """
116+ if os .path .isdir (out_path ):
117+ raise ValueError (f"Output path { out_path } is a directory. Provide a path to a single output file." )
118+
94119 if s3 :
95120 tsv_path , fs = get_s3_path (table_path , bucket_name = s3_bucket_name ,
96121 service_endpoint = s3_service_endpoint , credential_file = s3_credentials )
97- with fs .open (tsv_path , "r" ) as f :
98- table = pd .read_csv (f , sep = "\t " )
122+ with fs .open (tsv_path , "r" ) as f :
123+ table = pd .read_csv (f , sep = "\t " )
124+ else :
125+ table = pd .read_csv (table_path , sep = "\t " )
126+
127+ # overwrite input file
128+ if os .path .realpath (out_path ) == os .path .realpath (table_path ) and not s3 :
129+ force_overwrite = True
130+
131+ if os .path .isfile (out_path ) and not force_overwrite :
132+ print (f"Skipping { out_path } . Table already exists." )
99133
100- if custom_dic is not None :
101- tsv_table = label_custom_components (table , custom_dic )
102134 else :
103- if cell_type == "sgn" :
104- tsv_table = label_components_sgn (table , min_size = min_size ,
105- min_component_length = min_component_length ,
106- max_edge_distance = max_edge_distance )
107- elif cell_type == "ihc" :
108- tsv_table = label_components_ihc (table , min_size = min_size ,
109- min_component_length = min_component_length ,
110- max_edge_distance = max_edge_distance )
135+ if custom_dic is not None :
136+ # use multiple post-processing configurations
137+ tsv_table = label_custom_components (table , custom_dic )
111138 else :
112- raise ValueError ("Choose a supported cell type. Either 'sgn' or 'ihc'." )
139+ if cell_type == "sgn" :
140+ tsv_table = label_components_sgn (table , min_size = min_size ,
141+ min_component_length = min_component_length ,
142+ max_edge_distance = max_edge_distance )
143+ elif cell_type == "ihc" :
144+ tsv_table = label_components_ihc (table , min_size = min_size ,
145+ min_component_length = min_component_length ,
146+ max_edge_distance = max_edge_distance )
147+ else :
148+ raise ValueError ("Choose a supported cell type. Either 'sgn' or 'ihc'." )
113149
114- custom_comp = len (tsv_table [tsv_table ["component_labels" ].isin (component_list )])
115- print (f"Total { cell_type .upper ()} s: { len (tsv_table )} " )
116- if component_list == [1 ]:
117- print (f"Largest component has { custom_comp } { cell_type .upper ()} s." )
118- else :
119- for comp in component_list :
120- num_instances = len (tsv_table [tsv_table ["component_labels" ] == comp ])
121- print (f"Component { comp } has { num_instances } instances." )
122- print (f"Custom component(s) have { custom_comp } { cell_type .upper ()} s." )
150+ custom_comp = len (tsv_table [tsv_table ["component_labels" ].isin (component_list )])
151+ print (f"Total { cell_type .upper ()} s: { len (tsv_table )} " )
152+ if component_list == [1 ]:
153+ print (f"Largest component has { custom_comp } { cell_type .upper ()} s." )
154+ else :
155+ for comp in component_list :
156+ num_instances = len (tsv_table [tsv_table ["component_labels" ] == comp ])
157+ print (f"Component { comp } has { num_instances } instances." )
158+ print (f"Custom component(s) have { custom_comp } { cell_type .upper ()} s." )
123159
124- tsv_table .to_csv (out_path , sep = "\t " , index = False )
160+ tsv_table .to_csv (out_path , sep = "\t " , index = False )
125161
126162
127- def repro_label_components (
163+ def wrapper_label_components (
128164 output_path : str ,
129165 table_path : Optional [str ] = None ,
130166 ddict : Optional [str ] = None ,
167+ s3 : bool = False ,
131168 ** kwargs
132169):
133170 """Wrapper function for labeling connected components using a segmentation table.
134171 The function is used to distinguish between a passed parameter dictionary in JSON format
135172 and the explicit setting of parameters.
136173 """
137174 if ddict is None :
138- label_components_single (table_path , output_path , ** kwargs )
175+ label_components_single (table_path , output_path , s3 = s3 , ** kwargs )
139176 else :
140177 param_dicts = _load_json_as_list (ddict )
141178 for params in param_dicts :
@@ -151,23 +188,23 @@ def repro_label_components(
151188 save_path = os .path .join (output_path , "_" .join ([cochlea_str , f"{ table_str } .tsv" ]))
152189 else :
153190 save_path = output_path
154- label_components_single (table_path = table_path , out_path = save_path , ** params )
191+ label_components_single (table_path = table_path , out_path = save_path , s3 = s3 ,
192+ ** params )
155193
156194
157195def main ():
158196 parser = argparse .ArgumentParser (
159197 description = "Script to label segmentation using a segmentation table and graph connected components." )
160198
161199 parser .add_argument ("-o" , "--output" , type = str , required = True ,
162- help = "Output path. Either directory or specific file." )
163-
200+ help = "Output path. Either directory (for --json) or specific file otherwise." )
164201 parser .add_argument ("-i" , "--input" , type = str , default = None , help = "Input path to segmentation table." )
165202 parser .add_argument ("-j" , "--json" , type = str , default = None , help = "Input JSON dictionary." )
203+ parser .add_argument ("--force" , action = "store_true" , help = "Forcefully overwrite output." )
166204
205+ # options for post-processing
167206 parser .add_argument ("--cell_type" , type = str , default = "sgn" ,
168207 help = "Cell type of segmentation. Either 'sgn' or 'ihc'." )
169-
170- # options for post-processing
171208 parser .add_argument ("--min_size" , type = int , default = 1000 ,
172209 help = "Minimal number of pixels for filtering small instances." )
173210 parser .add_argument ("--min_component_length" , type = int , default = 50 ,
@@ -188,7 +225,7 @@ def main():
188225
189226 args = parser .parse_args ()
190227
191- repro_label_components (
228+ wrapper_label_components (
192229 output_path = args .output ,
193230 table_path = args .input ,
194231 ddict = args .json ,
@@ -197,6 +234,7 @@ def main():
197234 max_edge_distance = args .max_edge_distance ,
198235 min_component_length = args .min_component_length ,
199236 min_size = args .min_size ,
237+ force_overwrite = args .force ,
200238 s3 = args .s3 ,
201239 s3_credentials = args .s3_credentials ,
202240 s3_bucket_name = args .s3_bucket_name ,
0 commit comments