77import numpy as np
88import pandas as pd
99import yaml
10+ from tqdm import tqdm
1011
11- sys .path .append (".." )
12+ from dance .settings import DANCEDIR , SIMILARITYDIR
13+
14+ sys .path .append (str (DANCEDIR ))
1215import ast
1316
1417from get_result_web import get_sweep_url , spilt_web
1518
1619from dance import logger
20+ from dance .settings import entity , project
1721from dance .utils import try_import
1822
1923file_root = str (Path (__file__ ).resolve ().parent .parent )
@@ -70,8 +74,6 @@ def is_match(config_str):
7074
7175
7276wandb = try_import ("wandb" )
73- entity = "xzy11632"
74- project = "dance-dev"
7577
7678
7779def is_matching_dict (yaml_str , target_dict ):
@@ -156,18 +158,20 @@ def get_ans_from_cache(query_dataset, method):
156158 # For each atlas dataset, take the best step2 config (YAML) recorded for this method,
157159 # then look up the accuracy of the matching run in the query dataset's sweep (all values should exist)
158160 ans = pd .DataFrame (index = [method ], columns = [f"{ atlas_dataset } _from_cache" for atlas_dataset in atlas_datasets ])
159-
160- sweep_url = re .search (r"step2:([^|]+)" ,
161- conf_data [conf_data ["dataset_id" ] == query_dataset ][method ].iloc [0 ]).group (1 )
161+ step_str = conf_data [conf_data ["dataset_id" ] == query_dataset ][method ].iloc [0 ]
162+ if pd .isna (step_str ):
163+ logger .warning (f"{ query_dataset } is nan in { method } " )
164+ return ans
165+ sweep_url = re .search (r"step2:([^|]+)" , step_str ).group (1 )
162166 _ , _ , sweep_id = spilt_web (sweep_url )
163167 sweep = wandb .Api ().sweep (f"{ entity } /{ project } /{ sweep_id } " )
164-
165- for atlas_dataset in atlas_datasets :
166- best_yaml = conf_data [conf_data ["dataset_id" ] == atlas_dataset ][f"{ method } _best_yaml " ].iloc [0 ]
168+ runs = sweep . runs
169+ for atlas_dataset in tqdm ( atlas_datasets ) :
170+ best_yaml = conf_data [conf_data ["dataset_id" ] == atlas_dataset ][f"{ method } _step2_best_yaml " ].iloc [0 ]
167171 match_run = None
168172
169173 # Find matching run configuration
170- for run in sweep . runs :
174+ for run in tqdm ( runs , leave = False ) :
171175 if isinstance (best_yaml , float ) and np .isnan (best_yaml ):
172176 continue
173177 if is_matching_dict (best_yaml , run .config ):
@@ -188,7 +192,7 @@ def get_ans_from_cache(query_dataset, method):
188192parser = argparse .ArgumentParser (formatter_class = argparse .ArgumentDefaultsHelpFormatter )
189193parser .add_argument ("--methods" , default = ["cta_actinn" , "cta_celltypist" , "cta_scdeepsort" , "cta_singlecellnet" ],
190194 nargs = "+" )
191- parser .add_argument ("--tissue" , type = str , default = "blood " )
195+ parser .add_argument ("--tissue" , type = str , default = "pancreas " )
192196args = parser .parse_args ()
193197methods = args .methods
194198tissue = args .tissue
@@ -208,7 +212,7 @@ def get_ans_from_cache(query_dataset, method):
208212# "738942eb-ac72-44ff-a64b-8943b5ecd8d9", "a5d95a42-0137-496f-8a60-101e17f263c8",
209213# "71be997d-ff75-41b9-8a9f-1288c865f921"
210214# ]
211- conf_data = pd .read_excel (" Cell Type Annotation Atlas.xlsx" , sheet_name = tissue )
215+ conf_data = pd .read_excel (SIMILARITYDIR / "data/ Cell Type Annotation Atlas.xlsx" , sheet_name = tissue )
212216# conf_data = pd.read_csv(f"results/{tissue}_result.csv", index_col=0)
213217atlas_datasets = list (conf_data [conf_data ["queryed" ] == False ]["dataset_id" ])
214218query_datasets = list (conf_data [conf_data ["queryed" ] == True ]["dataset_id" ])
@@ -219,8 +223,9 @@ def get_ans_from_cache(query_dataset, method):
219223 ans .append (get_ans_from_cache (query_dataset , method ))
220224 ans = pd .concat (ans )
221225 ans_all [query_dataset ] = ans
222- for k , v in ans_all .items ():
223- file_path = f"in_atlas_datas/{ tissue } /{ str (methods )} _{ k } _in_atlas.csv"
226+ print (query_dataset )
227+ # for k, v in ans_all.items():
228+ file_path = SIMILARITYDIR / f"data/in_atlas_datas/{ tissue } /{ str (methods )} _{ query_dataset } _in_atlas.csv"
224229 folder_path = Path (file_path ).parent
225230 folder_path .mkdir (parents = True , exist_ok = True )
226- v .to_csv (file_path )
231+ ans .to_csv (file_path )
0 commit comments