@@ -49,11 +49,11 @@ def get_results():
4949 for model , info in model_info .items ():
5050 model = model .replace ("/" , "--" )
5151 hf_model = ""
52- if "https://huggingface.co/" in info ["link" ]:
53- hf_model = info ["link" ].split ("https://huggingface.co/" )[- 1 ]
54- model = hf_model .replace ("/" , "--" )
5552 files = glob (f"results/{ model } --bigcodebench-*.json" )
5653 assert files , f"No files found for results/{ model } --bigcodebench-*.json"
54+ # if "https://huggingface.co/" in info["link"]:
55+ # hf_model = info["link"].split("https://huggingface.co/")[-1]
56+ # model = hf_model.replace("/", "--")
5757 for file in files :
5858 _ , suffix = os .path .basename (file ).split ("--bigcodebench-" )
5959 status = []
@@ -152,8 +152,8 @@ def read_task_perf(task="complete"):
152152
153153 task_perf = {f"BigCodeBench/{ task_id } " : 0 for task_id in range (1140 )}
154154 model = model .replace ("/" , "--" )
155- if info ["link" ].startswith ("https://huggingface.co/" ):
156- model = info ["link" ].split ("https://huggingface.co/" )[- 1 ].replace ("/" , "--" )
155+ # if info["link"].startswith("https://huggingface.co/"):
156+ # model = info["link"].split("https://huggingface.co/")[-1].replace("/", "--")
157157 try :
158158 if info ["prompted" ] and not info ["direct_complete" ]:
159159 files = glob (f"results/{ model } --bigcodebench-{ task } *-0-1-sanitized-calibrated_eval_results.json" )
@@ -316,15 +316,15 @@ def push_ds(ds, path, local=False):
316316 files = []
317317 complete_data , complete_files = read_task_perf ("complete" )
318318 instruct_data , instruct_files = read_task_perf ("instruct" )
319-
320- complete_map = {model .replace ("-" ,"_" ).replace ("+" ,"_plus" ).replace (" " ,"_" ):
321- Dataset .from_dict ({"task_id" : list (task_perf .keys ()), "status" : list (task_perf .values ())}) for model , task_perf in complete_data .items ()}
322- instruct_map = {model .replace ("-" ,"_" ).replace ("+" ,"_plus" ).replace (" " ,"_" ):
323- Dataset .from_dict ({"task_id" : list (task_perf .keys ()), "status" : list (task_perf .values ())}) for model , task_perf in instruct_data .items ()}
324- complete_ds = DatasetDict (complete_map )
325- instruct_ds = DatasetDict (instruct_map )
326- push_ds (complete_ds , "bigcode/bigcodebench-complete-perf" )
327- push_ds (instruct_ds , "bigcode/bigcodebench-instruct-perf" )
319+ assert len ( model_info ) == len ( complete_data )
320+ # complete_map = {model.replace("-","_").replace("+","_plus").replace(" ","_"):
321+ # Dataset.from_dict({"task_id": list(task_perf.keys()), "status": list(task_perf.values())}) for model, task_perf in complete_data.items()}
322+ # instruct_map = {model.replace("-","_").replace("+","_plus").replace(" ","_"):
323+ # Dataset.from_dict({"task_id": list(task_perf.keys()), "status": list(task_perf.values())}) for model, task_perf in instruct_data.items()}
324+ # complete_ds = DatasetDict(complete_map)
325+ # instruct_ds = DatasetDict(instruct_map)
326+ # push_ds(complete_ds, "bigcode/bigcodebench-complete-perf")
327+ # push_ds(instruct_ds, "bigcode/bigcodebench-instruct-perf")
328328
329329 files .extend (complete_files )
330330 files .extend (instruct_files )
0 commit comments