Skip to content

Commit 6e53f63

Browse files
committed
fix: update model names
1 parent b882aef commit 6e53f63

File tree

1 file changed

+14
-14
lines changed

1 file changed

+14
-14
lines changed

analysis/get_results.py

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -49,11 +49,11 @@ def get_results():
4949
for model, info in model_info.items():
5050
model = model.replace("/", "--")
5151
hf_model = ""
52-
if "https://huggingface.co/" in info["link"]:
53-
hf_model = info["link"].split("https://huggingface.co/")[-1]
54-
model = hf_model.replace("/", "--")
5552
files = glob(f"results/{model}--bigcodebench-*.json")
5653
assert files, f"No files found for results/{model}--bigcodebench-*.json"
54+
# if "https://huggingface.co/" in info["link"]:
55+
# hf_model = info["link"].split("https://huggingface.co/")[-1]
56+
# model = hf_model.replace("/", "--")
5757
for file in files:
5858
_, suffix = os.path.basename(file).split("--bigcodebench-")
5959
status = []
@@ -152,8 +152,8 @@ def read_task_perf(task="complete"):
152152

153153
task_perf = {f"BigCodeBench/{task_id}": 0 for task_id in range(1140)}
154154
model = model.replace("/", "--")
155-
if info["link"].startswith("https://huggingface.co/"):
156-
model = info["link"].split("https://huggingface.co/")[-1].replace("/", "--")
155+
# if info["link"].startswith("https://huggingface.co/"):
156+
# model = info["link"].split("https://huggingface.co/")[-1].replace("/", "--")
157157
try:
158158
if info["prompted"] and not info["direct_complete"]:
159159
files = glob(f"results/{model}--bigcodebench-{task}*-0-1-sanitized-calibrated_eval_results.json")
@@ -316,15 +316,15 @@ def push_ds(ds, path, local=False):
316316
files = []
317317
complete_data, complete_files = read_task_perf("complete")
318318
instruct_data, instruct_files = read_task_perf("instruct")
319-
320-
complete_map = {model.replace("-","_").replace("+","_plus").replace(" ","_"):
321-
Dataset.from_dict({"task_id": list(task_perf.keys()), "status": list(task_perf.values())}) for model, task_perf in complete_data.items()}
322-
instruct_map = {model.replace("-","_").replace("+","_plus").replace(" ","_"):
323-
Dataset.from_dict({"task_id": list(task_perf.keys()), "status": list(task_perf.values())}) for model, task_perf in instruct_data.items()}
324-
complete_ds = DatasetDict(complete_map)
325-
instruct_ds = DatasetDict(instruct_map)
326-
push_ds(complete_ds, "bigcode/bigcodebench-complete-perf")
327-
push_ds(instruct_ds, "bigcode/bigcodebench-instruct-perf")
319+
assert len(model_info) == len(complete_data)
320+
# complete_map = {model.replace("-","_").replace("+","_plus").replace(" ","_"):
321+
# Dataset.from_dict({"task_id": list(task_perf.keys()), "status": list(task_perf.values())}) for model, task_perf in complete_data.items()}
322+
# instruct_map = {model.replace("-","_").replace("+","_plus").replace(" ","_"):
323+
# Dataset.from_dict({"task_id": list(task_perf.keys()), "status": list(task_perf.values())}) for model, task_perf in instruct_data.items()}
324+
# complete_ds = DatasetDict(complete_map)
325+
# instruct_ds = DatasetDict(instruct_map)
326+
# push_ds(complete_ds, "bigcode/bigcodebench-complete-perf")
327+
# push_ds(instruct_ds, "bigcode/bigcodebench-instruct-perf")
328328

329329
files.extend(complete_files)
330330
files.extend(instruct_files)

0 commit comments

Comments
 (0)