Skip to content

Commit 19f63da

Browse files
committed
add task perf push
1 parent e61bda0 commit 19f63da

File tree

1 file changed

+14
-0
lines changed

1 file changed

+14
-0
lines changed

analysis/get_results.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,17 @@ def push_ds(ds, path, local=False):
374374
ds.push_to_hub(path)
375375

376376

377+
def get_perf_df(data_dict):
    """Build a wide per-task table with one row per model.

    Args:
        data_dict: Mapping of model name -> mapping of task id -> status.
            The first model's task ids (in iteration order) define the
            task columns of the result.

    Returns:
        A ``pd.DataFrame`` with a ``"Model"`` column followed by one column
        per task id. An empty ``data_dict`` yields an empty frame that has
        only the ``"Model"`` column.

    Raises:
        KeyError: If a later model reports a task id the first model lacks.
        ValueError: Raised by pandas when a model is missing a task id the
            first model has (the columns end up with unequal lengths).
    """
    # Take the first model's task mapping without assuming one exists, so an
    # empty input no longer fails with IndexError.
    first_task_perf = next(iter(data_dict.values()), {})

    perfs = {"Model": []}
    for task_id in first_task_perf:
        perfs[task_id] = []

    for model, task_perf in data_dict.items():
        perfs["Model"].append(model)
        # Append by key, so per-model task ordering does not matter.
        for task_id, status in task_perf.items():
            perfs[task_id].append(status)

    return pd.DataFrame(perfs)
386+
387+
377388
if __name__ == "__main__":
378389

379390
# bcb_orig = load_dataset("bigcode/bigcodebench", split="v0.1.0_hf")
@@ -388,6 +399,9 @@ def push_ds(ds, path, local=False):
388399
files = []
389400
complete_data, complete_files = read_task_perf(bcb["task_id"], "complete")
390401
instruct_data, instruct_files = read_task_perf(bcb["task_id"], "instruct")
402+
complete_df = get_perf_df(complete_data)
403+
instruct_df = get_perf_df(instruct_data)
404+
push_ds(DatasetDict({"complete": Dataset.from_pandas(complete_df), "instruct": Dataset.from_pandas(instruct_df)}), f"bigcode/bigcodebench{suffix}-perf")
391405
assert len(model_info) == len(complete_data),\
392406
f"Missing results for {set([val['name'] for val in model_info.values()]) - set([model for model in complete_data.keys()])}"
393407
with open("task2domain.json", "r") as f:

0 commit comments

Comments
 (0)