66from glob import glob
77from sentence_transformers import SentenceTransformer , util
88import matplotlib .pyplot as plt
9- from datasets import load_dataset , Dataset , Features , Value , Sequence
9+ from datasets import load_dataset , Dataset , Features , Value , Sequence , DatasetDict
1010
1111from utils import *
1212
@@ -117,12 +117,27 @@ def read_task_perf(top_tid, task="complete"):
# Select the "hard" subset: task ids that survive all four filters
# (known ids, length, solve-rate, and library filters — defined earlier in the file).
top_tid = top_id.keys() & length_filter & rate_filter.keys() & lib_filter
# hard_results = read_task_perf(top_tid)

# Keep only the hard tasks. NOTE: every later read must use `filtered_bcb`;
# the name `hard_bcb` is only bound below, after Dataset.from_dict.
filtered_bcb = bcb.filter(lambda x: x["task_id"] in top_tid)
hard_bcb_tid = filtered_bcb["task_id"]
# For each hard task, look up its matched StackExchange question id and score.
se_qid = [top_id[_id][0] for _id in hard_bcb_tid]
se_q = se.select(se_qid)
se_scores = [top_id[_id][1] for _id in hard_bcb_tid]

# Rebuild the dataset with fresh sequential task ids; the original
# BigCodeBench id is preserved in the `_id` column.
hard_bcb_dict = {
    "task_id": [f"BigCodeBenchHard/{i}" for i in range(len(filtered_bcb))],
    "complete_prompt": filtered_bcb["complete_prompt"],
    "instruct_prompt": filtered_bcb["instruct_prompt"],
    "canonical_solution": filtered_bcb["canonical_solution"],
    "code_prompt": filtered_bcb["code_prompt"],
    "test": filtered_bcb["test"],
    "entry_point": filtered_bcb["entry_point"],
    "doc_struct": filtered_bcb["doc_struct"],
    "libs": filtered_bcb["libs"],
    "q_idx": se_qid,
    "question": se_q["question"],
    "score": se_scores,
    "_id": hard_bcb_tid,
}

hard_bcb = Dataset.from_dict(hard_bcb_dict)
# Publish under a versioned split name rather than the default "train".
DatasetDict({"v0.1.0_hf": hard_bcb}).push_to_hub("bigcode/bigcodebench-hard")
0 commit comments