1616BIGCODEBENCH_HF = "bigcode/bigcodebench"
1717BIGCODEBENCH_VERSION = "v0.1.0_hf"
1818
19- def _ready_bigcodebench_path (mini = False , noextreme = False , version = "default" ) -> str :
19+ def _ready_bigcodebench_path (hard = False , version = "default" ) -> str :
2020 if BIGCODEBENCH_OVERRIDE_PATH :
2121 return BIGCODEBENCH_OVERRIDE_PATH
2222
2323 version = BIGCODEBENCH_VERSION if version == "default" else version
2424 url , path = get_dataset_metadata (
25- "BigCodeBench" , BIGCODEBENCH_VERSION , mini , noextreme
25+ BIGCODEBENCH_VERSION , hard
2626 )
2727
28+ extra = "-hard" if hard else ""
29+
2830 try :
29- dataset = load_dataset (BIGCODEBENCH_HF , split = BIGCODEBENCH_VERSION )
31+ dataset = load_dataset (BIGCODEBENCH_HF + extra , split = BIGCODEBENCH_VERSION )
3032 make_cache (url , dataset , path )
3133 except :
3234 if os .path .exists (path ):
@@ -37,7 +39,7 @@ def _ready_bigcodebench_path(mini=False, noextreme=False, version="default") ->
3739
3840
3941def get_bigcodebench (
40- err_incomplete = True , mini = False , noextreme = False , version = "default"
42+ err_incomplete = True , hard = False , version = "default"
4143 ) -> Dict [str , Dict ]:
4244 """Get BigCodeBench from BigCode's github repo and return as a list of parsed dicts.
4345
@@ -54,19 +56,19 @@ def get_bigcodebench(
5456 """
5557 # Check if open eval file exists in CACHE_DIR
5658 data_path = _ready_bigcodebench_path (
57- mini = mini , noextreme = noextreme , version = version
59+ hard = hard , version = version
5860 )
5961 data = {task ["task_id" ]: task for task in stream_jsonl (data_path )}
6062 if err_incomplete :
6163 completeness_check ("BigCodeBench" , data )
6264 return data
6365
def get_bigcodebench_hash(hard=False, version="default") -> str:
    """Get the hash of BigCodeBench.

    Args:
        hard: Forwarded to ``_ready_bigcodebench_path`` to select which
            dataset variant is prepared.
        version: Dataset version, forwarded to ``_ready_bigcodebench_path``.
            Previously this argument was accepted but a literal ``"default"``
            was always passed on, so the caller's choice was silently ignored.

    Returns:
        str: The hash of BigCodeBench (hex MD5 digest of the dataset file's
        raw bytes).
    """
    # Pass both options by keyword so the caller's version is honoured and the
    # call stays correct if the helper's parameter order changes.
    data_path = _ready_bigcodebench_path(hard=hard, version=version)
    # Hash the bytes exactly as stored on disk; binary mode avoids any
    # newline or encoding translation affecting the digest.
    with open(data_path, "rb") as f:
        data = f.read()
    return hashlib.md5(data).hexdigest()
0 commit comments