@@ -115,7 +115,7 @@ def evaluate(
115115 split : str ,
116116 subset : str ,
117117 samples : Optional [str ] = None ,
118- remote_execute : bool = True ,
118+ local_execute : bool = False ,
119119 remote_execute_api : str = "https://bigcode-bigcodebench-evaluator.hf.space/" ,
120120 pass_k : str = "1,5,10" ,
121121 save_pass_rate : bool = True ,
@@ -135,16 +135,14 @@ def evaluate(
135135 ** model_kwargs ,
136136 )
137137 assert samples is not None , "No samples provided"
138-
139- extra = subset + "_" if subset != "full" else ""
140-
138+
141139 if os .path .isdir (samples ):
142- result_path = os .path .join (samples , f" { extra } eval_results.json" )
140+ result_path = os .path .join (samples , " eval_results.json" )
143141 else :
144142 assert samples .endswith (".jsonl" )
145- result_path = samples .replace (".jsonl" , f"_ { extra } eval_results .json" )
143+ result_path = samples .replace (".jsonl" , "_eval_results .json" )
146144
147- if remote_execute :
145+ if not local_execute :
148146
149147 client = Client (remote_execute_api )
150148 results , pass_at_k = client .predict (
@@ -351,7 +349,7 @@ def stucking_checker():
351349 json .dump (results , f , indent = 2 )
352350
353351 if save_pass_rate :
354- pass_at_k_path = result_path .replace ("_eval_results .json" , "_pass_at_k .json" )
352+ pass_at_k_path = result_path .replace ("eval_results .json" , "pass_at_k .json" )
355353
356354 if os .path .isfile (pass_at_k_path ):
357355 saved_pass_at_k = json .load (open (pass_at_k_path , "r" ))
0 commit comments