@@ -277,27 +277,27 @@ def stucking_checker():
277277 warn ("No samples had finished testing in the last 240s" )
278278 warn (f"{ len (remainings )} samples to be tested: { remainings } " )
279279
280- threading .Thread (target = stucking_checker ).start ()
281-
282- for future in tqdm (as_completed (futures ), total = n_samples ):
283- result = future .result ()
284- remainings .remove (result ["_identifier" ])
285- eval_results [result ["task_id" ]].append (result )
286-
287- # sort the results for each problem by completion_id
288- for task_id , task_results in eval_results .items ():
289- task_results .sort (key = lambda x : x ["completion_id" ])
290- results ["eval" ][task_id ] = []
291- for res in task_results :
292- stat , details = res ["base" ]
293- results ["eval" ][task_id ].append (
294- {
295- "task_id" : task_id ,
296- "solution" : res ["solution" ],
297- "status" : stat ,
298- "details" : details ,
299- }
300- )
280+ threading .Thread (target = stucking_checker ).start ()
281+
282+ for future in tqdm (as_completed (futures ), total = n_samples ):
283+ result = future .result ()
284+ remainings .remove (result ["_identifier" ])
285+ eval_results [result ["task_id" ]].append (result )
286+
287+ # sort the results for each problem by completion_id
288+ for task_id , task_results in eval_results .items ():
289+ task_results .sort (key = lambda x : x ["completion_id" ])
290+ results ["eval" ][task_id ] = []
291+ for res in task_results :
292+ stat , details = res ["base" ]
293+ results ["eval" ][task_id ].append (
294+ {
295+ "task_id" : task_id ,
296+ "solution" : res ["solution" ],
297+ "status" : stat ,
298+ "details" : details ,
299+ }
300+ )
301301
302302 # Calculate pass@k.
303303 total = np .array ([len (r ) for k , r in results ["eval" ].items () if k in problems ])
0 commit comments