11import asyncio
22import json
33import traceback
4- import uuid
5- from datetime import datetime
64from pathlib import Path
7-
8- import click
5+ import uuid
96import modal
7+ import click
8+ from datetime import datetime
9+ from codegen .extensions .swebench .utils import SWEBenchDataset , SweBenchExample , get_swe_bench_examples
1010from codegen .extensions .swebench .report import generate_report
11- from codegen .extensions .swebench .utils import (
12- SWEBenchDataset ,
13- SweBenchExample ,
14- get_swe_bench_examples ,
15- )
1611
1712PREDS_DNAME = Path (__file__ ).parent / "predictions"
1813LOG_DIR = Path (__file__ ).parent / "logs"
@@ -66,26 +61,11 @@ async def process_batch(examples: list[SweBenchExample], batch_size=10):
6661 print ("Traceback:" )
6762 print ("" .join (error_info ["traceback" ]))
6863
69- results .append (
70- {
71- "instance_id" : example .instance_id ,
72- "status" : "error" ,
73- "error_info" : error_info ,
74- }
75- )
64+ results .append ({"instance_id" : example .instance_id , "status" : "error" , "error_info" : error_info })
7665 else :
7766 if result is None :
7867 print (f"Warning: Null result for { example .instance_id } " )
79- results .append (
80- {
81- "instance_id" : example .instance_id ,
82- "status" : "error" ,
83- "error_info" : {
84- "error_type" : "NullResult" ,
85- "error_message" : "Process returned None" ,
86- },
87- }
88- )
68+ results .append ({"instance_id" : example .instance_id , "status" : "error" , "error_info" : {"error_type" : "NullResult" , "error_message" : "Process returned None" }})
8969 else :
9070 results .append (result )
9171
@@ -101,24 +81,14 @@ async def process_batch(examples: list[SweBenchExample], batch_size=10):
10181 {
10282 "instance_id" : example .instance_id ,
10383 "status" : "error" ,
104- "error_info" : {
105- "error_type" : type (e ).__name__ ,
106- "error_message" : str (e ),
107- "traceback" : traceback .format_exc (),
108- "batch_failure" : True ,
109- },
84+ "error_info" : {"error_type" : type (e ).__name__ , "error_message" : str (e ), "traceback" : traceback .format_exc (), "batch_failure" : True },
11085 }
11186 )
11287
11388 return results
11489
11590
116- async def run_eval (
117- use_existing_preds : str | None ,
118- dataset : str ,
119- length : int ,
120- instance_id : str | None = None ,
121- ):
91+ async def run_eval (use_existing_preds : str | None , dataset : str , length : int , instance_id : str | None = None ):
12292 run_id = use_existing_preds or str (uuid .uuid4 ())
12393 predictions_dir = PREDS_DNAME / f"results_{ run_id } "
12494 dataset = SWEBenchDataset (dataset )
@@ -185,25 +155,10 @@ async def run_eval(
185155
186156
187157@click .command ()
188- @click .option (
189- "--use-existing-preds" ,
190- help = "The run ID of the existing predictions to use." ,
191- type = str ,
192- default = None ,
193- )
194- @click .option (
195- "--dataset" ,
196- help = "The dataset to use." ,
197- type = click .Choice ([dataset .value for dataset in SWEBenchDataset ]),
198- default = SWEBenchDataset .LITE .value ,
199- )
158+ @click .option ("--use-existing-preds" , help = "The run ID of the existing predictions to use." , type = str , default = None )
159+ @click .option ("--dataset" , help = "The dataset to use." , type = click .Choice ([dataset .value for dataset in SWEBenchDataset ]), default = SWEBenchDataset .LITE .value )
200160@click .option ("--length" , help = "The number of examples to process." , type = int , default = 10 )
201- @click .option (
202- "--instance-id" ,
203- help = "The instance ID of the example to process." ,
204- type = str ,
205- default = None ,
206- )
161+ @click .option ("--instance-id" , help = "The instance ID of the example to process." , type = str , default = None )
207162def run_eval_command (use_existing_preds , dataset , length , instance_id ):
208163 asyncio .run (run_eval (use_existing_preds , dataset , length , instance_id ))
209164
0 commit comments