11import asyncio
22import json
33import traceback
4- from pathlib import Path
54import uuid
6- import modal
7- import click
85from datetime import datetime
9- from codegen .extensions .swebench .utils import SWEBenchDataset , SweBenchExample , get_swe_bench_examples
6+ from pathlib import Path
7+
8+ import click
9+ import modal
1010from codegen .extensions .swebench .report import generate_report
11+ from codegen .extensions .swebench .utils import (
12+ SWEBenchDataset ,
13+ SweBenchExample ,
14+ get_swe_bench_examples ,
15+ )
1116
# Both output locations are anchored to the directory containing this file,
# so they do not depend on the current working directory.
# PREDS_DNAME is the parent folder under which run_eval creates a
# per-run "results_<run_id>" predictions directory.
PREDS_DNAME = Path(__file__).parent.joinpath("predictions")
# LOG_DIR is a "logs" folder that sits next to this file.
LOG_DIR = Path(__file__).parent.joinpath("logs")
@@ -61,11 +66,26 @@ async def process_batch(examples: list[SweBenchExample], batch_size=10):
6166 print ("Traceback:" )
6267 print ("" .join (error_info ["traceback" ]))
6368
64- results .append ({"instance_id" : example .instance_id , "status" : "error" , "error_info" : error_info })
69+ results .append (
70+ {
71+ "instance_id" : example .instance_id ,
72+ "status" : "error" ,
73+ "error_info" : error_info ,
74+ }
75+ )
6576 else :
6677 if result is None :
6778 print (f"Warning: Null result for { example .instance_id } " )
68- results .append ({"instance_id" : example .instance_id , "status" : "error" , "error_info" : {"error_type" : "NullResult" , "error_message" : "Process returned None" }})
79+ results .append (
80+ {
81+ "instance_id" : example .instance_id ,
82+ "status" : "error" ,
83+ "error_info" : {
84+ "error_type" : "NullResult" ,
85+ "error_message" : "Process returned None" ,
86+ },
87+ }
88+ )
6989 else :
7090 results .append (result )
7191
@@ -81,14 +101,24 @@ async def process_batch(examples: list[SweBenchExample], batch_size=10):
81101 {
82102 "instance_id" : example .instance_id ,
83103 "status" : "error" ,
84- "error_info" : {"error_type" : type (e ).__name__ , "error_message" : str (e ), "traceback" : traceback .format_exc (), "batch_failure" : True },
104+ "error_info" : {
105+ "error_type" : type (e ).__name__ ,
106+ "error_message" : str (e ),
107+ "traceback" : traceback .format_exc (),
108+ "batch_failure" : True ,
109+ },
85110 }
86111 )
87112
88113 return results
89114
90115
91- async def run_eval (use_existing_preds : str | None , dataset : str , length : int , instance_id : str | None = None ):
116+ async def run_eval (
117+ use_existing_preds : str | None ,
118+ dataset : str ,
119+ length : int ,
120+ instance_id : str | None = None ,
121+ ):
92122 run_id = use_existing_preds or str (uuid .uuid4 ())
93123 predictions_dir = PREDS_DNAME / f"results_{ run_id } "
94124 dataset = SWEBenchDataset (dataset )
@@ -155,10 +185,25 @@ async def run_eval(use_existing_preds: str | None, dataset: str, length: int, in
155185
156186
# Command-line entry point for the evaluation run.
#
# NOTE: documented with comments rather than a docstring on purpose; click
# uses a command function's docstring as its --help text, so adding one
# would change the CLI output.
@click.command()
@click.option(
    "--use-existing-preds",
    default=None,
    type=str,
    help="The run ID of the existing predictions to use.",
)
@click.option(
    "--dataset",
    default=SWEBenchDataset.LITE.value,
    # Accept exactly the values declared on the SWEBenchDataset enum.
    type=click.Choice([member.value for member in SWEBenchDataset]),
    help="The dataset to use.",
)
@click.option(
    "--length",
    default=10,
    type=int,
    help="The number of examples to process.",
)
@click.option(
    "--instance-id",
    default=None,
    type=str,
    help="The instance ID of the example to process.",
)
def run_eval_command(use_existing_preds, dataset, length, instance_id):
    # click hands the parsed options in by name; forward them unchanged to
    # the async run_eval coroutine and drive it to completion on a fresh
    # event loop.
    evaluation = run_eval(
        use_existing_preds=use_existing_preds,
        dataset=dataset,
        length=length,
        instance_id=instance_id,
    )
    asyncio.run(evaluation)
164209
0 commit comments