@@ -207,19 +207,33 @@ async def fix_with_model(inputs: MermaidInput) -> MermaidOutput:
                 logfire.warning(
                     "Evaluation timeout", model=model, run_index=run_index, timeout=timeout
                 )
-                self.results[model].add_failed_run(run_index, error_msg)
+                self.results[model].add_failed_run(run_index, "evaluation_timeout")
                 return None

             except Exception as e:
+                # Categorize the error for better reporting
+                error_type = type(e).__name__
+                if "ValidationError" in error_type:
+                    categorized_error = "evaluation_validation_failed"
+                elif "timeout" in str(e).lower() or "timed out" in str(e).lower():
+                    categorized_error = "evaluation_timeout"
+                elif "ModelHTTPError" in error_type:
+                    categorized_error = "model_api_error"
+                elif "ConnectionError" in error_type or "network" in str(e).lower():
+                    categorized_error = "network_error"
+                else:
+                    categorized_error = f"evaluation_error_{error_type}"
+
                 error_msg = f"Error during evaluation: {str(e)}"
                 logfire.error(
                     "Evaluation error",
                     model=model,
                     run_index=run_index,
                     error=str(e),
-                    error_type=type(e).__name__,
+                    error_type=error_type,
+                    categorized_error=categorized_error,
                 )
-                self.results[model].add_failed_run(run_index, error_msg)
+                self.results[model].add_failed_run(run_index, categorized_error)
                 return None

     async def run_model_evaluations(
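The new handler matches on exception class names and message substrings rather than isinstance checks, so it can categorize errors from third-party libraries without importing their exception types. A minimal sketch of the same chain as a standalone, testable helper — the name `categorize_error` is hypothetical; the commit keeps this logic inline in the except block:

```python
# Sketch only: mirrors the inline categorization above as a unit-testable helper.
def categorize_error(e: Exception) -> str:
    error_type = type(e).__name__
    message = str(e).lower()
    # Order matters: an exception whose class name contains "ValidationError"
    # is labeled a validation failure even if its message mentions a timeout.
    if "ValidationError" in error_type:
        return "evaluation_validation_failed"
    if "timeout" in message or "timed out" in message:
        return "evaluation_timeout"
    if "ModelHTTPError" in error_type:
        return "model_api_error"
    if "ConnectionError" in error_type or "network" in message:
        return "network_error"
    # Fall back to a label derived from the exception class name.
    return f"evaluation_error_{error_type}"


assert categorize_error(TimeoutError("request timed out")) == "evaluation_timeout"
assert categorize_error(ConnectionError("connection refused")) == "network_error"
```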
@@ -442,7 +456,7 @@ async def main():
     parser.add_argument(
         "--parallel",
         action="store_true",
-        default=False,
+        default=True,
         help="Run evaluations in parallel",
     )
     parser.add_argument(
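For context on the `--parallel` change: `action="store_true"` already defaults a flag to False, so setting `default=True` means the parsed value is True whether or not `--parallel` is passed. A minimal standalone sketch (not from the commit) of the resulting behavior:

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--parallel",
    action="store_true",
    default=True,  # store_true normally defaults to False; True makes the flag always-on
    help="Run evaluations in parallel",
)

assert parser.parse_args([]).parallel is True              # parallel by default
assert parser.parse_args(["--parallel"]).parallel is True  # passing the flag changes nothing
```

Opting out of parallel runs would need a complementary flag, e.g. `argparse.BooleanOptionalAction` (Python 3.9+), which adds a `--no-parallel` form automatically.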