Skip to content

Commit 7ada57c

Browse files
committed
chore: Better error tracking and cleanup
1 parent 486789b commit 7ada57c

File tree

2 files changed

+18
-5
lines changed

2 files changed

+18
-5
lines changed

agents_mcp_usage/multi_mcp/eval_multi_mcp/evals_pydantic_mcp.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
#!/usr/bin/env python3
21
"""
32
Single-Model Evaluation Module for Mermaid Diagram Fixing
43

agents_mcp_usage/multi_mcp/eval_multi_mcp/run_multi_evals.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -207,19 +207,33 @@ async def fix_with_model(inputs: MermaidInput) -> MermaidOutput:
207207
logfire.warning(
208208
"Evaluation timeout", model=model, run_index=run_index, timeout=timeout
209209
)
210-
self.results[model].add_failed_run(run_index, error_msg)
210+
self.results[model].add_failed_run(run_index, "evaluation_timeout")
211211
return None
212212

213213
except Exception as e:
214+
# Categorize the error for better reporting
215+
error_type = type(e).__name__
216+
if "ValidationError" in error_type:
217+
categorized_error = "evaluation_validation_failed"
218+
elif "timeout" in str(e).lower() or "timed out" in str(e).lower():
219+
categorized_error = "evaluation_timeout"
220+
elif "ModelHTTPError" in error_type:
221+
categorized_error = "model_api_error"
222+
elif "ConnectionError" in error_type or "network" in str(e).lower():
223+
categorized_error = "network_error"
224+
else:
225+
categorized_error = f"evaluation_error_{error_type}"
226+
214227
error_msg = f"Error during evaluation: {str(e)}"
215228
logfire.error(
216229
"Evaluation error",
217230
model=model,
218231
run_index=run_index,
219232
error=str(e),
220-
error_type=type(e).__name__,
233+
error_type=error_type,
234+
categorized_error=categorized_error,
221235
)
222-
self.results[model].add_failed_run(run_index, error_msg)
236+
self.results[model].add_failed_run(run_index, categorized_error)
223237
return None
224238

225239
async def run_model_evaluations(
@@ -442,7 +456,7 @@ async def main():
442456
parser.add_argument(
443457
"--parallel",
444458
action="store_true",
445-
default=False,
459+
default=True,
446460
help="Run evaluations in parallel",
447461
)
448462
parser.add_argument(

0 commit comments

Comments
 (0)