11import threading
2- from typing import Any , Optional
2+ from collections .abc import Sequence
3+ from typing import Any
34
4- from crewai .experimental .evaluation .base_evaluator import (
5- AgentEvaluationResult ,
6- AggregationStrategy ,
7- )
85from crewai .agent import Agent
9- from crewai .task import Task
10- from crewai .experimental . evaluation . evaluation_display import EvaluationDisplayFormatter
6+ from crewai .agents . agent_builder . base_agent import BaseAgent
7+ from crewai .events . event_bus import crewai_event_bus
118from crewai .events .types .agent_events import (
12- AgentEvaluationStartedEvent ,
139 AgentEvaluationCompletedEvent ,
1410 AgentEvaluationFailedEvent ,
11+ AgentEvaluationStartedEvent ,
12+ LiteAgentExecutionCompletedEvent ,
1513)
16- from crewai .experimental .evaluation import BaseEvaluator , create_evaluation_callbacks
17- from collections .abc import Sequence
18- from crewai .events .event_bus import crewai_event_bus
19- from crewai .events .utils .console_formatter import ConsoleFormatter
2014from crewai .events .types .task_events import TaskCompletedEvent
21- from crewai .events .types . agent_events import LiteAgentExecutionCompletedEvent
15+ from crewai .events .utils . console_formatter import ConsoleFormatter
2216from crewai .experimental .evaluation .base_evaluator import (
2317 AgentAggregatedEvaluationResult ,
18+ AgentEvaluationResult ,
19+ AggregationStrategy ,
20+ BaseEvaluator ,
2421 EvaluationScore ,
2522 MetricCategory ,
2623)
24+ from crewai .experimental .evaluation .evaluation_display import EvaluationDisplayFormatter
25+ from crewai .experimental .evaluation .evaluation_listener import (
26+ create_evaluation_callbacks ,
27+ )
28+ from crewai .task import Task
2729
2830
2931class ExecutionState :
30- current_agent_id : Optional [ str ] = None
31- current_task_id : Optional [ str ] = None
32+ current_agent_id : str | None = None
33+ current_task_id : str | None = None
3234
3335 def __init__ (self ):
3436 self .traces = {}
@@ -40,10 +42,10 @@ def __init__(self):
4042class AgentEvaluator :
4143 def __init__ (
4244 self ,
43- agents : list [Agent ],
45+ agents : list [Agent ] | list [ BaseAgent ] ,
4446 evaluators : Sequence [BaseEvaluator ] | None = None ,
4547 ):
46- self .agents : list [Agent ] = agents
48+ self .agents : list [Agent ] | list [ BaseAgent ] = agents
4749 self .evaluators : Sequence [BaseEvaluator ] | None = evaluators
4850
4951 self .callback = create_evaluation_callbacks ()
@@ -75,7 +77,8 @@ def _subscribe_to_events(self) -> None:
7577 )
7678
7779 def _handle_task_completed (self , source : Any , event : TaskCompletedEvent ) -> None :
78- assert event .task is not None
80+ if event .task is None :
81+ raise ValueError ("TaskCompletedEvent must have a task" )
7982 agent = event .task .agent
8083 if (
8184 agent
@@ -92,9 +95,8 @@ def _handle_task_completed(self, source: Any, event: TaskCompletedEvent) -> None
9295 state .current_agent_id = str (agent .id )
9396 state .current_task_id = str (event .task .id )
9497
95- assert (
96- state .current_agent_id is not None and state .current_task_id is not None
97- )
98+ if state .current_agent_id is None or state .current_task_id is None :
99+ raise ValueError ("Agent ID and Task ID must not be None" )
98100 trace = self .callback .get_trace (
99101 state .current_agent_id , state .current_task_id
100102 )
@@ -146,9 +148,8 @@ def _handle_lite_agent_completed(
146148 if not target_agent :
147149 return
148150
149- assert (
150- state .current_agent_id is not None and state .current_task_id is not None
151- )
151+ if state .current_agent_id is None or state .current_task_id is None :
152+ raise ValueError ("Agent ID and Task ID must not be None" )
152153 trace = self .callback .get_trace (
153154 state .current_agent_id , state .current_task_id
154155 )
@@ -244,7 +245,7 @@ def display_evaluation_with_feedback(self) -> None:
244245
245246 def evaluate (
246247 self ,
247- agent : Agent ,
248+ agent : Agent | BaseAgent ,
248249 execution_trace : dict [str , Any ],
249250 final_output : Any ,
250251 state : ExecutionState ,
@@ -255,7 +256,8 @@ def evaluate(
255256 task_id = state .current_task_id or (str (task .id ) if task else "unknown_task" ),
256257 )
257258
258- assert self .evaluators is not None
259+ if self .evaluators is None :
260+ raise ValueError ("Evaluators must be initialized" )
259261 task_id = str (task .id ) if task else None
260262 for evaluator in self .evaluators :
261263 try :
@@ -276,15 +278,15 @@ def evaluate(
276278 metric_category = evaluator .metric_category ,
277279 score = score ,
278280 )
279- except Exception as e :
281+ except Exception as e : # noqa: PERF203
280282 self .emit_evaluation_failed_event (
281283 agent_role = agent .role ,
282284 agent_id = str (agent .id ),
283285 task_id = task_id ,
284286 error = str (e ),
285287 )
286288 self .console_formatter .print (
287- f"Error in { evaluator .metric_category .value } evaluator: { str ( e ) } "
289+ f"Error in { evaluator .metric_category .value } evaluator: { e !s } "
288290 )
289291
290292 return result
@@ -337,14 +339,14 @@ def emit_evaluation_failed_event(
337339 )
338340
339341
340- def create_default_evaluator (agents : list [Agent ], llm : None = None ):
342+ def create_default_evaluator (agents : list [Agent ] | list [ BaseAgent ] , llm : None = None ):
341343 from crewai .experimental .evaluation import (
342344 GoalAlignmentEvaluator ,
343- SemanticQualityEvaluator ,
344- ToolSelectionEvaluator ,
345345 ParameterExtractionEvaluator ,
346- ToolInvocationEvaluator ,
347346 ReasoningEfficiencyEvaluator ,
347+ SemanticQualityEvaluator ,
348+ ToolInvocationEvaluator ,
349+ ToolSelectionEvaluator ,
348350 )
349351
350352 evaluators = [
0 commit comments