
Commit 85d989a

fix: resolve all ruff and mypy issues in experimental module
1 parent: 138093f
17 files changed, +584 -394 lines
Lines changed: 23 additions & 24 deletions

@@ -1,40 +1,39 @@
 from crewai.experimental.evaluation import (
+    AgentEvaluationResult,
+    AgentEvaluator,
     BaseEvaluator,
     EvaluationScore,
-    MetricCategory,
-    AgentEvaluationResult,
-    SemanticQualityEvaluator,
+    EvaluationTraceCallback,
+    ExperimentResult,
+    ExperimentResults,
+    ExperimentRunner,
     GoalAlignmentEvaluator,
-    ReasoningEfficiencyEvaluator,
-    ToolSelectionEvaluator,
+    MetricCategory,
     ParameterExtractionEvaluator,
+    ReasoningEfficiencyEvaluator,
+    SemanticQualityEvaluator,
     ToolInvocationEvaluator,
-    EvaluationTraceCallback,
-    create_evaluation_callbacks,
-    AgentEvaluator,
+    ToolSelectionEvaluator,
     create_default_evaluator,
-    ExperimentRunner,
-    ExperimentResults,
-    ExperimentResult,
+    create_evaluation_callbacks,
 )

-
 __all__ = [
+    "AgentEvaluationResult",
+    "AgentEvaluator",
     "BaseEvaluator",
     "EvaluationScore",
-    "MetricCategory",
-    "AgentEvaluationResult",
-    "SemanticQualityEvaluator",
+    "EvaluationTraceCallback",
+    "ExperimentResult",
+    "ExperimentResults",
+    "ExperimentRunner",
     "GoalAlignmentEvaluator",
-    "ReasoningEfficiencyEvaluator",
-    "ToolSelectionEvaluator",
+    "MetricCategory",
     "ParameterExtractionEvaluator",
+    "ReasoningEfficiencyEvaluator",
+    "SemanticQualityEvaluator",
     "ToolInvocationEvaluator",
-    "EvaluationTraceCallback",
-    "create_evaluation_callbacks",
-    "AgentEvaluator",
+    "ToolSelectionEvaluator",
     "create_default_evaluator",
-    "ExperimentRunner",
-    "ExperimentResults",
-    "ExperimentResult"
-]
+    "create_evaluation_callbacks",
+]
Lines changed: 27 additions & 31 deletions

@@ -1,51 +1,47 @@
+from crewai.experimental.evaluation.agent_evaluator import (
+    AgentEvaluator,
+    create_default_evaluator,
+)
 from crewai.experimental.evaluation.base_evaluator import (
+    AgentEvaluationResult,
     BaseEvaluator,
     EvaluationScore,
     MetricCategory,
-    AgentEvaluationResult
-)
-
-from crewai.experimental.evaluation.metrics import (
-    SemanticQualityEvaluator,
-    GoalAlignmentEvaluator,
-    ReasoningEfficiencyEvaluator,
-    ToolSelectionEvaluator,
-    ParameterExtractionEvaluator,
-    ToolInvocationEvaluator
 )
-
 from crewai.experimental.evaluation.evaluation_listener import (
     EvaluationTraceCallback,
-    create_evaluation_callbacks
+    create_evaluation_callbacks,
 )
-
-from crewai.experimental.evaluation.agent_evaluator import (
-    AgentEvaluator,
-    create_default_evaluator
-)
-
 from crewai.experimental.evaluation.experiment import (
-    ExperimentRunner,
+    ExperimentResult,
     ExperimentResults,
-    ExperimentResult
+    ExperimentRunner,
+)
+from crewai.experimental.evaluation.metrics import (
+    GoalAlignmentEvaluator,
+    ParameterExtractionEvaluator,
+    ReasoningEfficiencyEvaluator,
+    SemanticQualityEvaluator,
+    ToolInvocationEvaluator,
+    ToolSelectionEvaluator,
 )

 __all__ = [
+    "AgentEvaluationResult",
+    "AgentEvaluator",
     "BaseEvaluator",
     "EvaluationScore",
-    "MetricCategory",
-    "AgentEvaluationResult",
-    "SemanticQualityEvaluator",
+    "EvaluationTraceCallback",
+    "ExperimentResult",
+    "ExperimentResults",
+    "ExperimentRunner",
     "GoalAlignmentEvaluator",
-    "ReasoningEfficiencyEvaluator",
-    "ToolSelectionEvaluator",
+    "MetricCategory",
     "ParameterExtractionEvaluator",
+    "ReasoningEfficiencyEvaluator",
+    "SemanticQualityEvaluator",
     "ToolInvocationEvaluator",
-    "EvaluationTraceCallback",
-    "create_evaluation_callbacks",
-    "AgentEvaluator",
+    "ToolSelectionEvaluator",
     "create_default_evaluator",
-    "ExperimentRunner",
-    "ExperimentResults",
-    "ExperimentResult"
+    "create_evaluation_callbacks",
 ]
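Both __init__ hunks above are purely mechanical: imports and __all__ entries are re-sorted alphabetically (the kind of reordering ruff's import-sorting and __all__-sorting checks produce), so the re-exported public API is unchanged. A minimal usage sketch, assuming the package is installed; the agent fields below are illustrative and not taken from this commit:

from crewai import Agent
from crewai.experimental.evaluation import create_default_evaluator

# Hypothetical agent; only the names re-exported by the reordered __init__ modules are relied on.
agents = [Agent(role="Researcher", goal="Summarize findings", backstory="Experienced analyst")]
evaluator = create_default_evaluator(agents=agents)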

src/crewai/experimental/evaluation/agent_evaluator.py

Lines changed: 31 additions & 32 deletions

@@ -1,34 +1,33 @@
 import threading
-from typing import Any, Optional
+from collections.abc import Sequence
+from typing import Any

-from crewai.experimental.evaluation.base_evaluator import (
-    AgentEvaluationResult,
-    AggregationStrategy,
-)
 from crewai.agent import Agent
-from crewai.task import Task
-from crewai.experimental.evaluation.evaluation_display import EvaluationDisplayFormatter
+from crewai.agents.agent_builder.base_agent import BaseAgent
+from crewai.events.event_bus import crewai_event_bus
 from crewai.events.types.agent_events import (
-    AgentEvaluationStartedEvent,
     AgentEvaluationCompletedEvent,
     AgentEvaluationFailedEvent,
+    AgentEvaluationStartedEvent,
+    LiteAgentExecutionCompletedEvent,
 )
-from crewai.experimental.evaluation import BaseEvaluator, create_evaluation_callbacks
-from collections.abc import Sequence
-from crewai.events.event_bus import crewai_event_bus
-from crewai.events.utils.console_formatter import ConsoleFormatter
 from crewai.events.types.task_events import TaskCompletedEvent
-from crewai.events.types.agent_events import LiteAgentExecutionCompletedEvent
+from crewai.events.utils.console_formatter import ConsoleFormatter
+from crewai.experimental.evaluation import BaseEvaluator, create_evaluation_callbacks
 from crewai.experimental.evaluation.base_evaluator import (
     AgentAggregatedEvaluationResult,
+    AgentEvaluationResult,
+    AggregationStrategy,
     EvaluationScore,
     MetricCategory,
 )
+from crewai.experimental.evaluation.evaluation_display import EvaluationDisplayFormatter
+from crewai.task import Task


 class ExecutionState:
-    current_agent_id: Optional[str] = None
-    current_task_id: Optional[str] = None
+    current_agent_id: str | None = None
+    current_task_id: str | None = None

     def __init__(self):
         self.traces = {}
@@ -40,10 +39,10 @@ def __init__(self):
 class AgentEvaluator:
     def __init__(
         self,
-        agents: list[Agent],
+        agents: list[Agent] | list[BaseAgent],
         evaluators: Sequence[BaseEvaluator] | None = None,
     ):
-        self.agents: list[Agent] = agents
+        self.agents: list[Agent] | list[BaseAgent] = agents
         self.evaluators: Sequence[BaseEvaluator] | None = evaluators

         self.callback = create_evaluation_callbacks()
@@ -75,7 +74,8 @@ def _subscribe_to_events(self) -> None:
         )

     def _handle_task_completed(self, source: Any, event: TaskCompletedEvent) -> None:
-        assert event.task is not None
+        if event.task is None:
+            raise ValueError("TaskCompletedEvent must have a task")
         agent = event.task.agent
         if (
             agent
@@ -92,9 +92,8 @@ def _handle_task_completed(self, source: Any, event: TaskCompletedEvent) -> None
             state.current_agent_id = str(agent.id)
             state.current_task_id = str(event.task.id)

-        assert (
-            state.current_agent_id is not None and state.current_task_id is not None
-        )
+        if state.current_agent_id is None or state.current_task_id is None:
+            raise ValueError("Agent ID and Task ID must not be None")
         trace = self.callback.get_trace(
             state.current_agent_id, state.current_task_id
         )
@@ -146,9 +145,8 @@ def _handle_lite_agent_completed(
         if not target_agent:
             return

-        assert (
-            state.current_agent_id is not None and state.current_task_id is not None
-        )
+        if state.current_agent_id is None or state.current_task_id is None:
+            raise ValueError("Agent ID and Task ID must not be None")
         trace = self.callback.get_trace(
             state.current_agent_id, state.current_task_id
         )
@@ -244,7 +242,7 @@ def display_evaluation_with_feedback(self) -> None:

     def evaluate(
         self,
-        agent: Agent,
+        agent: Agent | BaseAgent,
         execution_trace: dict[str, Any],
         final_output: Any,
         state: ExecutionState,
@@ -255,7 +253,8 @@ def evaluate(
             task_id=state.current_task_id or (str(task.id) if task else "unknown_task"),
         )

-        assert self.evaluators is not None
+        if self.evaluators is None:
+            raise ValueError("Evaluators must be initialized")
         task_id = str(task.id) if task else None
         for evaluator in self.evaluators:
             try:
@@ -276,15 +275,15 @@ def evaluate(
                     metric_category=evaluator.metric_category,
                     score=score,
                 )
-            except Exception as e:
+            except Exception as e:  # noqa: PERF203
                 self.emit_evaluation_failed_event(
                     agent_role=agent.role,
                     agent_id=str(agent.id),
                     task_id=task_id,
                     error=str(e),
                 )
                 self.console_formatter.print(
-                    f"Error in {evaluator.metric_category.value} evaluator: {str(e)}"
+                    f"Error in {evaluator.metric_category.value} evaluator: {e!s}"
                 )

         return result
@@ -337,14 +336,14 @@ def emit_evaluation_failed_event(
         )


-def create_default_evaluator(agents: list[Agent], llm: None = None):
+def create_default_evaluator(agents: list[Agent] | list[BaseAgent], llm: None = None):
     from crewai.experimental.evaluation import (
         GoalAlignmentEvaluator,
-        SemanticQualityEvaluator,
-        ToolSelectionEvaluator,
         ParameterExtractionEvaluator,
-        ToolInvocationEvaluator,
         ReasoningEfficiencyEvaluator,
+        SemanticQualityEvaluator,
+        ToolInvocationEvaluator,
+        ToolSelectionEvaluator,
     )

     evaluators = [
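A recurring change in agent_evaluator.py is replacing assert guards with explicit if/raise checks. Asserts are silently stripped when Python runs with the -O flag (and ruff flags them outside tests, e.g. rule S101), so the explicit ValueError keeps the runtime guard. A minimal sketch of the pattern, using hypothetical names rather than the module's own:

def require_ids(agent_id: str | None, task_id: str | None) -> tuple[str, str]:
    # assert agent_id is not None and task_id is not None  # would be skipped under `python -O`
    if agent_id is None or task_id is None:
        raise ValueError("Agent ID and Task ID must not be None")
    return agent_id, task_id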

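The remaining fixes are typing and formatting modernizations: Optional[str] becomes str | None (PEP 604 union syntax, Python 3.10+, the form ruff's pyupgrade-style rules prefer), and f"{str(e)}" becomes f"{e!s}" (the conversion flag replaces the explicit str() call). A small illustrative comparison, not taken from the module itself:

from typing import Optional

legacy_id: Optional[str] = None   # pre-PEP 604 spelling
modern_id: str | None = None      # equivalent union syntax on Python 3.10+

err = ValueError("boom")
print(f"{str(err)}")  # "boom" via an explicit str() call
print(f"{err!s}")     # "boom" via the !s conversion flag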