
Commit 0e37059

chore: resolve all ruff and mypy issues in experimental module
resolve linting, typing, and import issues; update Okta test
1 parent aa8dc9d · commit 0e37059

17 files changed: +587 -394 lines changed
Lines changed: 23 additions & 24 deletions
@@ -1,40 +1,39 @@
 from crewai.experimental.evaluation import (
+    AgentEvaluationResult,
+    AgentEvaluator,
     BaseEvaluator,
     EvaluationScore,
-    MetricCategory,
-    AgentEvaluationResult,
-    SemanticQualityEvaluator,
+    EvaluationTraceCallback,
+    ExperimentResult,
+    ExperimentResults,
+    ExperimentRunner,
     GoalAlignmentEvaluator,
-    ReasoningEfficiencyEvaluator,
-    ToolSelectionEvaluator,
+    MetricCategory,
     ParameterExtractionEvaluator,
+    ReasoningEfficiencyEvaluator,
+    SemanticQualityEvaluator,
     ToolInvocationEvaluator,
-    EvaluationTraceCallback,
-    create_evaluation_callbacks,
-    AgentEvaluator,
+    ToolSelectionEvaluator,
     create_default_evaluator,
-    ExperimentRunner,
-    ExperimentResults,
-    ExperimentResult,
+    create_evaluation_callbacks,
 )
 
-
 __all__ = [
+    "AgentEvaluationResult",
+    "AgentEvaluator",
     "BaseEvaluator",
     "EvaluationScore",
-    "MetricCategory",
-    "AgentEvaluationResult",
-    "SemanticQualityEvaluator",
+    "EvaluationTraceCallback",
+    "ExperimentResult",
+    "ExperimentResults",
+    "ExperimentRunner",
     "GoalAlignmentEvaluator",
-    "ReasoningEfficiencyEvaluator",
-    "ToolSelectionEvaluator",
+    "MetricCategory",
     "ParameterExtractionEvaluator",
+    "ReasoningEfficiencyEvaluator",
+    "SemanticQualityEvaluator",
     "ToolInvocationEvaluator",
-    "EvaluationTraceCallback",
-    "create_evaluation_callbacks",
-    "AgentEvaluator",
+    "ToolSelectionEvaluator",
     "create_default_evaluator",
-    "ExperimentRunner",
-    "ExperimentResults",
-    "ExperimentResult"
-]
+    "create_evaluation_callbacks",
+]
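
Aside, not taken from the commit itself: the reordering above matches what ruff's import-sorting (I001) and `__all__`-sorting (RUF022) rules enforce, with class names grouped before the lowercase `create_*` helpers. A minimal self-check of that convention, assuming crewai is installed and the re-exporting module is `crewai.experimental.evaluation`:

    # Illustrative sketch, not part of the commit.
    from crewai.experimental import evaluation

    exported = list(evaluation.__all__)
    # Plain sorted() happens to reproduce this order because ASCII uppercase
    # names sort before the lowercase create_* helpers.
    print(exported == sorted(exported))  # expected: True
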
Lines changed: 27 additions & 31 deletions
@@ -1,51 +1,47 @@
+from crewai.experimental.evaluation.agent_evaluator import (
+    AgentEvaluator,
+    create_default_evaluator,
+)
 from crewai.experimental.evaluation.base_evaluator import (
+    AgentEvaluationResult,
     BaseEvaluator,
     EvaluationScore,
     MetricCategory,
-    AgentEvaluationResult
-)
-
-from crewai.experimental.evaluation.metrics import (
-    SemanticQualityEvaluator,
-    GoalAlignmentEvaluator,
-    ReasoningEfficiencyEvaluator,
-    ToolSelectionEvaluator,
-    ParameterExtractionEvaluator,
-    ToolInvocationEvaluator
 )
-
 from crewai.experimental.evaluation.evaluation_listener import (
     EvaluationTraceCallback,
-    create_evaluation_callbacks
+    create_evaluation_callbacks,
 )
-
-from crewai.experimental.evaluation.agent_evaluator import (
-    AgentEvaluator,
-    create_default_evaluator
-)
-
 from crewai.experimental.evaluation.experiment import (
-    ExperimentRunner,
+    ExperimentResult,
     ExperimentResults,
-    ExperimentResult
+    ExperimentRunner,
+)
+from crewai.experimental.evaluation.metrics import (
+    GoalAlignmentEvaluator,
+    ParameterExtractionEvaluator,
+    ReasoningEfficiencyEvaluator,
+    SemanticQualityEvaluator,
+    ToolInvocationEvaluator,
+    ToolSelectionEvaluator,
 )
 
 __all__ = [
+    "AgentEvaluationResult",
+    "AgentEvaluator",
     "BaseEvaluator",
     "EvaluationScore",
-    "MetricCategory",
-    "AgentEvaluationResult",
-    "SemanticQualityEvaluator",
+    "EvaluationTraceCallback",
+    "ExperimentResult",
+    "ExperimentResults",
+    "ExperimentRunner",
     "GoalAlignmentEvaluator",
-    "ReasoningEfficiencyEvaluator",
-    "ToolSelectionEvaluator",
+    "MetricCategory",
     "ParameterExtractionEvaluator",
+    "ReasoningEfficiencyEvaluator",
+    "SemanticQualityEvaluator",
     "ToolInvocationEvaluator",
-    "EvaluationTraceCallback",
-    "create_evaluation_callbacks",
-    "AgentEvaluator",
+    "ToolSelectionEvaluator",
     "create_default_evaluator",
-    "ExperimentRunner",
-    "ExperimentResults",
-    "ExperimentResult"
+    "create_evaluation_callbacks",
 ]

src/crewai/experimental/evaluation/agent_evaluator.py

Lines changed: 34 additions & 32 deletions
@@ -1,34 +1,36 @@
 import threading
-from typing import Any, Optional
+from collections.abc import Sequence
+from typing import Any
 
-from crewai.experimental.evaluation.base_evaluator import (
-    AgentEvaluationResult,
-    AggregationStrategy,
-)
 from crewai.agent import Agent
-from crewai.task import Task
-from crewai.experimental.evaluation.evaluation_display import EvaluationDisplayFormatter
+from crewai.agents.agent_builder.base_agent import BaseAgent
+from crewai.events.event_bus import crewai_event_bus
 from crewai.events.types.agent_events import (
-    AgentEvaluationStartedEvent,
     AgentEvaluationCompletedEvent,
     AgentEvaluationFailedEvent,
+    AgentEvaluationStartedEvent,
+    LiteAgentExecutionCompletedEvent,
 )
-from crewai.experimental.evaluation import BaseEvaluator, create_evaluation_callbacks
-from collections.abc import Sequence
-from crewai.events.event_bus import crewai_event_bus
-from crewai.events.utils.console_formatter import ConsoleFormatter
 from crewai.events.types.task_events import TaskCompletedEvent
-from crewai.events.types.agent_events import LiteAgentExecutionCompletedEvent
+from crewai.events.utils.console_formatter import ConsoleFormatter
 from crewai.experimental.evaluation.base_evaluator import (
     AgentAggregatedEvaluationResult,
+    AgentEvaluationResult,
+    AggregationStrategy,
+    BaseEvaluator,
     EvaluationScore,
     MetricCategory,
 )
+from crewai.experimental.evaluation.evaluation_display import EvaluationDisplayFormatter
+from crewai.experimental.evaluation.evaluation_listener import (
+    create_evaluation_callbacks,
+)
+from crewai.task import Task
 
 
 class ExecutionState:
-    current_agent_id: Optional[str] = None
-    current_task_id: Optional[str] = None
+    current_agent_id: str | None = None
+    current_task_id: str | None = None
 
     def __init__(self):
         self.traces = {}
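
Aside, not from the commit: the `Optional[str]` to `str | None` rewrite in this hunk is the PEP 604 union syntax that ruff's pyupgrade rules (e.g. UP007) prefer on Python 3.10+; the two spellings are interchangeable at runtime. A standalone sketch (the class name below is a placeholder mirroring `ExecutionState`):

    # Illustrative only: PEP 604 unions require Python 3.10+.
    from typing import Optional


    class ExecutionStateSketch:
        # Same meaning as Optional[str], written in the newer union syntax.
        current_agent_id: str | None = None
        current_task_id: str | None = None


    # Both spellings describe Union[str, None], so they compare equal.
    print(Optional[str] == (str | None))  # True
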
@@ -40,10 +42,10 @@ def __init__(self):
 class AgentEvaluator:
     def __init__(
         self,
-        agents: list[Agent],
+        agents: list[Agent] | list[BaseAgent],
         evaluators: Sequence[BaseEvaluator] | None = None,
     ):
-        self.agents: list[Agent] = agents
+        self.agents: list[Agent] | list[BaseAgent] = agents
         self.evaluators: Sequence[BaseEvaluator] | None = evaluators
 
         self.callback = create_evaluation_callbacks()
@@ -75,7 +77,8 @@ def _subscribe_to_events(self) -> None:
         )
 
     def _handle_task_completed(self, source: Any, event: TaskCompletedEvent) -> None:
-        assert event.task is not None
+        if event.task is None:
+            raise ValueError("TaskCompletedEvent must have a task")
         agent = event.task.agent
         if (
             agent
@@ -92,9 +95,8 @@ def _handle_task_completed(self, source: Any, event: TaskCompletedEvent) -> None
             state.current_agent_id = str(agent.id)
             state.current_task_id = str(event.task.id)
 
-        assert (
-            state.current_agent_id is not None and state.current_task_id is not None
-        )
+        if state.current_agent_id is None or state.current_task_id is None:
+            raise ValueError("Agent ID and Task ID must not be None")
        trace = self.callback.get_trace(
            state.current_agent_id, state.current_task_id
        )
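
Aside, not from the commit: the `assert` statements above become explicit `ValueError` raises. A likely motivation (the commit message only says "ruff and mypy issues") is that `assert` is stripped when Python runs with `-O`, so it cannot be relied on for runtime validation, and ruff flags bare asserts outside tests (S101). A small sketch of the difference:

    # Run with `python -O` and the assert silently disappears,
    # while the explicit raise still fires.
    def with_assert(task: object | None) -> object:
        assert task is not None  # removed entirely under the -O flag
        return task


    def with_raise(task: object | None) -> object:
        if task is None:
            raise ValueError("TaskCompletedEvent must have a task")
        return task


    try:
        with_raise(None)
    except ValueError as exc:
        print(f"guard still active: {exc}")
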
@@ -146,9 +148,8 @@ def _handle_lite_agent_completed(
         if not target_agent:
             return
 
-        assert (
-            state.current_agent_id is not None and state.current_task_id is not None
-        )
+        if state.current_agent_id is None or state.current_task_id is None:
+            raise ValueError("Agent ID and Task ID must not be None")
         trace = self.callback.get_trace(
             state.current_agent_id, state.current_task_id
         )
@@ -244,7 +245,7 @@ def display_evaluation_with_feedback(self) -> None:
 
     def evaluate(
         self,
-        agent: Agent,
+        agent: Agent | BaseAgent,
         execution_trace: dict[str, Any],
         final_output: Any,
         state: ExecutionState,
@@ -255,7 +256,8 @@ def evaluate(
             task_id=state.current_task_id or (str(task.id) if task else "unknown_task"),
         )
 
-        assert self.evaluators is not None
+        if self.evaluators is None:
+            raise ValueError("Evaluators must be initialized")
         task_id = str(task.id) if task else None
         for evaluator in self.evaluators:
             try:
@@ -276,15 +278,15 @@ def evaluate(
                     metric_category=evaluator.metric_category,
                     score=score,
                 )
-            except Exception as e:
+            except Exception as e:  # noqa: PERF203
                 self.emit_evaluation_failed_event(
                     agent_role=agent.role,
                     agent_id=str(agent.id),
                     task_id=task_id,
                     error=str(e),
                 )
                 self.console_formatter.print(
-                    f"Error in {evaluator.metric_category.value} evaluator: {str(e)}"
+                    f"Error in {evaluator.metric_category.value} evaluator: {e!s}"
                 )
 
         return result
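
Aside, not from the commit: the f-string change swaps an explicit `str(e)` call for the `!s` conversion flag, which is what ruff's RUF010 (explicit f-string type conversion) autofix produces; both render the same text. A tiny illustration with a placeholder message:

    # Illustrative only: both lines print identical output.
    e = ValueError("tool call failed")
    print(f"Error in evaluator: {str(e)}")
    print(f"Error in evaluator: {e!s}")  # same result, no extra call syntax
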
@@ -337,14 +339,14 @@ def emit_evaluation_failed_event(
         )
 
 
-def create_default_evaluator(agents: list[Agent], llm: None = None):
+def create_default_evaluator(agents: list[Agent] | list[BaseAgent], llm: None = None):
     from crewai.experimental.evaluation import (
         GoalAlignmentEvaluator,
-        SemanticQualityEvaluator,
-        ToolSelectionEvaluator,
         ParameterExtractionEvaluator,
-        ToolInvocationEvaluator,
         ReasoningEfficiencyEvaluator,
+        SemanticQualityEvaluator,
+        ToolInvocationEvaluator,
+        ToolSelectionEvaluator,
     )
 
     evaluators = [
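
Aside, not from the commit: with the signature widened to `list[Agent] | list[BaseAgent]`, the factory should accept either concrete `Agent` objects or `BaseAgent`-derived agents. A hypothetical usage sketch (the agent fields below are placeholders, not taken from this repository's docs; assumes crewai is installed):

    # Hypothetical usage of the widened create_default_evaluator signature.
    from crewai import Agent
    from crewai.experimental.evaluation import create_default_evaluator

    reviewer = Agent(
        role="Code Reviewer",  # placeholder values
        goal="Assess pull requests for regressions",
        backstory="An experienced Python reviewer.",
    )

    # Accepts list[Agent] as before, and now also list[BaseAgent].
    evaluator = create_default_evaluator(agents=[reviewer])
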
