Skip to content

Commit a6d3fa9

Browse files
authored
fix(evals): auto generate evals (#3529)
1 parent 9290933 commit a6d3fa9

File tree

14 files changed

+1593
-882
lines changed

14 files changed

+1593
-882
lines changed

packages/traceloop-sdk/.flake8

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,6 @@ exclude =
99
.venv,
1010
.pytest_cache
1111
max-line-length = 120
12-
per-file-ignores = __init__.py:F401
12+
per-file-ignores =
13+
__init__.py:F401
14+
traceloop/sdk/generated/**/*.py:E501

packages/traceloop-sdk/poetry.lock

Lines changed: 286 additions & 170 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

packages/traceloop-sdk/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ mypy = "^1.18.2"
8383
types-requests = "^2.31.0"
8484
types-colorama = "^0.4.15"
8585
pandas-stubs = "*"
86+
datamodel-code-generator = "^0.26.0"
8687

8788
[tool.poetry.group.test.dependencies]
8889
openai = "^1.31.1"
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
from .evaluator import Evaluator
22
from .config import EvaluatorDetails
3-
from .evaluators_made_by_traceloop import EvaluatorMadeByTraceloop
3+
from .evaluators_made_by_traceloop import EvaluatorMadeByTraceloop, create_evaluator
44

55
__all__ = [
66
"Evaluator",
77
"EvaluatorDetails",
88
"EvaluatorMadeByTraceloop",
9+
"create_evaluator",
910
]

packages/traceloop-sdk/traceloop/sdk/evaluator/evaluator.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import httpx
22
from typing import Dict, Optional, Any, List
3+
from pydantic import ValidationError
34
from .field_mapping import normalize_task_output, get_field_suggestions, format_field_help
45

56
from .model import (
@@ -11,6 +12,25 @@
1112
)
1213
from .stream_client import SSEClient
1314
from .config import EvaluatorDetails
15+
from ..generated.evaluators import get_request_model
16+
17+
18+
def _validate_evaluator_input(slug: str, input: Dict[str, str]) -> None:
    """Check ``input`` against the request model registered for ``slug``.

    The model is looked up via ``get_request_model``. When that lookup
    returns a falsy value, no validation is performed and the function
    simply returns.

    Args:
        slug: The evaluator slug (e.g., "pii-detector")
        input: Mapping of input field names to values; it is unpacked as
            keyword arguments when instantiating the request model

    Raises:
        ValueError: If instantiating the request model from ``input`` raises
            a pydantic ``ValidationError``. The original error is chained as
            the cause.
    """
    model_cls = get_request_model(slug)

    # Guard clause: nothing to validate against for this slug.
    if not model_cls:
        return

    try:
        # Instantiation is done only for its validation side effect;
        # the resulting model instance is intentionally discarded.
        model_cls(**input)
    except ValidationError as e:
        raise ValueError(f"Invalid input for '{slug}': {e}") from e
1434

1535

1636
class Evaluator:
@@ -94,6 +114,8 @@ async def run_experiment_evaluator(
94114
Returns:
95115
ExecutionResponse: The evaluation result from SSE stream
96116
"""
117+
_validate_evaluator_input(evaluator_slug, input)
118+
97119
request = self._build_evaluator_request(
98120
task_id, experiment_id, experiment_run_id, input, evaluator_version, evaluator_config
99121
)
@@ -136,6 +158,8 @@ async def trigger_experiment_evaluator(
136158
Returns:
137159
str: The execution_id that can be used to check results later
138160
"""
161+
_validate_evaluator_input(evaluator_slug, input)
162+
139163
request = self._build_evaluator_request(
140164
task_id, experiment_id, experiment_run_id, input, evaluator_version, evaluator_config
141165
)

0 commit comments

Comments
 (0)