Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion packages/traceloop-sdk/.flake8
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,6 @@ exclude =
.venv,
.pytest_cache
max-line-length = 120
per-file-ignores = __init__.py:F401
per-file-ignores =
__init__.py:F401
traceloop/sdk/generated/**/*.py:E501
456 changes: 286 additions & 170 deletions packages/traceloop-sdk/poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions packages/traceloop-sdk/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ mypy = "^1.18.2"
types-requests = "^2.31.0"
types-colorama = "^0.4.15"
pandas-stubs = "*"
datamodel-code-generator = "^0.26.0"

[tool.poetry.group.test.dependencies]
openai = "^1.31.1"
Expand Down
3 changes: 2 additions & 1 deletion packages/traceloop-sdk/traceloop/sdk/evaluator/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from .evaluator import Evaluator
from .config import EvaluatorDetails
from .evaluators_made_by_traceloop import EvaluatorMadeByTraceloop
from .evaluators_made_by_traceloop import EvaluatorMadeByTraceloop, create_evaluator

__all__ = [
"Evaluator",
"EvaluatorDetails",
"EvaluatorMadeByTraceloop",
"create_evaluator",
]
24 changes: 24 additions & 0 deletions packages/traceloop-sdk/traceloop/sdk/evaluator/evaluator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import httpx
from typing import Dict, Optional, Any, List
from pydantic import ValidationError
from .field_mapping import normalize_task_output, get_field_suggestions, format_field_help

from .model import (
Expand All @@ -11,6 +12,25 @@
)
from .stream_client import SSEClient
from .config import EvaluatorDetails
from ..generated.evaluators import get_request_model


def _validate_evaluator_input(slug: str, input: Dict[str, str]) -> None:
    """Validate input against the evaluator's request model if available.

    Looks up the generated pydantic request model for the given evaluator
    slug; when one exists, instantiating it with the provided fields runs
    pydantic validation. Evaluators without a generated model are accepted
    as-is.

    Args:
        slug: The evaluator slug (e.g., "pii-detector")
        input: Dictionary of input field names to values

    Raises:
        ValueError: If input fails validation against the request model
    """
    model_cls = get_request_model(slug)
    if not model_cls:
        # No generated request model for this slug — nothing to validate.
        return
    try:
        model_cls(**input)
    except ValidationError as e:
        raise ValueError(f"Invalid input for '{slug}': {e}") from e


class Evaluator:
Expand Down Expand Up @@ -94,6 +114,8 @@ async def run_experiment_evaluator(
Returns:
ExecutionResponse: The evaluation result from SSE stream
"""
_validate_evaluator_input(evaluator_slug, input)

request = self._build_evaluator_request(
task_id, experiment_id, experiment_run_id, input, evaluator_version, evaluator_config
)
Expand Down Expand Up @@ -136,6 +158,8 @@ async def trigger_experiment_evaluator(
Returns:
str: The execution_id that can be used to check results later
"""
_validate_evaluator_input(evaluator_slug, input)

request = self._build_evaluator_request(
task_id, experiment_id, experiment_run_id, input, evaluator_version, evaluator_config
)
Expand Down
Loading