feat: feedback tool #36
The SDK's top-level `__init__.py`:

```diff
@@ -1,7 +1,14 @@
 """StackOne AI SDK"""

+from .implicit_feedback import configure_implicit_feedback, get_implicit_feedback_manager
 from .models import StackOneTool, Tools
 from .toolset import StackOneToolSet

-__all__ = ["StackOneToolSet", "StackOneTool", "Tools"]
+__all__ = [
+    "StackOneToolSet",
+    "StackOneTool",
+    "Tools",
+    "configure_implicit_feedback",
+    "get_implicit_feedback_manager",
+]
 __version__ = "0.3.2"
```
New file, the feedback subpackage's `__init__.py`:

```python
"""Feedback collection tool for StackOne."""

from .tool import create_feedback_tool

__all__ = ["create_feedback_tool"]
```
New file, the feedback subpackage's `tool.py`:

```python
"""Feedback collection tool for StackOne."""

# TODO: Remove when Python 3.9 support is dropped
from __future__ import annotations

import json

from pydantic import BaseModel, Field, field_validator

from ..models import (
    ExecuteConfig,
    JsonDict,
    ParameterLocation,
    StackOneError,
    StackOneTool,
    ToolParameters,
)


class FeedbackInput(BaseModel):
    """Input schema for feedback tool."""

    feedback: str = Field(..., min_length=1, description="User feedback text")
    account_id: str = Field(..., min_length=1, description="Account identifier")
    tool_names: list[str] = Field(..., min_length=1, description="List of tool names")

    @field_validator("feedback", "account_id")
    @classmethod
    def validate_non_empty_trimmed(cls, v: str) -> str:
        """Validate that string is non-empty after trimming."""
        trimmed = v.strip()
        if not trimmed:
            raise ValueError("Field must be a non-empty string")
        return trimmed

    @field_validator("tool_names")
    @classmethod
    def validate_tool_names(cls, v: list[str]) -> list[str]:
        """Validate and clean tool names."""
        cleaned = [name.strip() for name in v if name.strip()]
        if not cleaned:
            raise ValueError("At least one tool name is required")
        return cleaned


class FeedbackTool(StackOneTool):
    """Extended tool for collecting feedback with enhanced validation."""

    def execute(
        self, arguments: str | JsonDict | None = None, *, options: JsonDict | None = None
    ) -> JsonDict:
        """Execute the feedback tool with enhanced validation.

        Args:
            arguments: Tool arguments as string or dict
            options: Execution options

        Returns:
            Response from the API

        Raises:
            StackOneError: If validation or the API call fails
        """
        try:
            # Parse input
            if isinstance(arguments, str):
                raw_params = json.loads(arguments)
            else:
                raw_params = arguments or {}

            # Validate with Pydantic
            parsed_params = FeedbackInput(**raw_params)

            # Build validated request body
            validated_arguments = {
                "feedback": parsed_params.feedback,
                "account_id": parsed_params.account_id,
                "tool_names": parsed_params.tool_names,
            }

            # Use the parent execute method with validated arguments
            return super().execute(validated_arguments, options=options)

        except json.JSONDecodeError as exc:
            raise StackOneError(f"Invalid JSON in arguments: {exc}") from exc
        except ValueError as exc:
            raise StackOneError(f"Validation error: {exc}") from exc
        except Exception as error:
            if isinstance(error, StackOneError):
                raise
            raise StackOneError(f"Error executing feedback tool: {error}") from error


def create_feedback_tool(
    api_key: str,
    account_id: str | None = None,
    base_url: str = "https://api.stackone.com",
) -> FeedbackTool:
    """Create a feedback collection tool.

    Args:
        api_key: API key for authentication
        account_id: Optional account ID
        base_url: Base URL for the API

    Returns:
        FeedbackTool configured for feedback collection
    """
    name = "meta_collect_tool_feedback"
    description = (
        "Collects user feedback on StackOne tool performance. "
        "First ask the user, \"Are you ok with sending feedback to StackOne?\" "
        "and mention that the LLM will take care of sending it. "
        "Call this tool only when the user explicitly answers yes."
    )

    parameters = ToolParameters(
        type="object",
        properties={
            "account_id": {
                "type": "string",
                "description": 'Account identifier (e.g., "acc_123456")',
            },
            "feedback": {
                "type": "string",
                "description": "Verbatim feedback from the user about their experience with StackOne tools.",
            },
            "tool_names": {
                "type": "array",
                "items": {
                    "type": "string",
                },
                "description": "Array of tool names being reviewed",
            },
        },
    )

    execute_config = ExecuteConfig(
        name=name,
        method="POST",
        url=f"{base_url}/ai/tool-feedback",
        body_type="json",
        parameter_locations={
            "feedback": ParameterLocation.BODY,
            "account_id": ParameterLocation.BODY,
            "tool_names": ParameterLocation.BODY,
        },
    )

    # Create the instance by calling the parent __init__ directly,
    # since FeedbackTool is a subclass that adds no fields of its own
    tool = FeedbackTool.__new__(FeedbackTool)
    StackOneTool.__init__(
        tool,
        description=description,
        parameters=parameters,
        _execute_config=execute_config,
        _api_key=api_key,
        _account_id=account_id,
    )

    return tool
```
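For context, a minimal usage sketch of the tool above. The `stackone_ai.feedback` import path and the credential values are assumptions not confirmed by this diff, and `tool.execute` performs a real POST to `{base_url}/ai/tool-feedback`:

```python
# Hypothetical usage sketch: import path and credentials are placeholders.
from stackone_ai.feedback import create_feedback_tool

tool = create_feedback_tool(api_key="YOUR_STACKONE_API_KEY", account_id="acc_123456")

# Arguments may be a dict or a JSON string; both paths run the Pydantic
# validation in FeedbackInput before the request is sent.
result = tool.execute(
    {
        "feedback": "The HRIS tools worked well, but pagination was confusing.",
        "account_id": "acc_123456",
        "tool_names": ["hris_list_employees"],
    }
)
```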
New file, `implicit_feedback/__init__.py`:

```python
"""Implicit feedback instrumentation for the StackOne Python SDK."""

from .analyzer import BehaviorAnalyzer, BehaviorAnalyzerConfig
from .data import ImplicitFeedbackEvent, ToolCallQualitySignals, ToolCallRecord
from .langsmith_client import LangsmithFeedbackClient
from .manager import ImplicitFeedbackManager, configure_implicit_feedback, get_implicit_feedback_manager
from .session import SessionTracker

__all__ = [
    "BehaviorAnalyzer",
    "BehaviorAnalyzerConfig",
    "ImplicitFeedbackEvent",
    "ImplicitFeedbackManager",
    "LangsmithFeedbackClient",
    "SessionTracker",
    "ToolCallQualitySignals",
    "ToolCallRecord",
    "configure_implicit_feedback",
    "get_implicit_feedback_manager",
]
```
New file, `implicit_feedback/analyzer.py`:

```python
from __future__ import annotations

from dataclasses import dataclass
from typing import Sequence

from .data import ToolCallQualitySignals, ToolCallRecord


@dataclass(frozen=True)
class BehaviorAnalyzerConfig:
    quick_refinement_window_seconds: float = 12.0
    task_switch_window_seconds: float = 180.0
    failure_penalty: float = 0.3
    quick_refinement_penalty: float = 0.25
    task_switch_penalty: float = 0.2


class BehaviorAnalyzer:
    """Derive behavioural quality signals from a stream of tool calls."""

    def __init__(self, config: BehaviorAnalyzerConfig | None = None) -> None:
        self._config = config or BehaviorAnalyzerConfig()

    def analyze(self, history: Sequence[ToolCallRecord], current: ToolCallRecord) -> ToolCallQualitySignals:
        """Compute quality signals for a tool call."""
        session_history = [
            call
            for call in history
            if call.session_id == current.session_id and call.call_id != current.call_id
        ]

        quick_refinement, refinement_window = self._detect_quick_refinement(session_history, current)
        task_switch = self._detect_task_switch(session_history, current)
        suitability_score = self._compute_suitability_score(current.status, quick_refinement, task_switch)

        return ToolCallQualitySignals(
            quick_refinement=quick_refinement,
            task_switch=task_switch,
            suitability_score=suitability_score,
            refinement_window_seconds=refinement_window,
        )

    def _detect_quick_refinement(
        self, history: Sequence[ToolCallRecord], current: ToolCallRecord
    ) -> tuple[bool, float | None]:
        if not current.session_id or not history:
            return False, None

        last_event = history[-1]
        elapsed = (current.start_time - last_event.end_time).total_seconds()
        if elapsed < 0:
            # Ignore out-of-order events
            return False, None

        if (
            last_event.tool_name == current.tool_name
            and elapsed <= self._config.quick_refinement_window_seconds
        ):
            return True, elapsed

        return False, None

    def _detect_task_switch(self, history: Sequence[ToolCallRecord], current: ToolCallRecord) -> bool:
        if not current.session_id or not history:
            return False

        for previous in reversed(history):
            elapsed = (current.start_time - previous.end_time).total_seconds()
            if elapsed < 0:
                continue
            if elapsed > self._config.task_switch_window_seconds:
                break
            if previous.tool_name != current.tool_name:
                return True

        return False

    def _compute_suitability_score(self, status: str, quick_refinement: bool, task_switch: bool) -> float:
        score = 1.0
        if status != "success":
            score -= self._config.failure_penalty
        if quick_refinement:
            score -= self._config.quick_refinement_penalty
        if task_switch:
            score -= self._config.task_switch_penalty
        return max(0.0, min(1.0, score))
```
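To illustrate the scoring, a worked sketch of a "quick refinement": the same tool is retried 6 seconds after the previous call ends, which is inside the default 12-second window, so the score drops by 0.25. `ToolCallRecord` is assumed here to be a keyword-constructible record with the fields the analyzer reads (`session_id`, `call_id`, `tool_name`, `status`, `start_time`, `end_time`); the real `.data` module may differ.

```python
# Illustrative sketch only; ToolCallRecord's constructor is assumed.
from datetime import datetime, timedelta, timezone

from stackone_ai.implicit_feedback import BehaviorAnalyzer, ToolCallRecord

t0 = datetime.now(timezone.utc)
first = ToolCallRecord(
    session_id="sess_1", call_id="call_1", tool_name="hris_list_employees",
    status="success", start_time=t0, end_time=t0 + timedelta(seconds=2),
)
retry = ToolCallRecord(
    session_id="sess_1", call_id="call_2", tool_name="hris_list_employees",
    status="success", start_time=t0 + timedelta(seconds=8),
    end_time=t0 + timedelta(seconds=10),
)

signals = BehaviorAnalyzer().analyze([first], retry)
# elapsed = 6s <= 12s window, same tool -> quick refinement, no task switch;
# suitability_score = 1.0 - 0.25 = 0.75
assert signals.quick_refinement and signals.suitability_score == 0.75
```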
Review comment: the example passes a filesystem path as `api_key`, but `configure_implicit_feedback` expects the actual LangSmith API key string, not a file path. Replace `'/path/to/langsmith.key'` with an API key value, or adjust the example to read the key from the file first.
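A hedged sketch of the reviewer's second option. `configure_implicit_feedback`'s exact signature is not shown in this diff, so the `api_key` keyword and the import path are assumptions:

```python
from pathlib import Path

from stackone_ai import configure_implicit_feedback

# Read the LangSmith key from the key file, then pass the key *string*;
# the api_key parameter name is an assumption, as is the import path.
langsmith_api_key = Path("/path/to/langsmith.key").read_text().strip()
configure_implicit_feedback(api_key=langsmith_api_key)
```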