diff --git a/docs/docs.json b/docs/docs.json
index d3f7332c..71392f4e 100644
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -33,7 +33,7 @@
         "icon": "code",
         "versions": [
           {
-            "version": "0.5.3",
+            "version": "0.5.4",
             "groups": [
               {
                 "group": "Get Started",
diff --git a/hud/agents/claude.py b/hud/agents/claude.py
index 1fc41559..7c7bc58a 100644
--- a/hud/agents/claude.py
+++ b/hud/agents/claude.py
@@ -76,10 +76,18 @@ def __init__(self, params: ClaudeCreateParams | None = None, **kwargs: Any) -> N
 
         model_client = self.config.model_client
         if model_client is None:
-            api_key = settings.anthropic_api_key
-            if not api_key:
-                raise ValueError("Anthropic API key not found. Set ANTHROPIC_API_KEY.")
-            model_client = AsyncAnthropic(api_key=api_key)
+            # Default to HUD gateway when HUD_API_KEY is available
+            if settings.api_key:
+                from hud.agents.gateway import build_gateway_client
+
+                model_client = build_gateway_client("anthropic")
+            elif settings.anthropic_api_key:
+                model_client = AsyncAnthropic(api_key=settings.anthropic_api_key)
+            else:
+                raise ValueError(
+                    "No API key found. Set HUD_API_KEY for HUD gateway, "
+                    "or ANTHROPIC_API_KEY for direct Anthropic access."
+                )
 
         self.anthropic_client = model_client
         self.max_tokens = self.config.max_tokens
diff --git a/hud/agents/gemini.py b/hud/agents/gemini.py
index d9cfeb71..c069ba65 100644
--- a/hud/agents/gemini.py
+++ b/hud/agents/gemini.py
@@ -61,10 +61,18 @@ def __init__(self, params: GeminiCreateParams | None = None, **kwargs: Any) -> N
 
         model_client = self.config.model_client
         if model_client is None:
-            api_key = settings.gemini_api_key
-            if not api_key:
-                raise ValueError("Gemini API key not found. Set GEMINI_API_KEY.")
-            model_client = genai.Client(api_key=api_key)
+            # Default to HUD gateway when HUD_API_KEY is available
+            if settings.api_key:
+                from hud.agents.gateway import build_gateway_client
+
+                model_client = build_gateway_client("gemini")
+            elif settings.gemini_api_key:
+                model_client = genai.Client(api_key=settings.gemini_api_key)
+            else:
+                raise ValueError(
+                    "No API key found. Set HUD_API_KEY for HUD gateway, "
+                    "or GEMINI_API_KEY for direct Gemini access."
+                )
 
         if self.config.validate_api_key:
             try:
diff --git a/hud/agents/openai.py b/hud/agents/openai.py
index 84e7c85c..c47b798e 100644
--- a/hud/agents/openai.py
+++ b/hud/agents/openai.py
@@ -79,10 +79,18 @@ def __init__(self, params: OpenAICreateParams | None = None, **kwargs: Any) -> N
 
         model_client = self.config.model_client
         if model_client is None:
-            api_key = settings.openai_api_key
-            if not api_key:
-                raise ValueError("OpenAI API key not found. Set OPENAI_API_KEY.")
-            model_client = AsyncOpenAI(api_key=api_key)
+            # Default to HUD gateway when HUD_API_KEY is available
+            if settings.api_key:
+                from hud.agents.gateway import build_gateway_client
+
+                model_client = build_gateway_client("openai")
+            elif settings.openai_api_key:
+                model_client = AsyncOpenAI(api_key=settings.openai_api_key)
+            else:
+                raise ValueError(
+                    "No API key found. Set HUD_API_KEY for HUD gateway, "
+                    "or OPENAI_API_KEY for direct OpenAI access."
+                )
 
         if self.config.validate_api_key:
             try:
diff --git a/hud/agents/tests/test_openai.py b/hud/agents/tests/test_openai.py
index cae578da..d1e5e460 100644
--- a/hud/agents/tests/test_openai.py
+++ b/hud/agents/tests/test_openai.py
@@ -128,8 +128,9 @@ async def test_init_with_parameters(self, mock_openai: AsyncOpenAI) -> None:
     async def test_init_without_client_no_api_key(self) -> None:
         """Test agent initialization fails without API key."""
         with patch("hud.agents.openai.settings") as mock_settings:
+            mock_settings.api_key = None
             mock_settings.openai_api_key = None
-            with pytest.raises(ValueError, match="OpenAI API key not found"):
+            with pytest.raises(ValueError, match="No API key found"):
                 OpenAIAgent.create()
 
     @pytest.mark.asyncio
diff --git a/hud/environment/environment.py b/hud/environment/environment.py
index 4ed44b32..62b68dd2 100644
--- a/hud/environment/environment.py
+++ b/hud/environment/environment.py
@@ -129,6 +129,7 @@ def __init__(
         super().__init__(name=name, instructions=instructions, **fastmcp_kwargs)
         self._connections: dict[str, Connector] = {}
         self._router = ToolRouter(conflict_resolution=conflict_resolution)
+        self._routing_built = False  # Track if _build_routing has been called
         self._in_context = False
 
         # Tool call queues - run after connections established
@@ -361,6 +362,7 @@ async def __aexit__(
         if self._connections:
             await asyncio.gather(*[c.disconnect() for c in self._connections.values()])
         self._router.clear()
+        self._routing_built = False
 
     async def run_async(
         self,
@@ -389,6 +391,7 @@ async def _build_routing(self) -> None:
             connections=self._connections,
             connection_order=list(self._connections.keys()),
         )
+        self._routing_built = True
 
         # Populate mock schemas for auto-generated mock values
         self._populate_mock_schemas()
@@ -406,6 +409,8 @@ def _setup_handlers(self) -> None:
 
     async def _env_list_tools(self) -> list[mcp_types.Tool]:
         """Return all tools including those from connectors."""
+        if not self._routing_built:
+            await self._build_routing()
         return self._router.tools
 
     async def _env_call_tool(self, name: str, arguments: dict[str, Any] | None = None) -> list[Any]:
diff --git a/hud/environment/scenarios.py b/hud/environment/scenarios.py
index 566422d3..8f9e9f14 100644
--- a/hud/environment/scenarios.py
+++ b/hud/environment/scenarios.py
@@ -199,8 +199,23 @@ async def run_scenario_setup(self, scenario_name: str, args: dict[str, Any]) ->
             except Exception:
                 available = "(could not fetch available scenarios)"
 
+            # Check if the prompt exists - if so, the error is something else
+            original_error = str(e)
+            if prompt_id in scenario_prompts:
+                # Prompt exists but get_prompt failed for another reason
+                raise ValueError(
+                    f"⚠️ ERROR: Scenario '{prompt_id}' exists but failed to execute.\n\n"
+                    f"The scenario was found but encountered an error during setup:\n"
+                    f"  {original_error}\n\n"
+                    f"This could be caused by:\n"
+                    f"  - Missing or invalid scenario arguments\n"
+                    f"  - An error in the scenario's setup function\n"
+                    f"  - Connection or serialization issues\n\n"
+                    f"Check the scenario definition and required arguments."
+                ) from e
+
             raise ValueError(
-                f"Scenario not found.\n\n"
+                f"⚠️ ERROR: Scenario not found.\n\n"
                 f"Scenario IDs have the format 'environment_name:scenario_name'.\n"
                 f"If you only specify 'scenario_name', the SDK uses your task's env name "
                 f"as the prefix.\n"
@@ -362,7 +377,7 @@ def decorator(
                     # Only include JSON-serializable defaults
                     default_val = p.default
                     if default_val is None or isinstance(
-                        default_val, (str, int, float, bool, list, dict)
+                        default_val, (str | int | float | bool | list | dict)
                     ):
                         arg_info["default"] = default_val
 
@@ -413,26 +428,51 @@ async def prompt_handler(**handler_args: Any) -> list[str]:
 
             # Deserialize JSON-encoded arguments using Pydantic TypeAdapter
             # This properly handles: Pydantic models, enums, datetime, lists, dicts
+            # MCP prompts only support string arguments, so we JSON-serialize complex
+            # types on the sending side and deserialize them here
             deserialized_args: dict[str, Any] = {}
             for arg_name, arg_value in handler_args.items():
                 annotation = param_annotations.get(arg_name)
-                if (
-                    annotation is not None
-                    and annotation is not str
-                    and isinstance(arg_value, str)
-                ):
-                    # Try TypeAdapter.validate_json for proper type coercion
+
+                # Only attempt deserialization on string values
+                if not isinstance(arg_value, str):
+                    deserialized_args[arg_name] = arg_value
+                    continue
+
+                # If annotation is explicitly str, keep as string (no deserialization)
+                if annotation is str:
+                    deserialized_args[arg_name] = arg_value
+                    continue
+
+                # If we have a non-str type annotation, use TypeAdapter
+                if annotation is not None:
                     try:
                         adapter = TypeAdapter(annotation)
                         deserialized_args[arg_name] = adapter.validate_json(arg_value)
-                    except Exception:
-                        # Fall back to plain json.loads if TypeAdapter fails
-                        try:
-                            deserialized_args[arg_name] = json.loads(arg_value)
-                        except json.JSONDecodeError:
-                            deserialized_args[arg_name] = arg_value
-                else:
-                    deserialized_args[arg_name] = arg_value
+                        continue
+                    except Exception:  # noqa: S110
+                        pass  # Fall through to generic JSON decode
+
+                # No type annotation - try JSON decode for strings that look like JSON
+                # (arrays, objects, numbers, booleans, null)
+                stripped = arg_value.strip()
+                if (stripped and stripped[0] in "[{") or stripped in ("true", "false", "null"):
+                    try:
+                        deserialized_args[arg_name] = json.loads(arg_value)
+                        continue
+                    except json.JSONDecodeError:
+                        pass  # Keep as string
+
+                # Also try to decode if it looks like a number
+                if stripped.lstrip("-").replace(".", "", 1).isdigit():
+                    try:
+                        deserialized_args[arg_name] = json.loads(arg_value)
+                        continue
+                    except json.JSONDecodeError:
+                        pass
+
+                # Keep as string
+                deserialized_args[arg_name] = arg_value
 
             # Create generator instance with deserialized args
             gen = scenario_fn(**deserialized_args)
diff --git a/hud/utils/tests/test_version.py b/hud/utils/tests/test_version.py
index 1d40fb59..9d1d34ff 100644
--- a/hud/utils/tests/test_version.py
+++ b/hud/utils/tests/test_version.py
@@ -5,4 +5,4 @@ def test_import():
     """Test that the package can be imported."""
     import hud
 
-    assert hud.__version__ == "0.5.3"
+    assert hud.__version__ == "0.5.4"
diff --git a/hud/version.py b/hud/version.py
index c9ba33a6..68167e84 100644
--- a/hud/version.py
+++ b/hud/version.py
@@ -4,4 +4,4 @@
 
 from __future__ import annotations
 
-__version__ = "0.5.3"
+__version__ = "0.5.4"
diff --git a/pyproject.toml b/pyproject.toml
index b1ad8b5a..a6092da9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "hud-python"
-version = "0.5.3"
+version = "0.5.4"
 description = "SDK for the HUD platform."
 readme = "README.md"
 requires-python = ">=3.11, <3.13"
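
For reference, the three agent constructors patched above now share one key-resolution order: prefer the HUD gateway when HUD_API_KEY is set, fall back to the provider-specific key, and raise otherwise. The sketch below restates that precedence outside the SDK using plain environment variables; the resolve_client_source helper and its return strings are illustrative assumptions, while the real code paths go through hud.settings, build_gateway_client, AsyncAnthropic, AsyncOpenAI, and genai.Client as shown in the diffs.

# Minimal sketch of the key-resolution precedence introduced above (illustrative only;
# the SDK reads these values via hud.settings rather than os.getenv directly).
import os

PROVIDER_ENV = {
    "anthropic": "ANTHROPIC_API_KEY",
    "openai": "OPENAI_API_KEY",
    "gemini": "GEMINI_API_KEY",
}


def resolve_client_source(provider: str) -> str:
    """Return a description of which client an agent would build for `provider`."""
    if os.getenv("HUD_API_KEY"):
        # Mirrors: settings.api_key set -> build_gateway_client(provider)
        return f"HUD gateway client ({provider})"
    if os.getenv(PROVIDER_ENV[provider]):
        # Mirrors: provider key set -> AsyncAnthropic / AsyncOpenAI / genai.Client
        return f"direct {provider} client"
    raise ValueError(
        f"No API key found. Set HUD_API_KEY for HUD gateway, "
        f"or {PROVIDER_ENV[provider]} for direct {provider} access."
    )


if __name__ == "__main__":
    print(resolve_client_source("openai"))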