pyright and ruff

lorenss-m · lorenss-m · commit 6433c5927d19 · 2025-08-04T16:19:19.000-07:00
diff --git a/hud/adapters/common/tests/test_adapter.py b/hud/adapters/common/tests/test_adapter.py
@@ -40,7 +40,7 @@ def test_image():
     }
 
     if HAS_NUMPY:
-        img_array = np.array(img)
+        img_array = np.array(img)  # type: ignore
         result["array"] = img_array
 
     return result
diff --git a/hud/datasets.py b/hud/datasets.py
@@ -15,7 +15,7 @@
 if TYPE_CHECKING:
     from datasets import Dataset
 
-    from hud.mcp.base import BaseMCPAgent
+    from hud.mcp.base import AgentResult, BaseMCPAgent
 
 logger = logging.getLogger("hud.datasets")
 
@@ -143,9 +143,8 @@ async def run_dataset(
         ... )
     """
     # Import here to avoid circular imports
-    from hud.mcp.client import MCPClient
-
     import hud
+    from hud.mcp.client import MCPClient
 
     # Convert dataset to TaskConfigs internally
     tasks = to_taskconfigs(dataset)
@@ -159,23 +158,23 @@ async def run_dataset(
     with job(name, metadata=job_metadata):
         # Run tasks with semaphore for concurrency control
         sem = asyncio.Semaphore(max_concurrent)
-        results = [None] * len(tasks)
+        results: list[AgentResult | None] = [None] * len(tasks)
 
-        async def _worker(index: int, row: dict[str, Any]) -> None:
+        async def _worker(index: int, row: Any) -> None:
             async with sem:
                 task = row["task"]
 
                 # Create trace for this task
                 with hud.trace(f"task_{index}"):
                     # Create fresh MCP client per task
                     if task.mcp_config:
-                        client = MCPClient.from_dict({"mcp_config": task.mcp_config})
+                        client = MCPClient(mcp_config=task.mcp_config)
                         agent = agent_class(client=client, **(agent_config or {}))
 
                         try:
                             results[index] = await agent.run(task)
                         finally:
-                            await client.close_all_sessions()
+                            await client.close()
                     else:
                         logger.warning("Task %d has no mcp_config defined", index)
                         results[index] = None
diff --git a/hud/env/local_docker_client.py b/hud/env/local_docker_client.py
@@ -52,7 +52,7 @@ async def build_image(cls, build_context: Path) -> tuple[str, dict[str, Any]]:
                 "aiodocker is required for LocalDockerClient. "
                 "Please install it with 'pip install aiodocker'"
             )
-        docker_client = aiodocker.Docker()
+        docker_client = aiodocker.Docker()  # type: ignore
 
         # Create a tar file from the path
         tar_bytes = directory_to_tar_bytes(build_context)
@@ -99,7 +99,7 @@ async def create(
                 "aiodocker is required for LocalDockerClient. "
                 "Please install it with 'pip install aiodocker'"
             )
-        docker_client = aiodocker.Docker()
+        docker_client = aiodocker.Docker()  # type: ignore
 
         # Default host config
         if host_config is None:
@@ -173,7 +173,7 @@ async def _stream_logs() -> None:
         client._log_task = log_task  # type: ignore[attr-defined]
         return client
 
-    def __init__(self, docker_conn: aiodocker.Docker, container_id: str) -> None:
+    def __init__(self, docker_conn: aiodocker.Docker, container_id: str) -> None:  # type: ignore
         """
         Initialize the DockerClient.
 
@@ -261,7 +261,7 @@ async def execute(
         exec_result = await container.exec(
             cmd=command,
         )
-        output: Stream = exec_result.start(timeout=ClientTimeout(timeout), detach=False)
+        output: Stream = exec_result.start(timeout=ClientTimeout(timeout), detach=False)  # type: ignore
 
         stdout_data = bytearray()
         stderr_data = bytearray()
diff --git a/hud/mcp/base.py b/hud/mcp/base.py
@@ -351,9 +351,9 @@ async def _run_task(self, task: TaskConfig, max_steps: int = 10) -> AgentResult:
                     and eval_result.structuredContent is not None
                 ):
                     return AgentResult(
-                        reward=eval_result.structuredContent.reward,
+                        reward=self._find_reward(eval_result),
                         done=True,
-                        content=eval_result.structuredContent.content,
+                        content=eval_result.structuredContent["content"],
                         messages=prompt_result.messages,
                     )
                 else:
@@ -377,6 +377,19 @@ async def _run_task(self, task: TaskConfig, max_steps: int = 10) -> AgentResult:
         except Exception as e:
             return AgentResult(reward=0.0, done=True, error=str(e))
 
+    def _find_reward(self, result: MCPToolResult) -> float:
+        """Find the reward in the result.
+
+        Agent accepts "reward", "grade", "score"
+
+        If not found, return 0.0
+        """
+        accept_keys = ["reward", "grade", "score"]
+        for key in accept_keys:
+            if isinstance(result.structuredContent, dict) and key in result.structuredContent:
+                return result.structuredContent[key]
+        return 0.0
+
     def _format_error_result(self, error_message: str) -> MCPToolResult:
         return MCPToolResult(
             content=[types.TextContent(text=error_message, type="text")], isError=True
diff --git a/hud/mcp/claude.py b/hud/mcp/claude.py
@@ -198,11 +198,12 @@ async def get_model_response(
                 mcp_tool_name = self._claude_to_mcp_tool_map.get(block.name, block.name)
 
                 # Create MCPToolCall object with Claude metadata as extra fields
+                # Pyright will complain but the tool class accepts extra fields
                 tool_call = MCPToolCall(
                     name=mcp_tool_name,
                     arguments=block.input,
-                    tool_use_id=block.id,  # Extra field for format_tool_results
-                    claude_name=block.name,  # Keep original Claude name
+                    tool_use_id=block.id,  # type: ignore
+                    claude_name=block.name,  # type: ignore
                 )
                 result.tool_calls.append(tool_call)
                 result.done = False
diff --git a/hud/mcp/client.py b/hud/mcp/client.py
@@ -8,6 +8,7 @@
 from typing import TYPE_CHECKING, Any
 
 from mcp_use.client import MCPClient as MCPUseClient
+from pydantic import AnyUrl
 
 if TYPE_CHECKING:
     from typing import Self
@@ -137,12 +138,11 @@ async def discover_tools(self) -> list[types.Tool]:
                 # Log detailed tool info in verbose mode
                 if self.verbose:
                     for tool in tools_result.tools:
+                        description = tool.description or ""
                         logger.debug(
                             "  Tool '%s': %s",
                             tool.name,
-                            tool.description[:100] + "..."
-                            if len(tool.description) > 100
-                            else tool.description,
+                            description[:100] + "..." if len(description) > 100 else description,
                         )
 
             except Exception as e:
@@ -170,10 +170,10 @@ async def fetch_telemetry(self) -> dict[str, Any]:
                 # Try to read telemetry resource
                 try:
                     result = await session.connector.client_session.read_resource(
-                        "telemetry://live"
+                        AnyUrl("telemetry://live")
                     )
                     if result and result.contents and len(result.contents) > 0:
-                        telemetry_data = json.loads(result.contents[0].text)
+                        telemetry_data = json.loads(result.contents[0].text)  # type: ignore
                         self._telemetry_data[server_name] = telemetry_data
 
                         logger.info("📡 Telemetry data from server '%s':", server_name)
@@ -232,6 +232,9 @@ async def call_tool(
                 json.dumps(arguments, indent=2) if arguments else "None",
             )
 
+        if session.connector.client_session is None:
+            raise ValueError(f"Client session not initialized for {server_name}")
+
         result = await session.connector.client_session.call_tool(
             name=tool_name, arguments=arguments or {}
         )
@@ -241,7 +244,7 @@ async def call_tool(
 
         return result
 
-    async def read_resource(self, uri: str) -> types.ReadResourceResult | None:
+    async def read_resource(self, uri: AnyUrl) -> types.ReadResourceResult | None:
         """
         Read a resource by URI from any server that provides it.
 
@@ -301,12 +304,7 @@ def get_all_active_sessions(self) -> dict[str, MCPUseSession]:
 
     async def close(self) -> None:
         """Close all active sessions."""
-        for session in self._sessions.values():
-            try:
-                if hasattr(session, "close"):
-                    await session.close()
-            except Exception as e:
-                logger.error("Error closing session: %s", e)
+        await self._mcp_client.close_all_sessions()
 
         self._sessions = {}
         self._available_tools = []
diff --git a/hud/mcp/langchain.py b/hud/mcp/langchain.py
@@ -47,7 +47,11 @@ def __init__(
         self.adapter = LangChainAdapter(disallowed_tools=self.disallowed_tools)
         self._langchain_tools: list[BaseTool] | None = None
 
-        self.model_name = "langchain-" + self.llm.model_name
+        self.model_name = (
+            "langchain-" + self.llm.model_name  # type: ignore
+            if hasattr(self.llm, "model_name")
+            else "unknown"
+        )
 
     def _get_langchain_tools(self) -> list[BaseTool]:
         """Get or create LangChain tools from MCP tools."""
diff --git a/hud/mcp/openai.py b/hud/mcp/openai.py
@@ -19,7 +19,7 @@
 
 from hud.settings import settings
 
-from .base import BaseMCPAgent, ModelResponse
+from .base import AgentResult, BaseMCPAgent, ModelResponse
 
 if TYPE_CHECKING:
     from hud.datasets import TaskConfig
@@ -92,9 +92,7 @@ def __init__(
         Remember: You are expected to complete tasks autonomously. The user trusts you to do what they asked.
         """  # noqa: E501
 
-    async def run(
-        self, prompt_or_task: str | TaskConfig, max_steps: int = 10
-    ) -> dict[str, Any]:
+    async def run(self, prompt_or_task: str | TaskConfig, max_steps: int = 10) -> AgentResult:
         """
         Run the agent with the given prompt or task.
 
@@ -260,11 +258,12 @@ async def get_model_response(self, messages: list[Any], step: int) -> ModelRespo
                 action = computer_call.action.model_dump()
 
                 # Create MCPToolCall object with OpenAI metadata as extra fields
+                # Pyright will complain but the tool class accepts extra fields
                 tool_call = MCPToolCall(
                     name=computer_tool_name,
                     arguments=action,
-                    call_id=computer_call.call_id,  # Extra field for format_tool_results
-                    pending_safety_checks=computer_call.pending_safety_checks,
+                    call_id=computer_call.call_id,  # type: ignore
+                    pending_safety_checks=computer_call.pending_safety_checks,  # type: ignore
                 )
                 result.tool_calls.append(tool_call)
         else:
diff --git a/hud/mcp/tests/test_base.py b/hud/mcp/tests/test_base.py
@@ -7,6 +7,7 @@
 
 import pytest
 from mcp import types
+from mcp.types import CallToolRequestParams as MCPToolCall
 
 from hud.mcp.base import BaseMCPAgent
 from hud.tools.executors.base import BaseExecutor
@@ -102,7 +103,7 @@ def test_init_with_params(self):
     async def test_initialize_no_client(self):
         """Test initialize fails without client."""
         agent = MockMCPAgent()
-        agent.client = None
+        agent.client = None  # type: ignore
 
         with pytest.raises(ValueError, match="Client is not initialized"):
             await agent.initialize()
@@ -218,12 +219,12 @@ async def mock_call_tool(name, args):
 
         assert agent.client is not None
         agent.client.get_all_active_sessions = MagicMock(return_value={"server1": mock_session})
-        agent.client.get_session = MagicMock(return_value=mock_session)
 
         await agent.initialize()
 
         # Call the tool
-        result = await agent.call_tool({"name": "test_tool", "arguments": {"param": "value"}})
+        tool_call = MCPToolCall(name="test_tool", arguments={"param": "value"})
+        result = await agent.call_tool(tool_call)
 
         assert result == mock_result
         assert not result.isError
@@ -247,15 +248,17 @@ async def mock_list_tools():
 
         # Try to call unknown tool
         with pytest.raises(ValueError, match="Tool 'unknown_tool' not found"):
-            await agent.call_tool({"name": "unknown_tool", "arguments": {}})
+            tool_call = MCPToolCall(name="unknown_tool", arguments={})
+            await agent.call_tool(tool_call)
 
     @pytest.mark.asyncio
     async def test_call_tool_no_name(self):
         """Test calling tool without name."""
-        agent = MockMCPAgent()
+        from pydantic import ValidationError
 
-        with pytest.raises(ValueError, match="Tool call must have a 'name' field"):
-            await agent.call_tool({"arguments": {}})
+        # MCPToolCall requires name, so it will raise ValidationError
+        with pytest.raises(ValidationError):
+            MCPToolCall(name="", arguments={})  # Empty name should fail validation
 
     def test_get_system_prompt_default(self):
         """Test get_system_prompt with default settings."""
@@ -362,34 +365,14 @@ async def mock_call_tool(name, args):
 
         assert agent.client is not None
         agent.client.get_all_active_sessions = MagicMock(return_value={"server1": mock_session})
-        agent.client.get_session = MagicMock(return_value=mock_session)
 
         await agent.initialize()
 
         screenshot = await agent.capture_screenshot()
         assert screenshot == "base64imagedata"
 
-    def test_process_tool_results_extracts_text(self):
-        """Test processing tool results extracts text content."""
-        agent = MockMCPAgent()
-
-        # Create a proper CallToolResult object
-        result = types.CallToolResult(
-            content=[
-                types.TextContent(type="text", text="Result text"),
-                types.ImageContent(type="image", data="imagedata", mimeType="image/png"),
-            ],
-            isError=False,
-        )
-
-        tool_results = [{"tool_name": "test_tool", "result": result}]
-
-        processed = agent.process_tool_results(tool_results)
-
-        assert "text" in processed
-        assert "Result text" in processed["text"]
-        assert "results" in processed
-        assert len(processed["results"]) == 1
+    # process_tool_results method was removed from base class
+    # This functionality is now handled internally
 
     def test_get_tools_by_server(self):
         """Test getting tools grouped by server."""
diff --git a/hud/task.py b/hud/task.py
diff --git a/hud/taskset.py b/hud/taskset.py

Original file line number	Diff line number	Diff line change
`@@ -40,7 +40,7 @@ def test_image():`
`40`	`40`	`}`
`41`	`41`
`42`	`42`	`if HAS_NUMPY:`
`43`		`- img_array = np.array(img)`
	`43`	`+ img_array = np.array(img) # type: ignore`
`44`	`44`	`result["array"] = img_array`
`45`	`45`
`46`	`46`	`return result`