Skip to content

Commit b1b1a3e

Browse files
authored
Converting bad ToolRequestMessage from agent LLM into MalformedMessageError (#302)
1 parent 676ab98 commit b1b1a3e

File tree

2 files changed

+46
-12
lines changed

2 files changed

+46
-12
lines changed

src/aviary/tools/utils.py

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from functools import partial
44
from typing import TYPE_CHECKING, Any, ClassVar, cast
55

6-
from pydantic import BaseModel, Field
6+
from pydantic import BaseModel, Field, ValidationError
77

88
from aviary.message import MalformedMessageError, Message
99

@@ -103,24 +103,32 @@ async def __call__(
103103

104104
if (num_choices := len(model_response.choices)) != 1:
105105
raise MalformedMessageError(
106-
f"Expected one choice in LiteLLM model response, got {num_choices}"
106+
f"Expected one choice in model response, got {num_choices}"
107107
f" choices, full response was {model_response}."
108108
)
109109
choice = model_response.choices[0]
110110
if choice.finish_reason not in expected_finish_reason:
111111
raise MalformedMessageError(
112-
f"Expected a finish reason in {expected_finish_reason} in LiteLLM"
112+
f"Expected a finish reason in {expected_finish_reason} in"
113113
f" model response, got finish reason {choice.finish_reason!r}, full"
114-
f" response was {model_response} and tool choice was {tool_choice}."
114+
f" response was {model_response} and tool choice was {tool_choice!r}."
115115
)
116116
usage = model_response.usage
117-
selection = ToolRequestMessage(
118-
**choice.message.model_dump(),
119-
info={
120-
"usage": (usage.prompt_tokens, usage.completion_tokens),
121-
"model": self._model_name,
122-
},
123-
)
117+
try:
118+
selection = ToolRequestMessage(
119+
**choice.message.model_dump(),
120+
info={
121+
"usage": (usage.prompt_tokens, usage.completion_tokens),
122+
"model": self._model_name,
123+
},
124+
)
125+
except ValidationError as exc:
126+
raise MalformedMessageError(
127+
f"Failed to convert model response's message {choice.message}"
128+
f" into a tool request message."
129+
f" Got finish reason {choice.finish_reason!r}, full"
130+
f" response was {model_response} and tool choice was {tool_choice!r}."
131+
) from exc
124132
if self._ledger is not None:
125133
self._ledger.messages.append(selection)
126134
return selection

tests/test_envs.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import litellm
1111
import pytest
1212
from httpx import ASGITransport, AsyncClient
13-
from pydantic import BaseModel
13+
from pydantic import BaseModel, ValidationError
1414
from pytest_subtests import SubTests
1515

1616
from aviary.core import (
@@ -30,6 +30,7 @@
3030
ToolSelectorLedger,
3131
)
3232
from aviary.dataset_server import TaskDatasetServer
33+
from aviary.message import MalformedMessageError
3334
from aviary.tools import FunctionInfo, Messages
3435
from tests import CILLMModelNames
3536
from tests.conftest import VCR_DEFAULT_MATCH_ON
@@ -483,6 +484,31 @@ async def inner1() -> None: # noqa: RUF029
483484
"Expected sub-exceptions to be displayed"
484485
)
485486

487+
@pytest.mark.asyncio
488+
async def test_tool_selector_bad_agent_llm_response(
489+
self, dummy_env: DummyEnv
490+
) -> None:
491+
obs, tools = await dummy_env.reset()
492+
493+
async def stub_acompletion(*_, **__) -> litellm.ModelResponse: # noqa: RUF029
494+
return litellm.ModelResponse(
495+
choices=[
496+
litellm.Choices(
497+
# Malformatted because it contains null tool calls
498+
message=ToolRequestMessage().model_dump() | {"tool_calls": None}
499+
)
500+
]
501+
)
502+
503+
selector = ToolSelector("stub", acompletion=stub_acompletion)
504+
with pytest.raises(
505+
MalformedMessageError, match="tool request message"
506+
) as exc_info:
507+
await selector(obs, tools=tools)
508+
assert isinstance(exc_info.value.__cause__, ValidationError), (
509+
"We should be able to retrieve the original validation error"
510+
)
511+
486512
@pytest.mark.vcr(match_on=[*VCR_DEFAULT_MATCH_ON, "body"])
487513
@pytest.mark.parametrize("model_name", [CILLMModelNames.OPENAI.value])
488514
@pytest.mark.asyncio

Comments (0)