More tests: add tool-error and unsupported-location evals, simplify the lookup_weather tool

bcherry · bcherry · commit cda53ff08bac · 2025-07-09T15:57:05.000-07:00
diff --git a/evals/test_agent.py b/evals/test_agent.py
@@ -1,5 +1,6 @@
 import pytest
 from livekit.agents import AgentSession, llm
+from livekit.agents.voice.run_result import mock_tools
 from livekit.plugins import openai
 
 from agent import Assistant
@@ -70,7 +71,57 @@ async def test_weather_tool() -> None:
 
 
 @pytest.mark.asyncio
-async def test_no_hallucination() -> None:
+async def test_weather_unavailable() -> None:
+    """Evaluation of the agent's ability to tell the user when the weather tool fails."""
+    async with (
+        _llm() as llm,
+        AgentSession(llm=llm) as sess,
+    ):
+        await sess.start(Assistant())
+
+        # Simulate a tool error: the mocked lookup_weather hands back a RuntimeError, not weather data
+        with mock_tools(
+            Assistant,
+            {"lookup_weather": lambda: RuntimeError("Weather service is unavailable")},
+        ):
+            result = await sess.run(user_input="What's the weather in Tokyo?")
+            result.expect.skip_next_event_if(type="message", role="assistant")
+            result.expect.next_event().is_function_call(
+                name="lookup_weather", arguments={"location": "Tokyo"}
+            )
+            result.expect.next_event().is_function_call_output()
+            await result.expect.next_event(type="message").judge(
+                llm, intent="Should inform the user that an error occurred."
+            )
+
+            # Intentionally left commented out: some LLMs may occasionally retry the tool call.
+            # result.expect.no_more_events()
+
+
+@pytest.mark.asyncio
+async def test_unsupported_location() -> None:
+    """Evaluation of the agent's ability to handle a weather tool result that flags an unsupported location."""
+    async with (
+        _llm() as llm,
+        AgentSession(llm=llm) as sess,
+    ):
+        await sess.start(Assistant())
+
+        with mock_tools(Assistant, {"lookup_weather": lambda: "UNSUPPORTED_LOCATION"}):
+            result = await sess.run(user_input="What's the weather in Tokyo?")
+
+            # The mocked tool reports UNSUPPORTED_LOCATION; judge the message the agent sends in response
+            await result.expect.next_event(type="message").judge(
+                llm,
+                intent="Should inform the user that weather information is not available for the given location.",
+            )
+
+        # Ensures nothing follows that message: no further function calls or other unexpected events
+        result.expect.no_more_events()
+
+
+@pytest.mark.asyncio
+async def test_grounding() -> None:
     """Evaluation of the agent's ability to refuse to answer when it doesn't know something."""
     async with (
         _llm() as llm,
diff --git a/src/agent.py b/src/agent.py
@@ -26,7 +26,7 @@
 class Assistant(Agent):
     def __init__(self) -> None:
         super().__init__(
-            instructions="""You are a helpful voice AI assistant named Kit.
+            instructions="""You are a helpful voice AI assistant.
             You eagerly assist users with their questions by providing information from your extensive knowledge.
             Your responses are concise, to the point, and without any complex formatting or punctuation.
             You are curious, friendly, and have a sense of humor.""",
@@ -35,18 +35,13 @@ def __init__(self) -> None:
     # all functions annotated with @function_tool will be passed to the LLM when this
     # agent is active
     @function_tool
-    async def lookup_weather(
-        self, context: RunContext, location: str, latitude: str, longitude: str
-    ):
-        """Called when the user asks for weather related information.
-        Ensure the user's location (city or region) is provided.
-        When given a location, please estimate the latitude and longitude of the location and
-        do not ask the user for them.
+    async def lookup_weather(self, context: RunContext, location: str):
+        """Use this tool to look up current weather information in the given location.
+
+        If the location is not supported by the weather service, the tool will indicate this.
 
         Args:
-            location: The location they are asking for
-            latitude: The latitude of the location, do not ask user for it
-            longitude: The longitude of the location, do not ask user for it
+            location: The location to look up weather information for (e.g. city name)
         """
 
         logger.info(f"Looking up weather for {location}")