Skip to content

Commit cda53ff

Browse files
committed
More tests
1 parent 742c600 commit cda53ff

File tree

2 files changed

+58
-12
lines changed

2 files changed

+58
-12
lines changed

evals/test_agent.py

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import pytest
22
from livekit.agents import AgentSession, llm
3+
from livekit.agents.voice.run_result import mock_tools
34
from livekit.plugins import openai
45

56
from agent import Assistant
@@ -70,7 +71,57 @@ async def test_weather_tool() -> None:
7071

7172

7273
@pytest.mark.asyncio
73-
async def test_no_hallucination() -> None:
74+
async def test_weather_unavailable() -> None:
75+
"""Evaluation of the agent's ability to handle tool errors."""
76+
async with (
77+
_llm() as llm,
78+
AgentSession(llm=llm) as sess,
79+
):
80+
await sess.start(Assistant())
81+
82+
# Simulate a tool error
83+
with mock_tools(
84+
Assistant,
85+
{"lookup_weather": lambda: RuntimeError("Weather service is unavailable")},
86+
):
87+
result = await sess.run(user_input="What's the weather in Tokyo?")
88+
result.expect.skip_next_event_if(type="message", role="assistant")
89+
result.expect.next_event().is_function_call(
90+
name="lookup_weather", arguments={"location": "Tokyo"}
91+
)
92+
result.expect.next_event().is_function_call_output()
93+
await result.expect.next_event(type="message").judge(
94+
llm, intent="Should inform the user that an error occurred."
95+
)
96+
97+
# Intentionally left commented out: some LLMs occasionally retry the tool call, which would produce extra events.
98+
# result.expect.no_more_events()
99+
100+
101+
@pytest.mark.asyncio
102+
async def test_unsupported_location() -> None:
103+
"""Evaluation of the agent's ability to handle a weather response with an unsupported location."""
104+
async with (
105+
_llm() as llm,
106+
AgentSession(llm=llm) as sess,
107+
):
108+
await sess.start(Assistant())
109+
110+
with mock_tools(Assistant, {"lookup_weather": lambda: "UNSUPPORTED_LOCATION"}):
111+
result = await sess.run(user_input="What's the weather in Tokyo?")
112+
113+
# Evaluate the agent's response for an unsupported location
114+
await result.expect.next_event(type="message").judge(
115+
llm,
116+
intent="Should inform the user that weather information is not available for the given location.",
117+
)
118+
119+
# Ensure no further function calls or other unexpected events follow the response
120+
result.expect.no_more_events()
121+
122+
123+
@pytest.mark.asyncio
124+
async def test_grounding() -> None:
74125
"""Evaluation of the agent's ability to refuse to answer when it doesn't know something."""
75126
async with (
76127
_llm() as llm,

src/agent.py

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
class Assistant(Agent):
2727
def __init__(self) -> None:
2828
super().__init__(
29-
instructions="""You are a helpful voice AI assistant named Kit.
29+
instructions="""You are a helpful voice AI assistant.
3030
You eagerly assist users with their questions by providing information from your extensive knowledge.
3131
Your responses are concise, to the point, and without any complex formatting or punctuation.
3232
You are curious, friendly, and have a sense of humor.""",
@@ -35,18 +35,13 @@ def __init__(self) -> None:
3535
# all functions annotated with @function_tool will be passed to the LLM when this
3636
# agent is active
3737
@function_tool
38-
async def lookup_weather(
39-
self, context: RunContext, location: str, latitude: str, longitude: str
40-
):
41-
"""Called when the user asks for weather related information.
42-
Ensure the user's location (city or region) is provided.
43-
When given a location, please estimate the latitude and longitude of the location and
44-
do not ask the user for them.
38+
async def lookup_weather(self, context: RunContext, location: str):
39+
"""Use this tool to look up current weather information in the given location.
40+
41+
If the location is not supported by the weather service, the tool will indicate this.
4542
4643
Args:
47-
location: The location they are asking for
48-
latitude: The latitude of the location, do not ask user for it
49-
longitude: The longitude of the location, do not ask user for it
44+
location: The location to look up weather information for (e.g. city name)
5045
"""
5146

5247
logger.info(f"Looking up weather for {location}")

0 commit comments

Comments
 (0)