|
1 | 1 | import pytest |
2 | 2 | from livekit.agents import AgentSession, llm |
| 3 | +from livekit.agents.voice.run_result import mock_tools |
3 | 4 | from livekit.plugins import openai |
4 | 5 |
|
5 | 6 | from agent import Assistant |
@@ -70,7 +71,57 @@ async def test_weather_tool() -> None: |
70 | 71 |
|
71 | 72 |
|
72 | 73 | @pytest.mark.asyncio |
73 | | -async def test_no_hallucination() -> None: |
async def test_weather_unavailable() -> None:
    """Check that the agent tells the user about a failure when the weather tool errors."""
    # NOTE(review): the stub *returns* a RuntimeError instance rather than
    # raising it; presumably mock_tools surfaces that as a tool failure --
    # confirm against the mock_tools documentation.
    failing_tools = {
        "lookup_weather": lambda: RuntimeError("Weather service is unavailable"),
    }

    async with _llm() as judge_llm, AgentSession(llm=judge_llm) as session:
        await session.start(Assistant())

        # Replace the real weather tool with the failing stub for this run.
        with mock_tools(Assistant, failing_tools):
            run = await session.run(user_input="What's the weather in Tokyo?")

            # An assistant message may precede the tool call; skip it if present.
            run.expect.skip_next_event_if(type="message", role="assistant")

            tool_call = run.expect.next_event()
            tool_call.is_function_call(
                name="lookup_weather",
                arguments={"location": "Tokyo"},
            )

            tool_output = run.expect.next_event()
            tool_output.is_function_call_output()

            reply = run.expect.next_event(type="message")
            await reply.judge(
                judge_llm,
                intent="Should inform the user that an error occurred.",
            )

            # Deliberately not asserting the end of the event stream: some LLMs
            # may occasionally try to retry the tool call.
            # run.expect.no_more_events()
| 99 | + |
| 100 | + |
@pytest.mark.asyncio
async def test_unsupported_location() -> None:
    """Check the agent's reply when the weather tool reports an unsupported location."""
    # Stub the weather tool so it answers with the unsupported-location marker.
    stubbed_tools = {"lookup_weather": lambda: "UNSUPPORTED_LOCATION"}

    async with _llm() as judge_llm, AgentSession(llm=judge_llm) as session:
        await session.start(Assistant())

        with mock_tools(Assistant, stubbed_tools):
            run = await session.run(user_input="What's the weather in Tokyo?")

            # Judge the agent's message for the unsupported location.
            reply = run.expect.next_event(type="message")
            await reply.judge(
                judge_llm,
                intent="Should inform the user that weather information is not available for the given location.",
            )

            # No function calls or other unexpected events may follow the reply.
            run.expect.no_more_events()
| 121 | + |
| 122 | + |
| 123 | +@pytest.mark.asyncio |
| 124 | +async def test_grounding() -> None: |
74 | 125 | """Evaluation of the agent's ability to refuse to answer when it doesn't know something.""" |
75 | 126 | async with ( |
76 | 127 | _llm() as llm, |
|
0 commit comments