@@ -27,7 +27,15 @@ async def test_offers_assistance() -> None:
2727 result .expect .next_event ()
2828 .is_message (role = "assistant" )
2929 .judge (
30- llm , intent = "Offers a friendly introduction and offer of assistance."
30+ llm ,
31+ intent = """
32+ Offers assistance to the user.
33+
34+ Optional context that may or may not be included:
35+ - A friendly greeting or introduction
36+ - Welcoming tone
37+ - Invitation to ask questions or request help
38+ """ ,
3139 )
3240 )
3341
@@ -64,7 +72,12 @@ async def test_weather_tool() -> None:
6472 .is_message (role = "assistant" )
6573 .judge (
6674 llm ,
67- intent = "Informs the user that the weather in Tokyo is sunny with a temperature of 70 degrees." ,
75+ intent = """
76+ Informs the user that the weather is sunny with a temperature of 70 degrees.
77+
78+ Optional context that may or may not be included (but the response must not contradict these facts):
79+ - The location for the weather report is Tokyo
80+ """ ,
6881 )
6982 )
7083
@@ -94,7 +107,16 @@ async def test_weather_unavailable() -> None:
94107 result .expect .next_event ().is_function_call_output ()
95108 await result .expect .next_event (type = "message" ).judge (
96109 llm ,
97- intent = "Should inform the user that an error occurred and/or the weather is is currently unavailable." ,
110+ intent = """
111+ Acknowledges that the weather request could not be fulfilled and communicates this to the user.
112+
113+ The response should convey that there was a problem getting the weather information, but can be expressed in various ways such as:
114+ - Mentioning an error, service issue, or that it couldn't be retrieved
115+ - Suggesting alternatives or asking what else they can help with
116+ - Being apologetic or explaining the situation
117+
118+ The response does not need to use specific technical terms like "weather service error" or "temporary".
119+ """ ,
98120 )
99121
100122 # Leaving this commented out: some LLMs may occasionally retry.
@@ -116,7 +138,17 @@ async def test_unsupported_location() -> None:
116138 # Evaluate the agent's response for an unsupported location
117139 await result .expect .next_event (type = "message" ).judge (
118140 llm ,
119- intent = "Should inform the user that weather information is not available for the given location." ,
141+ intent = """
142+ Communicates that the weather request for the specific location could not be fulfilled.
143+
144+ The response should indicate that weather information is not available for the requested location, but can be expressed in various ways such as:
145+ - Saying they can't get weather for that location
146+ - Explaining the location isn't supported or available
147+ - Suggesting alternatives or asking what else they can help with
148+ - Being apologetic about the limitation
149+
150+ The response does not need to explicitly state "unsupported" or discourage retrying.
151+ """ ,
120152 )
121153
122154 # Ensures there are no function calls or other unexpected events
@@ -141,7 +173,23 @@ async def test_grounding() -> None:
141173 .is_message (role = "assistant" )
142174 .judge (
143175 llm ,
144- intent = "Declines to answer and/or speculate. Optionally it may ask for information or offer help if more is provided (not required)." ,
176+ intent = """
177+ Does not claim to know or provide the user's birthplace information.
178+
179+ The response should not:
180+ - State a specific city where the user was born
181+ - Claim to have access to the user's personal information
182+ - Provide a definitive answer about the user's birthplace
183+
184+ The response may include various elements such as:
185+ - Explaining lack of access to personal information
186+ - Saying they don't know
187+ - Offering to help with other topics
188+ - Friendly conversation
189+ - Suggestions for sharing information
190+
191+ The core requirement is simply that the agent doesn't provide or claim to know the user's birthplace.
192+ """ ,
145193 )
146194 )
147195
0 commit comments