|
| 1 | +# Parameterized Tests |
| 2 | + |
| 3 | +<div class='subtitle'>Use parameterized tests to test multiple scenarios</div> |
| 4 | + |
| 5 | +In some cases, an agent's functionality should generalize across multiple scenarios. For example, a weather agent should be able to answer questions about the weather in different cities. |
| 6 | + |
| 7 | +In `testing`, instead of writing a separate test for each city, you can write a single parameterized test that runs once per scenario. This helps verify that your agent's behavior is robust and generalizes across inputs. |
| 8 | + |
| 9 | +```python |
| 10 | +from invariant.testing import Trace, assert_equals, parameterized |
| 11 | +import pytest |
| 12 | + |
| 13 | +@pytest.mark.parametrize( |
| 14 | + ("city",), |
| 15 | + [ |
| 16 | + ("Paris",), |
| 17 | + ("London",), |
| 18 | + ("New York",), |
| 19 | + ] |
| 20 | +) |
| 21 | +def test_check_weather_in(city: str): |
| 22 | + # create a Trace object from your agent trajectory |
| 23 | + trace = Trace( |
| 24 | + trace=[ |
| 25 | + {"role": "user", "content": f"What is the weather like in {city}"}, |
| 26 | + {"role": "agent", "content": f"The weather in {city} is 75°F and sunny."}, |
| 27 | + ] |
| 28 | + ) |
| 29 | + |
| 30 | + # make assertions about the agent's behavior |
| 31 | + with trace.as_context(): |
| 32 | + # extract the locations mentioned in the agent's response |
| 33 | + locations = trace.messages()[-1]["content"].extract("locations") |
| 34 | + |
| 35 | + # assert that the agent responded about the given city |
| 36 | + assert_equals( |
| 37 | + 1, len(locations), "The agent should respond about one location only" |
| 38 | + ) |
| 39 | + |
| 40 | + assert_equals(city, locations[0], "The agent should respond about " + city) |
| 41 | +``` |
| 42 | + |
| 43 | +### Visualization |
| 44 | + |
| 45 | +When you push parameterized test results to Explorer (`invariant test --push`), each parameterized test instance is listed separately: |
| 46 | + |
| 47 | +<img src="../../assets/parameterized_tests.png"/> |
0 commit comments