|
44 | 44 |
|
45 | 45 |
|
46 | 46 | _evals_common = _genai.evals._evals_common |
47 | | -_evals_utils = _genai._evals_utils |
48 | 47 |
|
49 | 48 | pytestmark = pytest.mark.usefixtures("google_auth_mock") |
50 | 49 |
|
@@ -1515,6 +1514,64 @@ def test_run_inference_with_litellm_parsing( |
1515 | 1514 | pd.testing.assert_frame_equal(call_kwargs["prompt_dataset"], mock_df) |
1516 | 1515 |
|
1517 | 1516 |
|
@pytest.mark.usefixtures("google_auth_mock")
class TestEvalsMetricHandlers:
    """Unit tests for utility functions in _evals_metric_handlers.

    Every test here exercises `_has_tool_call`, feeding it event lists that
    either do or do not contain a `function_call` part.
    """

    @staticmethod
    def _text_event(event_id):
        """Build an Event whose single part is the plain text "hello"."""
        text_part = genai_types.Part(text="hello")
        return vertexai_genai_types.evals.Event(
            event_id=event_id,
            content=genai_types.Content(parts=[text_part]),
        )

    @staticmethod
    def _tool_call_event(event_id):
        """Build an Event whose single part is a `search` function call."""
        call_part = genai_types.Part(
            function_call=genai_types.FunctionCall(name="search", args={})
        )
        return vertexai_genai_types.evals.Event(
            event_id=event_id,
            content=genai_types.Content(parts=[call_part]),
        )

    def test_has_tool_call_with_tool_call(self):
        """A lone function-call event is reported as a tool call."""
        found = _evals_metric_handlers._has_tool_call(
            [self._tool_call_event("1")]
        )
        assert found

    def test_has_tool_call_no_tool_call(self):
        """A lone text-only event is not reported as a tool call."""
        found = _evals_metric_handlers._has_tool_call([self._text_event("1")])
        assert not found

    def test_has_tool_call_empty_events(self):
        """An empty event list yields a falsy result."""
        found = _evals_metric_handlers._has_tool_call([])
        assert not found

    def test_has_tool_call_none_events(self):
        """Passing None instead of a list yields a falsy result."""
        found = _evals_metric_handlers._has_tool_call(None)
        assert not found

    def test_has_tool_call_mixed_events(self):
        """A text event followed by a function-call event is a tool call."""
        history = [self._text_event("1"), self._tool_call_event("2")]
        found = _evals_metric_handlers._has_tool_call(history)
        assert found
| 1574 | + |
1518 | 1575 | @pytest.mark.usefixtures("google_auth_mock") |
1519 | 1576 | class TestRunAgentInternal: |
1520 | 1577 | """Unit tests for the _run_agent_internal function.""" |
@@ -3890,6 +3947,39 @@ def test_eval_case_to_agent_data_agent_info_empty(self): |
3890 | 3947 |
|
3891 | 3948 | assert agent_data.agent_config is None |
3892 | 3949 |
|
| 3950 | + @mock.patch.object(_evals_metric_handlers.logger, "warning") |
| 3951 | + def test_tool_use_quality_metric_no_tool_call_logs_warning( |
| 3952 | + self, mock_warning, mock_api_client_fixture |
| 3953 | + ): |
| 3954 | + """Tests that PredefinedMetricHandler warns for tool_use_quality_v1 if no tool call.""" |
| 3955 | + metric = vertexai_genai_types.Metric(name="tool_use_quality_v1") |
| 3956 | + handler = _evals_metric_handlers.PredefinedMetricHandler( |
| 3957 | + module=evals.Evals(api_client_=mock_api_client_fixture), metric=metric |
| 3958 | + ) |
| 3959 | + eval_case = vertexai_genai_types.EvalCase( |
| 3960 | + eval_case_id="case-no-tool-call", |
| 3961 | + prompt=genai_types.Content(parts=[genai_types.Part(text="Hello")]), |
| 3962 | + responses=[ |
| 3963 | + vertexai_genai_types.ResponseCandidate( |
| 3964 | + response=genai_types.Content(parts=[genai_types.Part(text="Hi")]) |
| 3965 | + ) |
| 3966 | + ], |
| 3967 | + intermediate_events=[ |
| 3968 | + vertexai_genai_types.evals.Event( |
| 3969 | + event_id="event1", |
| 3970 | + content=genai_types.Content( |
| 3971 | + parts=[genai_types.Part(text="intermediate event")] |
| 3972 | + ), |
| 3973 | + ) |
| 3974 | + ], |
| 3975 | + ) |
| 3976 | + handler._build_request_payload(eval_case, response_index=0) |
| 3977 | + mock_warning.assert_called_once_with( |
| 3978 | + "Metric 'tool_use_quality_v1' requires tool usage in " |
| 3979 | + "'intermediate_events', but no tool usage was found for case %s.", |
| 3980 | + "case-no-tool-call", |
| 3981 | + ) |
| 3982 | + |
3893 | 3983 |
|
3894 | 3984 | @pytest.mark.usefixtures("google_auth_mock") |
3895 | 3985 | class TestLLMMetricHandlerPayload: |
|
0 commit comments