Skip to content

Commit 913b609

Browse files
vertex-sdk-bot authored and copybara-github committed
feat: GenAI Client(evals): Add warning message when tool usage is empty for tool_use_quality
PiperOrigin-RevId: 828122022
1 parent acb6cab commit 913b609

File tree

2 files changed

+111
-1
lines changed

2 files changed

+111
-1
lines changed

tests/unit/vertexai/genai/test_evals.py

Lines changed: 91 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@
4444

4545

4646
_evals_common = _genai.evals._evals_common
47-
_evals_utils = _genai._evals_utils
4847

4948
pytestmark = pytest.mark.usefixtures("google_auth_mock")
5049

@@ -1515,6 +1514,64 @@ def test_run_inference_with_litellm_parsing(
15151514
pd.testing.assert_frame_equal(call_kwargs["prompt_dataset"], mock_df)
15161515

15171516

1517+
@pytest.mark.usefixtures("google_auth_mock")
class TestEvalsMetricHandlers:
    """Exercises the `_has_tool_call` helper of `_evals_metric_handlers`."""

    @staticmethod
    def _text_event(event_id):
        """Builds an event whose single part is the plain text "hello"."""
        text_part = genai_types.Part(text="hello")
        return vertexai_genai_types.evals.Event(
            event_id=event_id,
            content=genai_types.Content(parts=[text_part]),
        )

    @staticmethod
    def _tool_event(event_id):
        """Builds an event whose single part is a "search" function call."""
        search_call = genai_types.FunctionCall(name="search", args={})
        call_part = genai_types.Part(function_call=search_call)
        return vertexai_genai_types.evals.Event(
            event_id=event_id,
            content=genai_types.Content(parts=[call_part]),
        )

    def test_has_tool_call_with_tool_call(self):
        # A lone function-call event must be detected.
        only_tool = [self._tool_event("1")]
        assert _evals_metric_handlers._has_tool_call(only_tool)

    def test_has_tool_call_no_tool_call(self):
        # Text-only events carry no tool usage.
        only_text = [self._text_event("1")]
        assert not _evals_metric_handlers._has_tool_call(only_text)

    def test_has_tool_call_empty_events(self):
        no_events = []
        assert not _evals_metric_handlers._has_tool_call(no_events)

    def test_has_tool_call_none_events(self):
        missing_events = None
        assert not _evals_metric_handlers._has_tool_call(missing_events)

    def test_has_tool_call_mixed_events(self):
        # The tool call sits in the second event, after a text-only one.
        text_then_tool = [self._text_event("1"), self._tool_event("2")]
        assert _evals_metric_handlers._has_tool_call(text_then_tool)
1573+
1574+
15181575
@pytest.mark.usefixtures("google_auth_mock")
15191576
class TestRunAgentInternal:
15201577
"""Unit tests for the _run_agent_internal function."""
@@ -3890,6 +3947,39 @@ def test_eval_case_to_agent_data_agent_info_empty(self):
38903947

38913948
assert agent_data.agent_config is None
38923949

3950+
@mock.patch.object(_evals_metric_handlers.logger, "warning")
def test_tool_use_quality_metric_no_tool_call_logs_warning(
    self, mock_warning, mock_api_client_fixture
):
    """Checks that a tool_use_quality_v1 payload built without tool calls warns."""
    case_id = "case-no-tool-call"

    handler = _evals_metric_handlers.PredefinedMetricHandler(
        module=evals.Evals(api_client_=mock_api_client_fixture),
        metric=vertexai_genai_types.Metric(name="tool_use_quality_v1"),
    )

    # The only intermediate event is plain text, i.e. it has no function call.
    text_only_event = vertexai_genai_types.evals.Event(
        event_id="event1",
        content=genai_types.Content(
            parts=[genai_types.Part(text="intermediate event")]
        ),
    )
    candidate = vertexai_genai_types.ResponseCandidate(
        response=genai_types.Content(parts=[genai_types.Part(text="Hi")])
    )
    eval_case = vertexai_genai_types.EvalCase(
        eval_case_id=case_id,
        prompt=genai_types.Content(parts=[genai_types.Part(text="Hello")]),
        responses=[candidate],
        intermediate_events=[text_only_event],
    )

    handler._build_request_payload(eval_case, response_index=0)

    # Exactly one warning, using lazy %-style args with the case id.
    mock_warning.assert_called_once_with(
        "Metric 'tool_use_quality_v1' requires tool usage in "
        "'intermediate_events', but no tool usage was found for case %s.",
        case_id,
    )
3982+
38933983

38943984
@pytest.mark.usefixtures("google_auth_mock")
38953985
class TestLLMMetricHandlerPayload:

vertexai/_genai/_evals_metric_handlers.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,18 @@
3939
_MAX_RETRIES = 3
4040

4141

42+
def _has_tool_call(intermediate_events: Optional[list[types.evals.Event]]) -> bool:
    """Checks if any event in intermediate_events has a function call.

    Args:
        intermediate_events: The events to scan. May be None or empty.

    Returns:
        True if at least one part of at least one event has a truthy
        `function_call` attribute; False otherwise, including when the
        input is None or empty.
    """
    if not intermediate_events:
        return False
    for event in intermediate_events:
        # Use getattr at every level so a None entry, a missing/None
        # `content`, or a missing/None `parts` counts as "no tool call"
        # rather than raising AttributeError.
        content = getattr(event, "content", None)
        parts = getattr(content, "parts", None) or ()
        if any(getattr(part, "function_call", None) for part in parts):
            return True
    return False
52+
53+
4254
def _extract_text_from_content(
4355
content: Optional[genai_types.Content], warn_property: str = "text"
4456
) -> Optional[str]:
@@ -903,6 +915,14 @@ def _build_request_payload(
903915
f"Response content missing for candidate {response_index}."
904916
)
905917

918+
if self.metric.name == "tool_use_quality_v1":
919+
if not _has_tool_call(eval_case.intermediate_events):
920+
logger.warning(
921+
"Metric 'tool_use_quality_v1' requires tool usage in "
922+
"'intermediate_events', but no tool usage was found for case %s.",
923+
eval_case.eval_case_id,
924+
)
925+
906926
reference_instance_data = None
907927
if eval_case.reference:
908928
reference_instance_data = PredefinedMetricHandler._content_to_instance_data(

0 commit comments

Comments
 (0)