Skip to content

Commit 44e41b9

Browse files
authored
Tool Call Accuracy OpenAPI Tools (#42494)
* Tool Call Accuracy OpenAPI Tools * Linting issues
1 parent e7d9490 commit 44e41b9

File tree

4 files changed

+149
-17
lines changed

4 files changed

+149
-17
lines changed

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_converters/_ai_services.py

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,18 @@
1111
from packaging.version import Version
1212

1313
# Constants.
14-
from ._models import _USER, _AGENT, _TOOL, _TOOL_CALL, _TOOL_CALLS, _FUNCTION, _BUILT_IN_DESCRIPTIONS, _BUILT_IN_PARAMS
14+
from ._models import (
15+
_USER,
16+
_AGENT,
17+
_TOOL,
18+
_TOOL_CALL,
19+
_TOOL_CALLS,
20+
_FUNCTION,
21+
_BUILT_IN_DESCRIPTIONS,
22+
_BUILT_IN_PARAMS,
23+
_OPENAPI,
24+
OpenAPIToolDefinition,
25+
)
1526

1627
# Message instances.
1728
from ._models import Message, SystemMessage, UserMessage, AssistantMessage, ToolCall
@@ -93,7 +104,7 @@ def _list_tool_calls_chronological(self, thread_id: str, run_id: str) -> List[To
93104
return tool_calls_chronological
94105

95106
@staticmethod
96-
def _extract_function_tool_definitions(thread_run: object) -> List[ToolDefinition]:
107+
def _extract_function_tool_definitions(thread_run: object) -> List[Union[ToolDefinition, OpenAPIToolDefinition]]:
97108
"""
98109
Extracts tool definitions from a thread run.
99110
@@ -121,6 +132,26 @@ def _extract_function_tool_definitions(thread_run: object) -> List[ToolDefinitio
121132
parameters=parameters,
122133
)
123134
)
135+
elif tool.type == _OPENAPI:
136+
openapi_tool = tool.openapi
137+
tool_definition = OpenAPIToolDefinition(
138+
name=openapi_tool.name,
139+
description=openapi_tool.description,
140+
type=_OPENAPI,
141+
spec=openapi_tool.spec,
142+
auth=openapi_tool.auth.as_dict(),
143+
default_params=openapi_tool.default_params.as_dict() if openapi_tool.default_params else None,
144+
functions=[
145+
ToolDefinition(
146+
name=func.get("name"),
147+
description=func.get("description"),
148+
parameters=func.get("parameters"),
149+
type="function",
150+
)
151+
for func in openapi_tool.get("functions")
152+
],
153+
)
154+
final_tools.append(tool_definition)
124155
else:
125156
# Add limited support for built-in tools. Descriptions and parameters
126157
# are not published, but we'll include placeholders.

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_converters/_models.py

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@
5252
_AZURE_AI_SEARCH: "Search an Azure AI Search index for relevant data.",
5353
_SHAREPOINT_GROUNDING: "Allows agents to access and retrieve relevant content from Microsoft SharePoint document libraries, grounding responses in organizational knowledge.",
5454
_FABRIC_DATAAGENT: "Connect to Microsoft Fabric data agents to retrieve data across different data sources.",
55-
_OPENAPI: "Connects agents to external RESTful APIs using OpenAPI 3.0 specifications, enabling seamless access to third-party services.",
5655
}
5756

5857
# Built-in tool parameters are hidden, but we include basic parameters for evaluation purposes.
@@ -101,13 +100,6 @@
101100
"type": "object",
102101
"properties": {"input": {"type": "string", "description": "Search terms to use."}},
103102
},
104-
_OPENAPI: {
105-
"type": "object",
106-
"properties": {
107-
"name": {"type": "string", "description": "The name of the function to call."},
108-
"arguments": {"type": "string", "description": "JSON string of the arguments to pass to the function."},
109-
},
110-
},
111103
}
112104

113105

@@ -245,6 +237,27 @@ class ToolDefinition(BaseModel):
245237
parameters: dict
246238

247239

240+
class OpenAPIToolDefinition(BaseModel):
    """Represents an OpenAPI tool definition that will be used in the agent.

    An OpenAPI tool groups one or more callable functions under a single
    named tool, together with the specification and authentication
    settings they share.

    :param name: The name of the tool.
    :type name: str
    :param type: The type of the tool.
    :type type: str
    :param description: An optional description of the tool.
    :type description: Optional[str]
    :param spec: The OpenAPI specification associated with the tool.
    :type spec: object
    :param auth: The authentication settings associated with the tool.
    :type auth: object
    :param default_params: Optional default parameters for the tool.
    :type default_params: Optional[List[str]]
    :param functions: The function definitions exposed by the tool.
    :type functions: List[ToolDefinition]
    """

    name: str
    type: str
    description: Optional[str] = None
    spec: object
    auth: object
    # NOTE(review): the converter fills this from `default_params.as_dict()`;
    # confirm that value really is a list of strings.
    default_params: Optional[List[str]] = None
    # `typing.List` is used for consistency with the other models in this file.
    functions: List[ToolDefinition]
259+
260+
248261
class ToolCall:
249262
"""Represents a tool call, used as an intermediate step in the conversion process.
250263
@@ -275,7 +288,7 @@ class EvaluatorData(BaseModel):
275288

276289
query: List[Message]
277290
response: List[Message]
278-
tool_definitions: List[ToolDefinition]
291+
tool_definitions: List[Union[ToolDefinition, OpenAPIToolDefinition]]
279292

280293
def to_json(self):
281294
"""Converts the result to a JSON string.
@@ -305,14 +318,16 @@ def break_tool_call_into_messages(tool_call: ToolCall, run_id: str) -> List[Mess
305318
# all in most of the cases, and bing would only show the API URL, without arguments or results.
306319
# Bing grounding would have "bing_grounding" in details with "requesturl" that will just be the API path with query.
307320
# TODO: Work with AI Services to add converter support for BingGrounding and CodeInterpreter.
308-
if hasattr(tool_call.details, _FUNCTION):
321+
if hasattr(tool_call.details, _FUNCTION) or tool_call.details.get("function"):
309322
# This is the internals of the content object that will be included with the tool call.
310323
tool_call_id = tool_call.details.id
311324
content_tool_call = {
312325
"type": _TOOL_CALL,
313326
"tool_call_id": tool_call_id,
314-
"name": tool_call.details.function.name,
315-
"arguments": safe_loads(tool_call.details.function.arguments),
327+
"name": tool_call.details.get(_FUNCTION).get("name") if tool_call.details.get(_FUNCTION) else None,
328+
"arguments": safe_loads(
329+
tool_call.details.get(_FUNCTION).get("arguments") if tool_call.details.get(_FUNCTION) else None
330+
),
316331
}
317332
else:
318333
# Treat built-in tools separately. Object models may be unique so handle each case separately
@@ -350,8 +365,8 @@ def break_tool_call_into_messages(tool_call: ToolCall, run_id: str) -> List[Mess
350365
# assistant's action of calling the tool.
351366
messages.append(AssistantMessage(run_id=run_id, content=[to_dict(content_tool_call)], createdAt=tool_call.created))
352367

353-
if hasattr(tool_call.details, _FUNCTION):
354-
output = safe_loads(tool_call.details.function["output"])
368+
if hasattr(tool_call.details, _FUNCTION) or tool_call.details.get("function"):
369+
output = safe_loads(tool_call.details.get("function")["output"])
355370
else:
356371
try:
357372
# Some built-ins may have output, others may not

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_tool_call_accuracy/_tool_call_accuracy.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# ---------------------------------------------------------
22
# Copyright (c) Microsoft Corporation. All rights reserved.
33
# ---------------------------------------------------------
4+
from itertools import chain
45
import math
56
import os
67
import logging
@@ -315,6 +316,14 @@ def _extract_needed_tool_definitions(self, tool_calls, tool_definitions):
315316
built_in_definitions = _get_needed_built_in_definitions(tool_calls)
316317
needed_tool_definitions.extend(built_in_definitions)
317318

319+
# OpenAPI tool is a collection of functions, so we need to expand it
320+
tool_definitions_expanded = list(
321+
chain.from_iterable(
322+
tool.get("functions", []) if tool.get("type") == "openapi" else [tool]
323+
for tool in needed_tool_definitions
324+
)
325+
)
326+
318327
# Validate that all tool calls have corresponding definitions
319328
for tool_call in tool_calls:
320329
if isinstance(tool_call, dict):
@@ -329,7 +338,7 @@ def _extract_needed_tool_definitions(self, tool_calls, tool_definitions):
329338
# This is a regular function tool from converter
330339
tool_definition_exists = any(
331340
tool.get("name") == tool_name and tool.get("type", "function") == "function"
332-
for tool in tool_definitions
341+
for tool in tool_definitions_expanded
333342
)
334343
if not tool_definition_exists:
335344
raise EvaluationException(

sdk/evaluation/azure-ai-evaluation/tests/unittests/test_tool_call_accuracy_evaluator.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -603,6 +603,83 @@ def test_evaluate_open_api(self, mock_model_config):
603603
tool_definitions = []
604604
result = evaluator(query=query, tool_calls=tool_calls, tool_definitions=tool_definitions)
605605

606+
key = ToolCallAccuracyEvaluator._RESULT_KEY
607+
assert result is not None
608+
assert result[key] == "not applicable"
609+
assert result[f"{key}_result"] == "pass"
610+
611+
def test_evaluate_open_api_with_tool_definition(self, mock_model_config):
612+
evaluator = ToolCallAccuracyEvaluator(model_config=mock_model_config)
613+
evaluator._flow = MagicMock(side_effect=flow_side_effect)
614+
615+
# Test OpenAPI function call for exchange rates - converter format
616+
query = "What is the exchange rate from GBP to EUR?"
617+
tool_calls = [
618+
{
619+
"type": "tool_call",
620+
"tool_call_id": "call_builtin_good",
621+
"name": "get_countries_LookupCountryByCurrency",
622+
"arguments": {"currency": "GBP"},
623+
},
624+
]
625+
tool_definitions = [
626+
{
627+
"name": "get_countries",
628+
"type": "openapi",
629+
"description": "Retrieve a list of countries",
630+
"spec": {
631+
"openapi": "3.1.0",
632+
"info": {
633+
"title": "RestCountries.NET API",
634+
"description": "Web API version 3.1 for managing country items, based on previous implementations from restcountries.eu and restcountries.com.",
635+
"version": "v3.1",
636+
},
637+
"servers": [{"url": "https://restcountries.net"}],
638+
"auth": [],
639+
"paths": {
640+
"/v3.1/currency": {
641+
"get": {
642+
"description": "Search by currency.",
643+
"operationId": "LookupCountryByCurrency",
644+
"parameters": [
645+
{
646+
"name": "currency",
647+
"in": "query",
648+
"description": "The currency to search for.",
649+
"required": "true",
650+
"schema": {"type": "string"},
651+
}
652+
],
653+
"responses": {
654+
"200": {
655+
"description": "Success",
656+
"content": {"text/plain": {"schema": {"type": "string"}}},
657+
}
658+
},
659+
}
660+
}
661+
},
662+
"components": {"schemes": {}},
663+
},
664+
"auth": {"type": "anonymous", "security_scheme": {}},
665+
"functions": [
666+
{
667+
"name": "get_countries_LookupCountryByCurrency",
668+
"type": "function",
669+
"description": "Search by currency.",
670+
"parameters": {
671+
"type": "object",
672+
"properties": {
673+
"currency": {"type": "string", "description": "The currency to search for."}
674+
},
675+
"required": ["currency"],
676+
},
677+
}
678+
],
679+
}
680+
]
681+
result = evaluator(query=query, tool_calls=tool_calls, tool_definitions=tool_definitions)
682+
606683
key = ToolCallAccuracyEvaluator._RESULT_KEY
607684
assert result is not None
608685
assert result[key] == 5.0

0 commit comments

Comments
 (0)