Skip to content

Tool Call Accuracy OpenAPI Tools #42494

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,18 @@
from packaging.version import Version

# Constants.
from ._models import _USER, _AGENT, _TOOL, _TOOL_CALL, _TOOL_CALLS, _FUNCTION, _BUILT_IN_DESCRIPTIONS, _BUILT_IN_PARAMS
from ._models import (
_USER,
_AGENT,
_TOOL,
_TOOL_CALL,
_TOOL_CALLS,
_FUNCTION,
_BUILT_IN_DESCRIPTIONS,
_BUILT_IN_PARAMS,
_OPENAPI,
OpenAPIToolDefinition,
)

# Message instances.
from ._models import Message, SystemMessage, UserMessage, AssistantMessage, ToolCall
Expand Down Expand Up @@ -93,7 +104,7 @@ def _list_tool_calls_chronological(self, thread_id: str, run_id: str) -> List[To
return tool_calls_chronological

@staticmethod
def _extract_function_tool_definitions(thread_run: object) -> List[ToolDefinition]:
def _extract_function_tool_definitions(thread_run: object) -> List[Union[ToolDefinition, OpenAPIToolDefinition]]:
"""
Extracts tool definitions from a thread run.

Expand Down Expand Up @@ -121,6 +132,26 @@ def _extract_function_tool_definitions(thread_run: object) -> List[ToolDefinitio
parameters=parameters,
)
)
elif tool.type == _OPENAPI:
openapi_tool = tool.openapi
tool_definition = OpenAPIToolDefinition(
name=openapi_tool.name,
description=openapi_tool.description,
type=_OPENAPI,
spec=openapi_tool.spec,
auth=openapi_tool.auth.as_dict(),
default_params=openapi_tool.default_params.as_dict() if openapi_tool.default_params else None,
functions=[
ToolDefinition(
name=func.get("name"),
description=func.get("description"),
parameters=func.get("parameters"),
type="function",
)
for func in openapi_tool.get("functions")
],
)
final_tools.append(tool_definition)
else:
# Add limited support for built-in tools. Descriptions and parameters
# are not published, but we'll include placeholders.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@
_AZURE_AI_SEARCH: "Search an Azure AI Search index for relevant data.",
_SHAREPOINT_GROUNDING: "Allows agents to access and retrieve relevant content from Microsoft SharePoint document libraries, grounding responses in organizational knowledge.",
_FABRIC_DATAAGENT: "Connect to Microsoft Fabric data agents to retrieve data across different data sources.",
_OPENAPI: "Connects agents to external RESTful APIs using OpenAPI 3.0 specifications, enabling seamless access to third-party services.",
}

# Built-in tool parameters are hidden, but we include basic parameters for evaluation purposes.
Expand Down Expand Up @@ -101,13 +100,6 @@
"type": "object",
"properties": {"input": {"type": "string", "description": "Search terms to use."}},
},
_OPENAPI: {
"type": "object",
"properties": {
"name": {"type": "string", "description": "The name of the function to call."},
"arguments": {"type": "string", "description": "JSON string of the arguments to pass to the function."},
},
},
}


Expand Down Expand Up @@ -245,6 +237,27 @@ class ToolDefinition(BaseModel):
parameters: dict


class OpenAPIToolDefinition(BaseModel):
"""Represents an OpenAPI tool definition that will be used in the agent.

An OpenAPI tool groups a collection of function definitions under one tool entry.

:param name: The name of the tool.
:type name: str
:param type: The type of the tool.
:type type: str
:param description: An optional description of the tool. Defaults to None.
:type description: Optional[str]
:param spec: The OpenAPI specification attached to the tool.
:type spec: object
:param auth: The authentication details attached to the tool.
:type auth: object
:param default_params: Optional default parameters for the tool. Defaults to None.
:type default_params: Optional[list[str]]
:param functions: The function definitions that belong to this tool.
:type functions: list[ToolDefinition]
"""

name: str
type: str
description: Optional[str] = None
spec: object
auth: object
default_params: Optional[list[str]] = None
Copy link
Preview

Copilot AI Aug 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use Optional[List[str]] instead of Optional[list[str]] for consistency with other type annotations in the codebase.

Suggested change
default_params: Optional[list[str]] = None
default_params: Optional[List[str]] = None

Copilot uses AI. Check for mistakes.

functions: list[ToolDefinition]
Copy link
Preview

Copilot AI Aug 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use List[ToolDefinition] instead of list[ToolDefinition] for consistency with other type annotations in the codebase and better compatibility with older Python versions.

Suggested change
functions: list[ToolDefinition]
functions: List[ToolDefinition]

Copilot uses AI. Check for mistakes.



class ToolCall:
"""Represents a tool call, used as an intermediate step in the conversion process.
Expand Down Expand Up @@ -275,7 +288,7 @@ class EvaluatorData(BaseModel):

query: List[Message]
response: List[Message]
tool_definitions: List[ToolDefinition]
tool_definitions: List[Union[ToolDefinition, OpenAPIToolDefinition]]

def to_json(self):
"""Converts the result to a JSON string.
Expand Down Expand Up @@ -305,14 +318,16 @@ def break_tool_call_into_messages(tool_call: ToolCall, run_id: str) -> List[Mess
# all in most of the cases, and bing would only show the API URL, without arguments or results.
# Bing grounding would have "bing_grounding" in details with "requesturl" that will just be the API path with query.
# TODO: Work with AI Services to add converter support for BingGrounding and CodeInterpreter.
if hasattr(tool_call.details, _FUNCTION):
if hasattr(tool_call.details, _FUNCTION) or tool_call.details.get("function"):
# This is the internals of the content object that will be included with the tool call.
tool_call_id = tool_call.details.id
content_tool_call = {
"type": _TOOL_CALL,
"tool_call_id": tool_call_id,
"name": tool_call.details.function.name,
"arguments": safe_loads(tool_call.details.function.arguments),
"name": tool_call.details.get(_FUNCTION).get("name") if tool_call.details.get(_FUNCTION) else None,
"arguments": safe_loads(
tool_call.details.get(_FUNCTION).get("arguments") if tool_call.details.get(_FUNCTION) else None
),
}
else:
# Treat built-in tools separately. Object models may be unique so handle each case separately
Expand Down Expand Up @@ -350,8 +365,8 @@ def break_tool_call_into_messages(tool_call: ToolCall, run_id: str) -> List[Mess
# assistant's action of calling the tool.
messages.append(AssistantMessage(run_id=run_id, content=[to_dict(content_tool_call)], createdAt=tool_call.created))

if hasattr(tool_call.details, _FUNCTION):
output = safe_loads(tool_call.details.function["output"])
if hasattr(tool_call.details, _FUNCTION) or tool_call.details.get("function"):
output = safe_loads(tool_call.details.get("function")["output"])
else:
try:
# Some built-ins may have output, others may not
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
from itertools import chain
import math
import os
import logging
Expand Down Expand Up @@ -315,6 +316,14 @@ def _extract_needed_tool_definitions(self, tool_calls, tool_definitions):
built_in_definitions = _get_needed_built_in_definitions(tool_calls)
needed_tool_definitions.extend(built_in_definitions)

# OpenAPI tool is a collection of functions, so we need to expand it
tool_definitions_expanded = list(
chain.from_iterable(
tool.get("functions", []) if tool.get("type") == "openapi" else [tool]
for tool in needed_tool_definitions
)
)

# Validate that all tool calls have corresponding definitions
for tool_call in tool_calls:
if isinstance(tool_call, dict):
Expand All @@ -329,7 +338,7 @@ def _extract_needed_tool_definitions(self, tool_calls, tool_definitions):
# This is a regular function tool from converter
tool_definition_exists = any(
tool.get("name") == tool_name and tool.get("type", "function") == "function"
for tool in tool_definitions
for tool in tool_definitions_expanded
)
if not tool_definition_exists:
raise EvaluationException(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,83 @@ def test_evaluate_open_api(self, mock_model_config):
tool_definitions = []
result = evaluator(query=query, tool_calls=tool_calls, tool_definitions=tool_definitions)

key = ToolCallAccuracyEvaluator._RESULT_KEY
assert result is not None
assert result[key] == "not applicable"
assert result[f"{key}_result"] == "pass"

def test_evaluate_open_api_with_tool_definition(self, mock_model_config):
evaluator = ToolCallAccuracyEvaluator(model_config=mock_model_config)
evaluator._flow = MagicMock(side_effect=flow_side_effect)

# Test OpenAPI function call for exchange rates - converter format
query = "What is the exchange rate from GBP to EUR?"
tool_calls = [
{
"type": "tool_call",
"tool_call_id": "call_builtin_good",
"name": "get_countries_LookupCountryByCurrency",
"arguments": {"currency": "GBP"},
},
]
tool_definitions = [
{
"name": "get_countries",
"type": "openapi",
"description": "Retrieve a list of countries",
"spec": {
"openapi": "3.1.0",
"info": {
"title": "RestCountries.NET API",
"description": "Web API version 3.1 for managing country items, based on previous implementations from restcountries.eu and restcountries.com.",
"version": "v3.1",
},
"servers": [{"url": "https://restcountries.net"}],
"auth": [],
"paths": {
"/v3.1/currency": {
"get": {
"description": "Search by currency.",
"operationId": "LookupCountryByCurrency",
"parameters": [
{
"name": "currency",
"in": "query",
"description": "The currency to search for.",
"required": "true",
"schema": {"type": "string"},
}
],
"responses": {
"200": {
"description": "Success",
"content": {"text/plain": {"schema": {"type": "string"}}},
}
},
}
}
},
"components": {"schemes": {}},
},
"auth": {"type": "anonymous", "security_scheme": {}},
"functions": [
{
"name": "get_countries_LookupCountryByCurrency",
"type": "function",
"description": "Search by currency.",
"parameters": {
"type": "object",
"properties": {
"currency": {"type": "string", "description": "The currency to search for."}
},
"required": ["currency"],
},
}
],
}
]
result = evaluator(query=query, tool_calls=tool_calls, tool_definitions=tool_definitions)

key = ToolCallAccuracyEvaluator._RESULT_KEY
assert result is not None
assert result[key] == 5.0
Expand Down