|
5 | 5 | import math |
6 | 6 | import re |
7 | 7 | import os |
8 | | -from typing import Dict, Optional, TypeVar, Union |
| 8 | +from itertools import chain |
| 9 | +from typing import Dict, Optional, TypeVar, Union, List |
9 | 10 |
|
10 | 11 | if os.getenv("AI_EVALS_USE_PF_PROMPTY", "false").lower() == "true": |
11 | 12 | from promptflow.core._flow import AsyncPrompty |
@@ -188,3 +189,157 @@ async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str]]: # t |
188 | 189 | f"{self._result_key}_result": binary_result, |
189 | 190 | f"{self._result_key}_threshold": self._threshold, |
190 | 191 | } |
| 192 | + |
| 193 | + @staticmethod |
| 194 | + def _get_built_in_tool_definition(tool_name: str): |
| 195 | + """Get the definition for the built-in tool.""" |
| 196 | + try: |
| 197 | + from ..._converters._models import _BUILT_IN_DESCRIPTIONS, _BUILT_IN_PARAMS |
| 198 | + |
| 199 | + if tool_name in _BUILT_IN_DESCRIPTIONS: |
| 200 | + return { |
| 201 | + "type": tool_name, |
| 202 | + "description": _BUILT_IN_DESCRIPTIONS[tool_name], |
| 203 | + "name": tool_name, |
| 204 | + "parameters": _BUILT_IN_PARAMS.get(tool_name, {}), |
| 205 | + } |
| 206 | + except ImportError: |
| 207 | + pass |
| 208 | + return None |
| 209 | + |
def _get_needed_built_in_tool_definitions(self, tool_calls: List[Dict]) -> List[Dict]:
    """Collect the built-in tool definitions referenced by the given tool calls.

    Only converter-format entries are considered, i.e. dictionaries shaped like
    ``{"type": "tool_call", "name": "bing_custom_search", "arguments": {...}}``.
    Entries of any other shape are skipped without error.

    :param tool_calls: The tool calls to inspect.
    :type tool_calls: List[Dict]
    :return: The distinct definitions returned by
        ``_get_built_in_tool_definition``, in first-seen order.
    :rtype: List[Dict]
    """
    resolved = []
    for call in tool_calls:
        if not isinstance(call, dict) or call.get("type") != "tool_call":
            continue

        name = call.get("name")
        if not name:
            continue

        built_in = self._get_built_in_tool_definition(name)
        # Equality-based membership keeps a definition from being listed twice
        # when the same tool is called more than once.
        if built_in and built_in not in resolved:
            resolved.append(built_in)

    return resolved
| 226 | + |
def _extract_tool_names_from_calls(self, tool_calls: List[Dict]) -> List[str]:
    """Extract just the tool names from tool calls, dropping everything else.

    Three dictionary shapes are recognised, checked in this order:

    * converter format: ``{"type": "tool_call", "name": ...}``
    * function call format: ``{"function": {"name": ...}}``
    * direct name format: ``{"name": ...}``

    Entries that are not dictionaries, or that carry no usable name, are
    skipped. A ``"function"`` value that is ``None`` or not a dictionary is
    ignored, so the entry falls back to the direct ``"name"`` lookup.

    :param tool_calls: The tool calls to read names from.
    :type tool_calls: List[Dict]
    :return: The tool names in call order; duplicates are kept.
    :rtype: List[str]
    """
    tool_names = []
    for tool_call in tool_calls:
        if not isinstance(tool_call, dict):
            continue

        if tool_call.get("type") == "tool_call":
            # Converter format: only the top-level name counts.
            tool_name = tool_call.get("name")
        else:
            # A "function" key may be present with a None or non-dict value;
            # only read a nested name when it really is a mapping.
            function = tool_call.get("function")
            function_name = function.get("name") if isinstance(function, dict) else None
            tool_name = function_name or tool_call.get("name")

        if tool_name:
            tool_names.append(tool_name)
    return tool_names
| 244 | + |
def _extract_needed_tool_definitions(
    self, tool_calls: List[Dict], tool_definitions: List[Dict], error_target: ErrorTarget
) -> List[Dict]:
    """Gather the tool definitions for the given tool calls and validate the calls.

    The returned list holds every user-provided definition followed by the
    built-in definitions resolved for the calls. Each call is then validated:
    it must be a converter-format dictionary (``type == "tool_call"``) with a
    name, and that name must either resolve to a built-in tool or match a
    function definition (OpenAPI definitions are expanded into their functions
    for this check).

    :param tool_calls: The tool calls that need definitions
    :type tool_calls: List[Dict]
    :param tool_definitions: User-provided tool definitions
    :type tool_definitions: List[Dict]
    :param error_target: The evaluator-specific error target for exceptions
    :type error_target: ErrorTarget
    :return: List of needed tool definitions
    :rtype: List[Dict]
    :raises EvaluationException: If a tool call is not a dictionary, is not in
        converter format, has no name, or has no matching definition
    """

    def _user_error(message):
        # Every validation failure shares the same blame, category and target.
        return EvaluationException(
            message=message,
            blame=ErrorBlame.USER_ERROR,
            category=ErrorCategory.INVALID_VALUE,
            target=error_target,
        )

    # User-provided definitions come first, then the built-ins that are called.
    collected = list(tool_definitions)
    collected += self._get_needed_built_in_tool_definitions(tool_calls)

    # An OpenAPI tool is a collection of functions, so flatten it for lookup.
    flattened = []
    for definition in collected:
        if definition.get("type") == "openapi":
            flattened.extend(definition.get("functions", []))
        else:
            flattened.append(definition)

    # Every tool call must map onto a known definition.
    for call in tool_calls:
        if not isinstance(call, dict):
            raise _user_error(f"Tool call is not a dictionary: {call}")

        if call.get("type") != "tool_call":
            # Only the converter format is supported.
            raise _user_error(f"Unsupported tool call format. Only converter format is supported: {call}")

        name = call.get("name")
        if not name:
            raise _user_error(f"Tool call missing name: {call}")

        if self._get_built_in_tool_definition(name):
            # Built-in tool from the converter; its definition was added above.
            continue

        # Regular function tool: a definition without "type" counts as a function.
        is_defined = any(
            candidate.get("name") == name and candidate.get("type", "function") == "function"
            for candidate in flattened
        )
        if not is_defined:
            raise _user_error(f"Tool definition for {name} not found")

    return collected
| 325 | + |
def _not_applicable_result(
    self, error_message: str, threshold: Union[int, float]
) -> Dict[str, Union[str, float, Dict]]:
    """Build the result returned when the evaluation does not apply.

    Used when no tool calls were made or the tool call type is not supported.
    The result is reported as a ``"pass"`` with the not-applicable marker as
    its value and ``error_message`` as the reason.

    :param error_message: The error message explaining why evaluation is not applicable.
    :type error_message: str
    :param threshold: The threshold value for the evaluator.
    :type threshold: Union[int, float]
    :return: A dictionary containing the result of the evaluation.
    :rtype: Dict[str, Union[str, float, Dict]]
    """
    key = self._result_key
    outcome = {key: self._NOT_APPLICABLE_RESULT}
    outcome[f"{key}_result"] = "pass"
    outcome[f"{key}_threshold"] = threshold
    outcome[f"{key}_reason"] = error_message
    outcome[f"{key}_details"] = {}
    return outcome
0 commit comments