Azure
diff --git a/sdk/evaluation/azure-ai-evaluation/assets.json b/sdk/evaluation/azure-ai-evaluation/assets.json
Lines changed: 1 addition & 1 deletion
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/utils.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/utils.py
Lines changed: 68 additions & 0 deletions
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_eval.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_eval.py
Lines changed: 13 additions & 3 deletions
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_groundedness/_groundedness.py
Lines changed: 86 additions & 33 deletions
@@ -2,5 +2,5 @@
   "AssetsRepo": "Azure/azure-sdk-assets",
   "AssetsRepoPrefixPath": "python",
   "TagPrefix": "python/evaluation/azure-ai-evaluation",
-  "Tag": "python/evaluation/azure-ai-evaluation_e9fbe5cd65"
+  "Tag": "python/evaluation/azure-ai-evaluation_d7b00f22b8"
 }
@@ -659,6 +659,74 @@ def reformat_tool_definitions(tool_definitions, logger=None):
         return tool_definitions
 
 
+def simplify_messages(messages, drop_system=True, drop_tool_calls=False, logger=None):
+    """
+    Simplify a list of conversation messages by keeping only role and content.
+
+    User messages, and assistant messages that carry text, are reduced to
+    ``{"role": ..., "content": <extracted text>}``. Every other message is passed
+    through unchanged unless one of the drop flags removes it. Simplification is
+    best effort: if anything fails, the original ``messages`` object is returned.
+
+    :param messages: List of message dicts (e.g., from query or response). Any
+        non-list value, such as a plain string, is returned unchanged.
+    :param drop_system: If True, remove system role messages
+    :param drop_tool_calls: If True, remove tool role messages and assistant
+        messages that have no text but contain tool_call items
+    :param logger: Optional logger; a simplification failure is reported on it at debug level
+    :return: New simplified list of messages, or the original input when it is
+        not a list or when simplification fails
+    """
+    # Strings and every other non-list input have nothing to simplify.
+    if not isinstance(messages, list):
+        return messages
+
+    try:
+        simplified_msgs = []
+        for msg in messages:
+            # Entries that are not dicts are passed through untouched.
+            if not isinstance(msg, dict):
+                simplified_msgs.append(msg)
+                continue
+
+            role = msg.get("role")
+            content = msg.get("content", [])
+
+            # Drop system message (if should)
+            if drop_system and role == "system":
+                continue
+
+            # Simplify user messages
+            if role == "user":
+                simplified_msgs.append({"role": role, "content": _extract_text_from_content(content)})
+                continue
+
+            # Drop tool results (if should)
+            if drop_tool_calls and role == "tool":
+                continue
+
+            # Simplify assistant messages
+            if role == "assistant":
+                simplified_content = _extract_text_from_content(content)
+                if simplified_content:
+                    simplified_msgs.append({"role": role, "content": simplified_content})
+                    continue
+
+                # Only a list can hold tool_call items. Checking the type first keeps this
+                # test from raising when "content" is None, which would otherwise send the
+                # whole call to the fallback below and discard all simplification.
+                if (
+                    drop_tool_calls
+                    and isinstance(content, list)
+                    and any(isinstance(c, dict) and c.get("type") == "tool_call" for c in content)
+                ):
+                    continue
+
+            # If we reach here, it means we want to keep the message as it is
+            simplified_msgs.append(msg)
+
+        return simplified_msgs
+
+    except Exception as ex:
+        # Deliberate best effort: never let message simplification break an evaluation.
+        if logger:
+            logger.debug("Error simplifying messages: %s. Returning original messages.", ex)
+        return messages
+
+
 def upload(path: str, container_client: ContainerClient, logger=None):
     """Upload files or directories to Azure Blob Storage using a container client.
 
 
@@ -37,6 +37,8 @@
 
 from ._conversation_aggregators import GetAggregator, GetAggregatorType
 
+import copy
+
 P = ParamSpec("P")
 T = TypeVar("T")
 T_EvalValue = TypeVar("T_EvalValue")
@@ -486,8 +488,12 @@ def _parse_tools_from_response(self, response):
         """
         tool_calls = []
         tool_results_map = {}
-        if isinstance(response, list):
-            for message in response:
+
+        # Work on a deep copy to avoid modifying the original object
+        response_copy = copy.deepcopy(response)
+
+        if isinstance(response_copy, list):
+            for message in response_copy:
                 # Extract tool calls from assistant messages
                 if message.get("role") == "assistant" and isinstance(message.get("content"), list):
                     for content_item in message.get("content"):
@@ -519,7 +525,11 @@ async def _real_call(self, **kwargs) -> Union[DoEvalResult[T_EvalValue], Aggrega
         :rtype: Union[DoEvalResult[T_EvalValue], AggregateResult[T_EvalValue]]
         """
         # Convert inputs into list of evaluable inputs.
-        eval_input_list = self._convert_kwargs_to_eval_input(**kwargs)
+        try:
+            eval_input_list = self._convert_kwargs_to_eval_input(**kwargs)
+        except Exception as e:
+            print(f"Error converting kwargs to eval_input_list: {e}")
+            raise e
         per_turn_results = []
         # Evaluate all inputs.
         for eval_input in eval_input_list:
 
@@ -2,7 +2,7 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
 import os, logging
-from typing import Dict, List, Optional, Union
+from typing import Dict, List, Optional, Union, Any, Tuple
 
 from typing_extensions import overload, override
 from azure.ai.evaluation._legacy._adapters._flows import AsyncPrompty
@@ -16,6 +16,7 @@
     ErrorCategory,
     construct_prompty_model_config,
     validate_model_config,
+    simplify_messages,
 )
 
 try:
@@ -207,6 +208,42 @@ def __call__(  # pylint: disable=docstring-missing-param
 
         return super().__call__(*args, **kwargs)
 
+    def has_context(self, eval_input: dict) -> bool:
+        """
+        Return True if eval_input contains a non-empty 'context' field.
+
+        None, empty or whitespace-only strings, empty lists, lists whose entries are
+        all None or blank, and the "<>" no-context marker all count as no context.
+
+        :param eval_input: Evaluation input mapping; only its 'context' key is read.
+        :return: True when a usable context is present, False otherwise.
+        """
+        context = eval_input.get("context")
+        if not context:
+            return False
+        if isinstance(context, str):
+            # "<>" is the special marker for "no context".
+            return context != "<>" and bool(context.strip())
+        if isinstance(context, list):
+            # None entries are skipped explicitly: str(None) is "None", which would
+            # otherwise be mistaken for real context text.
+            return any(c is not None and str(c).strip() for c in context)
+        return True
+
+    @override
+    async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str]]:
+        """Simplify the query and response messages, then run the parent evaluation.
+
+        Inputs without a 'query' key are handed to the parent implementation untouched.
+        Otherwise the parent receives a new dict holding only 'query', 'response' and
+        'context'.
+
+        :param eval_input: Evaluation input; 'response' and 'context' are read whenever
+            'query' is present.
+        :return: Whatever the parent ``_do_eval`` returns.
+        """
+        # Nothing to simplify without a query.
+        if "query" not in eval_input:
+            return await super()._do_eval(eval_input)
+
+        # Tool calls are removed from the query only when a usable context is present.
+        drop_query_tool_calls = self.has_context(eval_input)
+
+        payload = {
+            "query": simplify_messages(eval_input["query"], drop_tool_calls=drop_query_tool_calls),
+            "response": simplify_messages(eval_input["response"], drop_tool_calls=False),
+            "context": eval_input["context"],
+        }
+        return await super()._do_eval(payload)
+
     async def _real_call(self, **kwargs):
         """The asynchronous call where real end-to-end evaluation logic is performed.
 
@@ -230,57 +267,73 @@ async def _real_call(self, **kwargs):
                 raise ex
 
     def _convert_kwargs_to_eval_input(self, **kwargs):
+        """Convert call kwargs into evaluable inputs, deriving context from the agent response if needed.
+
+        When a truthy 'context' or 'conversation' kwarg is supplied, the kwargs are passed
+        to the parent implementation unchanged. Otherwise 'query' and 'response' must both
+        be truthy; the context is obtained from the response via
+        ``_get_context_from_agent_response`` and file_search tool results are filtered out
+        of the response before delegating to the parent.
+
+        :raises EvaluationException: If 'query' or 'response' is missing or empty
+            (USER_ERROR / INVALID_VALUE).
+        """
-        if "context" in kwargs or "conversation" in kwargs:
+        if kwargs.get("context") or kwargs.get("conversation"):
             return super()._convert_kwargs_to_eval_input(**kwargs)
-
         query = kwargs.get("query")
         response = kwargs.get("response")
         tool_definitions = kwargs.get("tool_definitions")
 
-        if not query or not response or not tool_definitions:
-            msg = f"{type(self).__name__}: Either 'conversation' or individual inputs must be provided. For Agent groundedness 'query', 'response' and 'tool_definitions' are required."
+        if (not query) or (not response):  # tool_definitions is optional and not validated here
+            msg = f"{type(self).__name__}: Either 'conversation' or individual inputs must be provided. For Agent groundedness 'query' and 'response' are required."
             raise EvaluationException(
                 message=msg,
                 blame=ErrorBlame.USER_ERROR,
                 category=ErrorCategory.INVALID_VALUE,
                 target=ErrorTarget.GROUNDEDNESS_EVALUATOR,
             )
-
         context = self._get_context_from_agent_response(response, tool_definitions)
-        if not context:
-            raise EvaluationException(
-                message=f"Context could not be extracted from agent response. Supported tools for groundedness are {self._SUPPORTED_TOOLS}. If supported tools are not used groundedness is not calculated.",
-                blame=ErrorBlame.USER_ERROR,
-                category=ErrorCategory.NOT_APPLICABLE,
-                target=ErrorTarget.GROUNDEDNESS_EVALUATOR,
-            )
 
-        return super()._convert_kwargs_to_eval_input(response=response[-1], context=context, query=query)
+        # file_search tool results are the source of the extracted context, so they are
+        # removed from the response that is evaluated against that context.
+        filtered_response = self._filter_file_search_results(response)
+        return super()._convert_kwargs_to_eval_input(response=filtered_response, context=context, query=query)
+
+    def _filter_file_search_results(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Filter out file_search tool results from the messages.
+
+        A message is removed when its role is "tool" and its "tool_call_id" matches a
+        file_search tool call found in ``messages``. Entries that are not dicts are kept
+        as they are.
+
+        :param messages: Agent messages. A non-list value, such as a plain-text
+            response, is returned unchanged instead of being iterated.
+        :return: A new list without the file_search tool results, or ``messages``
+            itself when it is not a list.
+        """
+        # A plain-string response has no tool messages; iterating it would call
+        # ``.get`` on single characters and raise AttributeError.
+        if not isinstance(messages, list):
+            return messages
+
+        # Ignore calls without an id so that tool messages lacking a "tool_call_id"
+        # are not matched through None.
+        file_search_ids = [
+            call_id for call_id in self._get_file_search_tool_call_ids(messages) if call_id is not None
+        ]
+
+        def _is_file_search_result(msg) -> bool:
+            return (
+                isinstance(msg, dict)
+                and msg.get("role") == "tool"
+                and msg.get("tool_call_id") in file_search_ids
+            )
+
+        return [msg for msg in messages if not _is_file_search_result(msg)]
 
     def _get_context_from_agent_response(self, response, tool_definitions):
+        """Extract context text from file_search tool results in the agent response.
+
+        Only parsed tool calls of type "tool_call" named "file_search" are read. Each
+        non-empty "text" found in their results becomes one entry labelled with the
+        result's "file_name" ("Unknown file name" when absent).
+
+        :param response: Agent response passed to ``_parse_tools_from_response``.
+        :param tool_definitions: Accepted for interface compatibility; not read by this
+            implementation.
+        :return: The collected context text, or the marker "<>" when there are no tool
+            calls, no text was found, or extraction raised an exception.
+        """
+        # Marker returned instead of None/"" when no context could be extracted.
+        NO_CONTEXT = "<>"
         context = ""
         try:
             logger.debug("Extracting context from response")
             tool_calls = self._parse_tools_from_response(response=response)
-            logger.debug(f"Tool Calls parsed successfully : {tool_calls}")
-            if tool_calls:
-                for tool_call in tool_calls:
-                    if isinstance(tool_call, dict) and tool_call.get("type") == "tool_call":
-                        tool_name = tool_call.get("name")
-                        for tool in tool_definitions:
-                            if tool.get("name") == tool_name and tool.get("type") in self._SUPPORTED_TOOLS:
-                                if tool_name == "file_search":
-                                    tool_result = tool_call.get("tool_result")
-                                    if tool_result:
-                                        for result in tool_result:
-                                            content_list = result.get("content")
-                                            if content_list:
-                                                for content in content_list:
-                                                    text = content.get("text")
-                                                    if text:
-                                                        context = context + "\n" + str(text)
+            logger.debug(f"Tool Calls parsed successfully: {tool_calls}")
+
+            if not tool_calls:
+                return NO_CONTEXT
+
+            context_lines = []
+            for tool_call in tool_calls:
+                if not isinstance(tool_call, dict) or tool_call.get("type") != "tool_call":
+                    continue
+
+                tool_name = tool_call.get("name")
+                if tool_name != "file_search":
+                    continue
+
+                # Extract tool results; an entry may itself be a list of results, so
+                # wrap single results in a list and handle both shapes the same way.
+                for result in tool_call.get("tool_result", []):
+                    results = result if isinstance(result, list) else [result]
+                    for r in results:
+                        file_name = r.get("file_name", "Unknown file name")
+                        for content in r.get("content", []):
+                            text = content.get("text")
+                            if text:
+                                context_lines.append(f"{file_name}:\n- {text}---\n\n")
+
+            context = "\n".join(context_lines) if len(context_lines) > 0 else None
+
         except Exception as ex:
+            # Any failure discards text collected so far: context_lines is only joined
+            # at the end of the try block, so the result falls back to NO_CONTEXT.
             logger.debug(f"Error extracting context from agent response : {str(ex)}")
-            context = ""
+            context = None
+
+        context = context if context else NO_CONTEXT
+        return context
 
-        return context if context else None
+    def _get_file_search_tool_call_ids(self, query_or_response):
+        """Collect the tool_call_id of every parsed tool call named "file_search".
+
+        :param query_or_response: Messages handed to ``_parse_tools_from_response``.
+        :return: List of tool_call_id values, in the order the tool calls were parsed.
+        """
+        file_search_ids = []
+        for parsed_call in self._parse_tools_from_response(query_or_response):
+            if parsed_call.get("name") == "file_search":
+                file_search_ids.append(parsed_call.get("tool_call_id"))
+        return file_search_ids
Original file line number	Diff line number	Diff line change
`@@ -2,5 +2,5 @@`
`2`	`2`	`"AssetsRepo": "Azure/azure-sdk-assets",`
`3`	`3`	`"AssetsRepoPrefixPath": "python",`
`4`	`4`	`"TagPrefix": "python/evaluation/azure-ai-evaluation",`
`5`		`- "Tag": "python/evaluation/azure-ai-evaluation_e9fbe5cd65"`
	`5`	`+ "Tag": "python/evaluation/azure-ai-evaluation_d7b00f22b8"`
`6`	`6`	`}`