22# Copyright (c) Microsoft Corporation. All rights reserved.
33# ---------------------------------------------------------
44import os , logging
5- from typing import Dict , List , Optional , Union
5+ from typing import Dict , List , Optional , Union , Any , Tuple
66
77from typing_extensions import overload , override
88from azure .ai .evaluation ._legacy ._adapters ._flows import AsyncPrompty
1616 ErrorCategory ,
1717 construct_prompty_model_config ,
1818 validate_model_config ,
19+ simplify_messages ,
1920)
2021
2122try :
@@ -207,6 +208,42 @@ def __call__( # pylint: disable=docstring-missing-param
207208
208209 return super ().__call__ (* args , ** kwargs )
209210
def has_context(self, eval_input: dict) -> bool:
    """Return True if ``eval_input`` carries a usable ``context`` value.

    The following are all treated as "no context": a missing key, ``None``,
    an empty string or list, a whitespace-only string, the ``"<>"`` marker
    (used as the special "no context" placeholder), and a list whose items
    are all ``None`` or blank once stringified.

    Any other non-empty value (for example a non-empty dict) counts as context.

    :param eval_input: Evaluation input mapping; only the ``context`` key is read.
    :type eval_input: dict
    :return: Whether a non-empty context is present.
    :rtype: bool
    """
    context = eval_input.get("context")
    if not context or context == "<>":  # "<>" is the special marker for no context
        return False
    if isinstance(context, str):
        return bool(context.strip())
    if isinstance(context, list):
        # Skip None explicitly: str(None) == "None" would otherwise look like real text.
        return any(item is not None and str(item).strip() for item in context)
    return True
226+
@override
async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str]]:
    """Simplify query/response messages, then delegate to the parent evaluation.

    When ``eval_input`` has no usable ``query`` (key absent, or present with a
    ``None`` value) the input is forwarded to the parent implementation
    unchanged. Otherwise ``query`` and ``response`` are passed through
    ``simplify_messages`` and a new input dict containing only ``query``,
    ``response`` and ``context`` is handed to the parent.

    Tool calls are dropped from the query only when ``has_context`` reports a
    non-empty context; they are always kept in the response.

    :param eval_input: Evaluation input; reads ``query``, ``response`` and ``context``.
    :type eval_input: Dict
    :return: The result of the parent ``_do_eval``.
    :rtype: Dict[str, Union[float, str]]
    :raises KeyError: If ``query`` is set but ``response`` or ``context`` is missing.
    """
    # An explicit None query is handled like a missing one instead of being
    # passed to simplify_messages.
    if eval_input.get("query") is None:
        return await super()._do_eval(eval_input)

    contains_context = self.has_context(eval_input)

    simplified_query = simplify_messages(eval_input["query"], drop_tool_calls=contains_context)
    simplified_response = simplify_messages(eval_input["response"], drop_tool_calls=False)

    # Only these three keys are forwarded; any other key in eval_input is dropped.
    simplified_eval_input = {
        "query": simplified_query,
        "response": simplified_response,
        "context": eval_input["context"],
    }

    return await super()._do_eval(simplified_eval_input)
246+
210247 async def _real_call (self , ** kwargs ):
211248 """The asynchronous call where real end-to-end evaluation logic is performed.
212249
@@ -230,57 +267,73 @@ async def _real_call(self, **kwargs):
230267 raise ex
231268
def _convert_kwargs_to_eval_input(self, **kwargs):
    """Build the evaluation input, deriving context from the agent response if needed.

    If a truthy ``context`` or ``conversation`` is supplied, the keyword
    arguments are forwarded to the parent implementation untouched.
    Otherwise ``query`` and ``response`` are required: the context is
    extracted from the response via ``_get_context_from_agent_response`` and
    the response is passed through ``_filter_file_search_results`` before
    being handed to the parent implementation.

    :keyword query: The user query.
    :keyword response: The agent response.
    :keyword tool_definitions: Optional tool definitions, forwarded to the context extractor.
    :return: Whatever the parent ``_convert_kwargs_to_eval_input`` returns.
    :raises EvaluationException: If neither ``context``/``conversation`` nor
        both ``query`` and ``response`` are provided.
    """
    caller_supplied_inputs = bool(kwargs.get("context")) or bool(kwargs.get("conversation"))
    if caller_supplied_inputs:
        return super()._convert_kwargs_to_eval_input(**kwargs)

    query, response = kwargs.get("query"), kwargs.get("response")
    if not (query and response):
        raise EvaluationException(
            message=(
                f"{type(self).__name__}: Either 'conversation' or individual inputs must be provided. "
                "For Agent groundedness 'query' and 'response' are required."
            ),
            blame=ErrorBlame.USER_ERROR,
            category=ErrorCategory.INVALID_VALUE,
            target=ErrorTarget.GROUNDEDNESS_EVALUATOR,
        )

    # Context is extracted from the full response before the response is filtered.
    agent_context = self._get_context_from_agent_response(response, kwargs.get("tool_definitions"))
    return super()._convert_kwargs_to_eval_input(
        response=self._filter_file_search_results(response),
        context=agent_context,
        query=query,
    )
288+
def _filter_file_search_results(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Filter out file_search tool results from the messages.

    A message is removed when its ``role`` is ``"tool"`` and its
    ``tool_call_id`` is one of the ids returned by
    ``_get_file_search_tool_call_ids``. All other messages are kept in order.

    :param messages: Agent response messages. A value that is not a list
        (for example a plain-text response) is returned unchanged.
    :type messages: List[Dict[str, Any]]
    :return: A new list without the file_search tool result messages.
    :rtype: List[Dict[str, Any]]
    """
    # A non-list response has no message structure to filter; iterating a
    # string would yield characters and fail on ``.get``.
    if not isinstance(messages, list):
        return messages

    file_search_ids = self._get_file_search_tool_call_ids(messages)
    return [
        msg
        for msg in messages
        if not (
            isinstance(msg, dict)  # non-dict entries are passed through untouched
            and msg.get("role") == "tool"
            and msg.get("tool_call_id") in file_search_ids
        )
    ]
259295
def _get_context_from_agent_response(self, response, tool_definitions):
    """Extract context text from file_search tool results in the agent response.

    Tool calls are obtained from ``_parse_tools_from_response``. For every
    ``file_search`` tool call, each text chunk found in its results is
    rendered as ``"<file_name>:\\n- <text>---\\n\\n"`` and the chunks are joined
    with newlines. Missing file names fall back to ``"Unknown file name"``.

    Extraction is best-effort: any exception is logged at debug level and
    results in the "no context" marker.

    :param response: The agent response to inspect.
    :param tool_definitions: Not used by this implementation; kept so existing
        callers do not need to change.
    :return: The concatenated context text, or ``"<>"`` when nothing could be extracted.
    :rtype: str
    """
    NO_CONTEXT = "<>"
    try:
        logger.debug("Extracting context from response")
        tool_calls = self._parse_tools_from_response(response=response)
        logger.debug("Tool Calls parsed successfully: %s", tool_calls)

        context_lines = []
        for tool_call in tool_calls or []:
            if not isinstance(tool_call, dict) or tool_call.get("type") != "tool_call":
                continue
            if tool_call.get("name") != "file_search":
                continue

            # ``or []`` rather than a ``.get`` default: a key that is present with
            # a None value must not raise and wipe out context collected so far.
            for result in tool_call.get("tool_result") or []:
                entries = result if isinstance(result, list) else [result]
                for entry in entries:
                    if not isinstance(entry, dict):
                        continue
                    file_name = entry.get("file_name", "Unknown file name")
                    for content in entry.get("content") or []:
                        text = content.get("text") if isinstance(content, dict) else None
                        if text:
                            context_lines.append(f"{file_name}:\n- {text}---\n\n")
    except Exception as ex:  # deliberate best-effort: fall back to "no context"
        logger.debug("Error extracting context from agent response : %s", ex)
        return NO_CONTEXT

    return "\n".join(context_lines) if context_lines else NO_CONTEXT
def _get_file_search_tool_call_ids(self, query_or_response):
    """Return a list of tool_call_ids for file search tool calls.

    Tool calls are obtained from ``_parse_tools_from_response``. Entries that
    are not dicts, are not named ``file_search``, or carry no ``tool_call_id``
    are skipped.

    :param query_or_response: The query or response messages to inspect.
    :return: The ids of all file_search tool calls, in the order they were parsed.
    :rtype: list
    """
    tool_calls = self._parse_tools_from_response(query_or_response) or []
    return [
        tool_call["tool_call_id"]
        for tool_call in tool_calls
        if isinstance(tool_call, dict)
        and tool_call.get("name") == "file_search"
        # A None id would match every tool message that lacks a tool_call_id.
        and tool_call.get("tool_call_id") is not None
    ]
0 commit comments