22# Copyright (c) Microsoft Corporation. All rights reserved.
33# ---------------------------------------------------------
44import os , logging
5- from typing import Dict , List , Optional , Union
5+ from typing import Dict , List , Optional , Union , Any , Tuple
66
77from typing_extensions import overload , override
88from azure .ai .evaluation ._legacy ._adapters ._flows import AsyncPrompty
1616 ErrorCategory ,
1717 construct_prompty_model_config ,
1818 validate_model_config ,
19+ simplify_messages ,
1920)
2021
2122try :
@@ -213,6 +214,42 @@ def __call__( # pylint: disable=docstring-missing-param
213214
214215 return super ().__call__ (* args , ** kwargs )
215216
def has_context(self, eval_input: dict) -> bool:
    """Return True if ``eval_input`` contains a usable ``context`` value.

    The following are all treated as "no context":

    * a missing ``context`` key, ``None``, an empty string or an empty list;
    * the literal ``"<>"`` marker;
    * a string made only of whitespace;
    * a list whose items are all ``None`` or blank once converted to ``str``.

    Any other truthy value (for example a non-empty dict) counts as context.

    :param eval_input: The evaluation input mapping to inspect.
    :type eval_input: dict
    :return: Whether a non-empty context is present.
    :rtype: bool
    """
    no_context_marker = "<>"  # Special marker for no context

    context = eval_input.get("context")
    if not context or context == no_context_marker:
        return False
    if isinstance(context, str):
        return bool(context.strip())
    if isinstance(context, list):
        # ``str(None)`` is the non-empty text "None", so None items must be
        # rejected explicitly or a list of Nones would look like real context.
        return any(item is not None and bool(str(item).strip()) for item in context)
    return True
232+
@override
async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str]]:
    """Run the parent evaluation, simplifying query/response messages first.

    When ``eval_input`` has no ``"query"`` key it is forwarded to the parent
    implementation untouched. Otherwise a new input holding only ``query``,
    ``response`` and ``context`` is built and forwarded instead:

    * ``query`` goes through ``simplify_messages`` with ``drop_tool_calls``
      set to the result of ``has_context(eval_input)``;
    * ``response`` goes through ``simplify_messages`` with
      ``drop_tool_calls=False``;
    * ``context`` is copied over as-is.

    :param eval_input: The evaluation input mapping.
    :type eval_input: Dict
    :return: Whatever the parent ``_do_eval`` returns.
    :rtype: Dict[str, Union[float, str]]
    """
    payload = eval_input
    if "query" in eval_input:
        context_present = self.has_context(eval_input)
        # NOTE(review): "response" and "context" are indexed directly, so a
        # missing key raises KeyError here - confirm callers always set them.
        payload = {
            "query": simplify_messages(eval_input["query"], drop_tool_calls=context_present),
            "response": simplify_messages(eval_input["response"], drop_tool_calls=False),
            "context": eval_input["context"],
        }

    # Hand the (possibly rebuilt) input to the parent implementation.
    return await super()._do_eval(payload)
252+
216253 async def _real_call (self , ** kwargs ):
217254 """The asynchronous call where real end-to-end evaluation logic is performed.
218255
@@ -236,57 +273,73 @@ async def _real_call(self, **kwargs):
236273 raise ex
237274
def _convert_kwargs_to_eval_input(self, **kwargs):
    """Turn the call's keyword arguments into evaluation input.

    If a truthy ``context`` or ``conversation`` is supplied, the keyword
    arguments are passed straight to the parent implementation. Otherwise
    this is treated as an agent-style call: ``query`` and ``response`` are
    required, the context is derived from the response via
    ``_get_context_from_agent_response``, and the response is passed through
    ``_filter_file_search_results`` before delegating to the parent.

    :keyword query: The query; required when no context/conversation is given.
    :keyword response: The response; required when no context/conversation is given.
    :keyword tool_definitions: Optional; forwarded to the context extraction helper.
    :return: Whatever the parent ``_convert_kwargs_to_eval_input`` returns.
    :raises EvaluationException: If ``query`` or ``response`` is missing or empty.
    """
    # Explicit context or a conversation: the parent handles these directly.
    if kwargs.get("context") or kwargs.get("conversation"):
        return super()._convert_kwargs_to_eval_input(**kwargs)

    query, response = kwargs.get("query"), kwargs.get("response")
    tool_definitions = kwargs.get("tool_definitions")

    # tool_definitions is deliberately not part of this check.
    if not (query and response):
        raise EvaluationException(
            message=(
                f"{type(self).__name__}: Either 'conversation' or individual inputs must be provided. "
                "For Agent groundedness 'query' and 'response' are required."
            ),
            blame=ErrorBlame.USER_ERROR,
            category=ErrorCategory.INVALID_VALUE,
            target=ErrorTarget.GROUNDEDNESS_EVALUATOR,
        )

    extracted_context = self._get_context_from_agent_response(response, tool_definitions)
    response_without_file_search = self._filter_file_search_results(response)
    return super()._convert_kwargs_to_eval_input(
        response=response_without_file_search,
        context=extracted_context,
        query=query,
    )
294+
295+ def _filter_file_search_results (self , messages : List [Dict [str , Any ]]) -> List [Dict [str , Any ]]:
296+ """Filter out file_search tool results from the messages."""
297+ file_search_ids = self ._get_file_search_tool_call_ids (messages )
298+ return [
299+ msg for msg in messages if not (msg .get ("role" ) == "tool" and msg .get ("tool_call_id" ) in file_search_ids )
300+ ]
265301
266302 def _get_context_from_agent_response (self , response , tool_definitions ):
303+ """Extract context text from file_search tool results in the agent response."""
304+ NO_CONTEXT = "<>"
267305 context = ""
268306 try :
269307 logger .debug ("Extracting context from response" )
270308 tool_calls = self ._parse_tools_from_response (response = response )
271- logger .debug (f"Tool Calls parsed successfully : { tool_calls } " )
272- if tool_calls :
273- for tool_call in tool_calls :
274- if isinstance (tool_call , dict ) and tool_call .get ("type" ) == "tool_call" :
275- tool_name = tool_call .get ("name" )
276- for tool in tool_definitions :
277- if tool .get ("name" ) == tool_name and tool .get ("type" ) in self ._SUPPORTED_TOOLS :
278- if tool_name == "file_search" :
279- tool_result = tool_call .get ("tool_result" )
280- if tool_result :
281- for result in tool_result :
282- content_list = result .get ("content" )
283- if content_list :
284- for content in content_list :
285- text = content .get ("text" )
286- if text :
287- context = context + "\n " + str (text )
309+ logger .debug (f"Tool Calls parsed successfully: { tool_calls } " )
310+
311+ if not tool_calls :
312+ return NO_CONTEXT
313+
314+ context_lines = []
315+ for tool_call in tool_calls :
316+ if not isinstance (tool_call , dict ) or tool_call .get ("type" ) != "tool_call" :
317+ continue
318+
319+ tool_name = tool_call .get ("name" )
320+ if tool_name != "file_search" :
321+ continue
322+
323+ # Extract tool results
324+ for result in tool_call .get ("tool_result" , []):
325+ results = result if isinstance (result , list ) else [result ]
326+ for r in results :
327+ file_name = r .get ("file_name" , "Unknown file name" )
328+ for content in r .get ("content" , []):
329+ text = content .get ("text" )
330+ if text :
331+ context_lines .append (f"{ file_name } :\n - { text } ---\n \n " )
332+
333+ context = "\n " .join (context_lines ) if len (context_lines ) > 0 else None
334+
288335 except Exception as ex :
289336 logger .debug (f"Error extracting context from agent response : { str (ex )} " )
290- context = ""
337+ context = None
338+
339+ context = context if context else NO_CONTEXT
340+ return context
291341
292- return context if context else None
342+ def _get_file_search_tool_call_ids (self , query_or_response ):
343+ """Return a list of tool_call_ids for file search tool calls."""
344+ tool_calls = self ._parse_tools_from_response (query_or_response )
345+ return [tc .get ("tool_call_id" ) for tc in tool_calls if tc .get ("name" ) == "file_search" ]
0 commit comments