@@ -494,14 +494,17 @@ def _extract_text_from_content(content):
494494 return text
495495
496496
497- def _get_conversation_history (query ):
497+ def _get_conversation_history (query , include_system_messages = False ):
498498 all_user_queries = []
499499 cur_user_query = []
500500 all_agent_responses = []
501501 cur_agent_response = []
502+ system_message = None
502503 for msg in query :
503504 if not "role" in msg :
504505 continue
506+ if include_system_messages and msg ["role" ] == "system" and "content" in msg :
507+ system_message = msg .get ("content" , "" )
505508 if msg ["role" ] == "user" and "content" in msg :
506509 if cur_agent_response != []:
507510 all_agent_responses .append (cur_agent_response )
@@ -530,13 +533,18 @@ def _get_conversation_history(query):
530533 category = ErrorCategory .INVALID_VALUE ,
531534 blame = ErrorBlame .USER_ERROR ,
532535 )
533-
534- return {"user_queries" : all_user_queries , "agent_responses" : all_agent_responses }
536+ result = {"user_queries" : all_user_queries , "agent_responses" : all_agent_responses }
537+ if include_system_messages :
538+ result ["system_message" ] = system_message
539+ return result
535540
536541
537542def _pretty_format_conversation_history (conversation_history ):
538543 """Formats the conversation history for better readability."""
539544 formatted_history = ""
545+ if "system_message" in conversation_history and conversation_history ["system_message" ] is not None :
546+ formatted_history += "SYSTEM_PROMPT:\n "
547+ formatted_history += " " + conversation_history ["system_message" ] + "\n \n "
540548 for i , (user_query , agent_response ) in enumerate (
541549 zip (conversation_history ["user_queries" ], conversation_history ["agent_responses" ] + [None ])
542550 ):
@@ -552,10 +560,10 @@ def _pretty_format_conversation_history(conversation_history):
552560 return formatted_history
553561
554562
555- def reformat_conversation_history (query , logger = None ):
563+ def reformat_conversation_history (query , logger = None , include_system_messages = False ):
556564 """Reformats the conversation history to a more compact representation."""
557565 try :
558- conversation_history = _get_conversation_history (query )
566+ conversation_history = _get_conversation_history (query , include_system_messages = include_system_messages )
559567 return _pretty_format_conversation_history (conversation_history )
560568 except :
561569 # If the conversation history cannot be parsed for whatever reason (e.g. the converter format changed), the original query is returned
@@ -570,22 +578,53 @@ def reformat_conversation_history(query, logger=None):
570578 return query
571579
572580
def _iter_content_dicts(msg):
    """Return the dict-shaped entries of ``msg["content"]``; anything else is skipped."""
    content = msg.get("content")
    if not isinstance(content, (list, tuple)):
        # Plain-string or missing content carries no structured tool parts.
        return []
    return [part for part in content if isinstance(part, dict)]


def _format_tool_call_arguments(arguments):
    """Render tool-call arguments as ``key="value"`` pairs for a ``[TOOL_CALL]`` line."""
    import json  # local import so this block does not depend on the file's import section

    if isinstance(arguments, str):
        # NOTE(review): some tool-call payloads appear to carry the arguments as a
        # JSON-encoded string rather than a mapping -- decode when possible.
        try:
            decoded = json.loads(arguments)
        except ValueError:
            return arguments
        if not hasattr(decoded, "items"):
            return arguments
        arguments = decoded
    if not arguments:
        return ""
    if not hasattr(arguments, "items"):
        return str(arguments)
    return ", ".join(f'{k}="{v}"' for k, v in arguments.items())


def _get_agent_response(agent_response_msgs, include_tool_messages=False):
    """Extracts formatted agent response including text, and optionally tool calls/results.

    :param agent_response_msgs: Iterable of message dicts. Only messages whose ``role`` is
        ``"assistant"`` (and, when tool messages are requested, ``"tool"``) are inspected.
    :param include_tool_messages: When True, every ``tool_call`` content part of an assistant
        message is rendered as ``[TOOL_CALL] name(args)``, immediately followed by the
        ``[TOOL_RESULT] ...`` line of the tool message that has the same tool call id (if any).
    :return: List of strings: the extracted assistant text plus the optional tool lines,
        in message order.
    """
    agent_response_text = []
    tool_results = {}

    # First pass: index tool results by tool_call_id so they can be placed right after
    # the call that produced them.
    if include_tool_messages:
        for msg in agent_response_msgs:
            if msg.get("role") != "tool" or "tool_call_id" not in msg:
                continue
            for content in _iter_content_dicts(msg):
                if content.get("type") == "tool_result":
                    result = content.get("tool_result")
                    tool_results[msg["tool_call_id"]] = f"[TOOL_RESULT] {result}"

    # Second pass: parse assistant messages and tool calls
    for msg in agent_response_msgs:
        if msg.get("role") != "assistant" or "content" not in msg:
            continue

        text = _extract_text_from_content(msg["content"])
        if text:
            agent_response_text.extend(text)

        if not include_tool_messages:
            continue

        for content in _iter_content_dicts(msg):
            # TODO: Verify if this is the correct way to handle tool calls
            if content.get("type") != "tool_call":
                continue

            tool_call = content.get("tool_call")
            if isinstance(tool_call, dict) and "function" in tool_call:
                # Nested shape: {"tool_call": {"id": ..., "function": {"name": ..., "arguments": ...}}}
                function = tool_call.get("function") or {}
                func_name = function.get("name", "")
                args = function.get("arguments", {})
                tool_call_id = tool_call.get("id")
            else:
                # Flat shape: name / arguments / tool_call_id sit directly on the content part
                tool_call_id = content.get("tool_call_id")
                func_name = content.get("name", "")
                args = content.get("arguments", {})

            args_str = _format_tool_call_arguments(args)
            agent_response_text.append(f"[TOOL_CALL] {func_name}({args_str})")
            if tool_call_id in tool_results:
                agent_response_text.append(tool_results[tool_call_id])

    return agent_response_text
582621
583622
584- def reformat_agent_response (response , logger = None ):
623+ def reformat_agent_response (response , logger = None , include_tool_messages = False ):
585624 try :
586625 if response is None or response == []:
587626 return ""
588- agent_response = _get_agent_response (response )
627+ agent_response = _get_agent_response (response , include_tool_messages = include_tool_messages )
589628 if agent_response == []:
590629 # If no message could be extracted, likely the format changed, fallback to the original response in that case
591630 if logger :
@@ -602,6 +641,26 @@ def reformat_agent_response(response, logger=None):
602641 return response
603642
604643
def reformat_tool_definitions(tool_definitions, logger=None):
    """Reformats tool definitions into a compact, line-per-tool text summary.

    The output starts with a ``TOOL_DEFINITIONS:`` header followed by one
    ``- <name>: <description> (inputs: <parameter names>)`` line per tool.

    :param tool_definitions: Iterable of tool definition dicts. The keys read are ``name``,
        ``description`` and ``parameters`` (whose ``properties`` keys are listed as inputs).
    :param logger: Optional logger; receives a warning when the fallback is taken.
    :return: The formatted string, or ``tool_definitions`` unchanged if it cannot be parsed.
    """
    try:
        output_lines = ["TOOL_DEFINITIONS:"]
        for tool in tool_definitions:
            name = tool.get("name", "unnamed_tool")
            # `or` guards against keys that are present but explicitly set to None, which
            # would otherwise push the whole list onto the raw fallback path.
            desc = (tool.get("description") or "").strip()
            params = (tool.get("parameters") or {}).get("properties") or {}
            param_names = ", ".join(params.keys()) if params else "no parameters"
            output_lines.append(f"- {name}: {desc} (inputs: {param_names})")
        return "\n".join(output_lines)
    except Exception:
        # If the tool definitions cannot be parsed for whatever reason, the original tool
        # definitions are returned unchanged. This is a deliberate best-effort fallback so
        # that the evaluation can still proceed with the raw input.
        if logger:
            logger.warning(
                f"Tool definitions could not be parsed, falling back to original definitions: {tool_definitions}"
            )
        return tool_definitions
662+
663+
605664def upload (path : str , container_client : ContainerClient , logger = None ):
606665 """Upload files or directories to Azure Blob Storage using a container client.
607666
0 commit comments