@@ -299,7 +299,7 @@ def test_claude_response_model_parse_and_cost():
299299 content for content in parsed_output .raw_response .content if content .type == "tool_use"
300300 ]
301301 assert "Thinking about the request." in parsed_output .think
302- assert parsed_output .action == ' search_web(query=" latest news")'
302+ assert parsed_output .action == " search_web(query=' latest news')"
303303 assert fn_calls [0 ].id == "tool_abc"
304304 assert global_tracker .stats ["input_tokens" ] == 40
305305 assert global_tracker .stats ["output_tokens" ] == 20
@@ -348,7 +348,7 @@ def test_openai_response_model_parse_and_cost():
348348 fn_calls = [
349349 content for content in parsed_output .raw_response .output if content .type == "function_call"
350350 ]
351- assert parsed_output .action == ' get_current_weather(location=" Boston, MA" , unit=" celsius")'
351+ assert parsed_output .action == " get_current_weather(location=' Boston, MA' , unit=' celsius')"
352352 assert fn_calls [0 ].call_id == "call_abc123"
353353 assert parsed_output .raw_response == mock_api_resp
354354 assert global_tracker .stats ["input_tokens" ] == 70
@@ -716,3 +716,53 @@ def test_claude_model_with_multiple_messages_pricy_call():
716716# TODO: Add tests for image token costing (this is complex and model-specific)
717717# - For OpenAI, you'd need to know how they bill for images (e.g., fixed cost per image + tokens for text parts)
718718# - You'd likely need to mock the response from client.chat.completions.create to include specific usage for images.
719+
720+
# Edge-case fixtures for tool_call_to_python_code. Each entry is a triple
# (function_name, kwargs, expected_python_source); the expected strings assume
# keyword values are rendered via repr() and joined with ", ".
EDGE_CASES = [
    # 1. Empty kwargs dict
    ("valid_function", {}, "valid_function()"),
    # 2. Kwargs with problematic string values (quotes, escapes, unicode)
    (
        "send_message",
        {
            "text": 'He said "Hello!" and used a backslash: \\',
            "unicode": "Café naïve résumé 🚀",
            "newlines": "Line1\nLine2\tTabbed",
        },
        # repr() keeps single-quote delimiters, so embedded double quotes stay
        # unescaped and the lone backslash becomes the two characters "\\".
        "send_message(text='He said \"Hello!\" and used a backslash: \\\\', "
        "unicode='Café naïve résumé 🚀', newlines='Line1\\nLine2\\tTabbed')",
    ),
    # 3. Mixed types including problematic float values
    (
        "complex_call",
        {
            "infinity": float("inf"),
            "nan": float("nan"),
            "negative_zero": -0.0,
            "scientific": 1.23e-45,
        },
        # repr() of these floats: inf, nan, -0.0 (sign preserved), 1.23e-45.
        "complex_call(infinity=inf, nan=nan, negative_zero=-0.0, scientific=1.23e-45)",
    ),
    # 4. Deeply nested structures that could stress repr()
    (
        "process_data",
        {
            "nested": {"level1": {"level2": {"level3": [1, 2, {"deep": True}]}}},
            "circular_ref_like": {"a": {"b": {"c": "back_to_start"}}},
        },
        "process_data(nested={'level1': {'level2': {'level3': [1, 2, {'deep': True}]}}}, "
        "circular_ref_like={'a': {'b': {'c': 'back_to_start'}}})",
    ),
]
755+
756+
def test_tool_call_to_python_code():
    """tool_call_to_python_code renders (name, kwargs) as Python call source.

    Checks every EDGE_CASES fixture: empty kwargs, strings with quotes,
    backslashes and unicode, special float values, and nested containers.
    """
    # Local import: the test module should stay importable even if this
    # helper moves; pytest collects the test regardless.
    from agentlab.llm.response_api import tool_call_to_python_code

    # Unpack each fixture directly in the loop header; the message makes a
    # failing case self-describing without a debug print.
    for func_name, kwargs, expected in EDGE_CASES:
        result = tool_call_to_python_code(func_name, kwargs)
        assert result == expected, f"Expected {expected} but got {result}"
765+
766+
# Allow running this test module directly, without pytest.
if __name__ == "__main__":
    test_tool_call_to_python_code()
0 commit comments