Skip to content

Commit 23ec728

Browse files
committed
Use repr(value) to handle the conversion more robustly. This solves the edge cases highlighted in the tests.
1 parent 2ff844d commit 23ec728

File tree

2 files changed

+66
-17
lines changed

2 files changed

+66
-17
lines changed

src/agentlab/llm/response_api.py

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -360,14 +360,7 @@ def _parse_response(self, response: dict) -> dict:
360360
interesting_keys = ["output_text"]
361361
for output in response.output:
362362
if output.type == "function_call":
363-
arguments = json.loads(output.arguments)
364-
func_args_str = ", ".join(
365-
[
366-
f'{k}="{v}"' if isinstance(v, str) else f"{k}={v}"
367-
for k, v in arguments.items()
368-
]
369-
)
370-
result.action = f"{output.name}({func_args_str})"
363+
result.action = tool_call_to_python_code(output.name, json.loads(output.arguments))
371364
result.tool_calls = output
372365
break
373366
elif output.type == "reasoning":
@@ -603,13 +596,7 @@ def _parse_response(self, response: dict) -> dict:
603596
)
604597
for output in response.content:
605598
if output.type == "tool_use":
606-
func_args_str = ", ".join(
607-
[
608-
f'{k}="{v}"' if isinstance(v, str) else f"{k}={v}"
609-
for k, v in output.input.items()
610-
]
611-
)
612-
result.action = f"{output.name}({func_args_str})"
599+
result.action = tool_call_to_python_code(output.name, output.input)
613600
elif output.type == "text":
614601
result.think += output.text
615602
return result
@@ -736,3 +723,15 @@ def make_model(self, extra_kwargs=None, **kwargs):
736723

737724
def get_message_builder(self) -> MessageBuilder:
738725
return OpenAIChatCompletionAPIMessageBuilder
726+
727+
728+
def tool_call_to_python_code(func_name, kwargs):
    """Render a tool call as a Python-style call expression string.

    Each entry of ``kwargs`` is written as ``name=repr(value)`` and the
    entries are joined with ``", "`` in the mapping's iteration order.

    Args:
        func_name: Name placed before the opening parenthesis.
        kwargs: Mapping of argument names to values. ``None`` (or any
            other falsy value) is treated as "no arguments".

    Returns:
        A string such as ``"search_web(query='latest news')"``, or
        ``"func_name()"`` when there are no arguments.
    """
    # A falsy kwargs (None, {}) collapses to an empty mapping, which in turn
    # produces an empty argument list between the parentheses.
    rendered_args = [f"{name}={value!r}" for name, value in (kwargs or {}).items()]
    joined = ", ".join(rendered_args)
    return f"{func_name}({joined})"

tests/llm/test_response_api.py

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ def test_claude_response_model_parse_and_cost():
299299
content for content in parsed_output.raw_response.content if content.type == "tool_use"
300300
]
301301
assert "Thinking about the request." in parsed_output.think
302-
assert parsed_output.action == 'search_web(query="latest news")'
302+
assert parsed_output.action == "search_web(query='latest news')"
303303
assert fn_calls[0].id == "tool_abc"
304304
assert global_tracker.stats["input_tokens"] == 40
305305
assert global_tracker.stats["output_tokens"] == 20
@@ -348,7 +348,7 @@ def test_openai_response_model_parse_and_cost():
348348
fn_calls = [
349349
content for content in parsed_output.raw_response.output if content.type == "function_call"
350350
]
351-
assert parsed_output.action == 'get_current_weather(location="Boston, MA", unit="celsius")'
351+
assert parsed_output.action == "get_current_weather(location='Boston, MA', unit='celsius')"
352352
assert fn_calls[0].call_id == "call_abc123"
353353
assert parsed_output.raw_response == mock_api_resp
354354
assert global_tracker.stats["input_tokens"] == 70
@@ -716,3 +716,53 @@ def test_claude_model_with_multiple_messages_pricy_call():
716716
# TODO: Add tests for image token costing (this is complex and model-specific)
717717
# - For OpenAI, you'd need to know how they bill for images (e.g., fixed cost per image + tokens for text parts)
718718
# - You'd likely need to mock the response from client.chat.completions.create to include specific usage for images.
719+
720+
721+
EDGE_CASES = [
722+
# 1. Empty kwargs dict
723+
("valid_function", {}, "valid_function()"),
724+
# 2. Kwargs with problematic string values (quotes, escapes, unicode)
725+
(
726+
"send_message",
727+
{
728+
"text": 'He said "Hello!" and used a backslash: \\',
729+
"unicode": "Café naïve résumé 🚀",
730+
"newlines": "Line1\nLine2\tTabbed",
731+
},
732+
"send_message(text='He said \"Hello!\" and used a backslash: \\\\', unicode='Café naïve résumé 🚀', newlines='Line1\\nLine2\\tTabbed')",
733+
),
734+
# 3. Mixed types including problematic float values
735+
(
736+
"complex_call",
737+
{
738+
"infinity": float("inf"),
739+
"nan": float("nan"),
740+
"negative_zero": -0.0,
741+
"scientific": 1.23e-45,
742+
},
743+
"complex_call(infinity=inf, nan=nan, negative_zero=-0.0, scientific=1.23e-45)",
744+
),
745+
# 4. Deeply nested structures that could stress repr()
746+
(
747+
"process_data",
748+
{
749+
"nested": {"level1": {"level2": {"level3": [1, 2, {"deep": True}]}}},
750+
"circular_ref_like": {"a": {"b": {"c": "back_to_start"}}},
751+
},
752+
"process_data(nested={'level1': {'level2': {'level3': [1, 2, {'deep': True}]}}}, circular_ref_like={'a': {'b': {'c': 'back_to_start'}}})",
753+
),
754+
]
755+
756+
757+
def test_tool_call_to_python_code():
    """Check tool_call_to_python_code against every entry in EDGE_CASES.

    Each entry is a ``(func_name, kwargs, expected)`` tuple; the formatted
    call string must equal ``expected`` exactly.
    """
    from agentlab.llm.response_api import tool_call_to_python_code

    for func_name, kwargs, expected in EDGE_CASES:
        result = tool_call_to_python_code(func_name, kwargs)
        # Printed so the rendered call is visible when run as a script.
        print(result)
        assert result == expected, f"Expected {expected} but got {result}"
765+
766+
767+
if __name__ == "__main__":
768+
test_tool_call_to_python_code()

0 commit comments

Comments
 (0)