Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions mesa_llm/llm_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,10 @@ async def aapply_plan(self, plan: Plan) -> list[dict]:
await self.memory.aadd_to_memory(
type="action",
content={
k: v
for tool_call in tool_call_resp
for k, v in tool_call.items()
if k not in ["tool_call_id", "role"]
"tool_calls": [
{k: v for k, v in tc.items() if k not in ["tool_call_id", "role"]}
for tc in tool_call_resp
]
},
)

Expand All @@ -117,10 +117,10 @@ def apply_plan(self, plan: Plan) -> list[dict]:
self.memory.add_to_memory(
type="action",
content={
k: v
for tool_call in tool_call_resp
for k, v in tool_call.items()
if k not in ["tool_call_id", "role"]
"tool_calls": [
{k: v for k, v in tc.items() if k not in ["tool_call_id", "role"]}
for tc in tool_call_resp
]
},
)

Expand Down
20 changes: 20 additions & 0 deletions mesa_llm/memory/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,19 @@ def format_nested_dict(data, indent_level=0):
if isinstance(value, dict):
lines.append(f"{indent}[blue]└──[/blue] [cyan]{key} :[/cyan]")
lines.extend(format_nested_dict(value, indent_level + 1))
elif isinstance(value, list):
lines.append(f"{indent}[blue]└──[/blue] [cyan]{key} :[/cyan]")
next_indent = " " * (indent_level + 1)
for i, item in enumerate(value):
if isinstance(item, dict):
lines.append(
f"{next_indent}[blue]├──[/blue] [cyan]({i + 1})[/cyan]"
)
lines.extend(format_nested_dict(item, indent_level + 2))
else:
lines.append(
f"{next_indent}[blue]├──[/blue] [cyan]{item}[/cyan]"
)
else:
lines.append(
f"{indent}[blue]└──[/blue] [cyan]{key} : [/cyan]{value}"
Expand All @@ -50,6 +63,13 @@ def format_nested_dict(data, indent_level=0):
lines.append(f"\n[bold cyan][{key.title()}][/bold cyan]")
if isinstance(value, dict):
lines.extend(format_nested_dict(value, 1))
elif isinstance(value, list):
for i, item in enumerate(value):
if isinstance(item, dict):
lines.append(f" [blue]├──[/blue] [cyan]({i + 1})[/cyan]")
lines.extend(format_nested_dict(item, 2))
else:
lines.append(f" [blue]├──[/blue] [cyan]{item}[/cyan]")
Comment on lines +66 to +72
Copy link

Copilot AI Mar 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The list handling added here only applies when a list is a direct value at the top level of MemoryEntry.content. However, in the actual data path from apply_plan(), the memory entry content will be {"action": {"tool_calls": [...]}} — the list is nested inside the "tool_calls" key of a dict. When format_nested_dict encounters this list, it falls through to the else branch (lines 39-41) and renders it as a raw Python list string instead of using the new tree formatting.

To properly handle this, format_nested_dict should also handle list values, similar to the handling added at lines 53-59. Otherwise the nice tree display only works for the synthetic test case, not the real data structure produced by the fix in llm_agent.py.

Copilot uses AI. Check for mistakes.
else:
lines.append(f" [blue]└──[/blue] [cyan]{value} :[/cyan]")

Expand Down
125 changes: 119 additions & 6 deletions tests/test_llm_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,125 @@ def add_agent(self, pos):

assert resp == fake_response

assert {
"tool": "foo",
"argument": "bar",
} in agent.memory.step_content.values() or agent.memory.step_content == {
"tool": "foo",
"argument": "bar",
action_content = agent.memory.step_content.get("action")
assert action_content is not None
assert "tool_calls" in action_content
assert len(action_content["tool_calls"]) == 1
assert action_content["tool_calls"][0] == {"tool": "foo", "argument": "bar"}


def test_apply_plan_preserves_multiple_tool_calls(monkeypatch):
    """apply_plan must record every tool call result, not just the last one.

    The tool manager is stubbed to return two tool responses; the "action"
    entry written to memory is then expected to hold both of them, in
    order, with the ``tool_call_id`` and ``role`` keys removed.
    """
    monkeypatch.setenv("GEMINI_API_KEY", "dummy")

    class DummyModel(Model):
        def __init__(self):
            super().__init__(seed=42)
            self.grid = MultiGrid(5, 5, torus=False)

    model = DummyModel()
    created = LLMAgent.create_agents(
        model,
        n=1,
        reasoning=ReActReasoning,
        system_prompt="test",
        vision=-1,
        internal_state=["test_state"],
    )
    agent = created.to_list()[0]
    model.grid.place_agent(agent, (1, 1))
    agent.memory = ShortTermMemory(agent=agent, n=5, display=False)

    # What should end up in memory for each tool call, in call order.
    expected_calls = [
        {"name": "move_one_step", "response": "agent moved to (3, 4)"},
        {"name": "arrest_citizen", "response": "Citizen 12 arrested"},
    ]
    # The raw tool-manager output additionally carries bookkeeping keys.
    fake_response = [
        {"tool_call_id": call_id, "role": "tool", **payload}
        for call_id, payload in zip(("1", "2"), expected_calls)
    ]

    def fake_call_tools(agent, llm_response):
        return fake_response

    monkeypatch.setattr(agent.tool_manager, "call_tools", fake_call_tools)

    agent.apply_plan(Plan(step=0, llm_plan="do something"))

    action_content = agent.memory.step_content.get("action")
    assert action_content is not None
    assert "tool_calls" in action_content
    stored_calls = action_content["tool_calls"]
    assert len(stored_calls) == 2
    for position, wanted in enumerate(expected_calls):
        assert stored_calls[position] == wanted


@pytest.mark.asyncio
async def test_aapply_plan_preserves_multiple_tool_calls(monkeypatch):
    """aapply_plan must record every tool call result, not just the last one.

    Async counterpart of the synchronous test: ``acall_tools`` is stubbed
    with a coroutine returning two tool responses, and the "action" memory
    entry is expected to hold both, without ``tool_call_id`` and ``role``.
    """
    monkeypatch.setenv("GEMINI_API_KEY", "dummy")

    class DummyModel(Model):
        def __init__(self):
            super().__init__(seed=42)
            self.grid = MultiGrid(5, 5, torus=False)

    model = DummyModel()
    created = LLMAgent.create_agents(
        model,
        n=1,
        reasoning=ReActReasoning,
        system_prompt="test",
        vision=-1,
        internal_state=["test_state"],
    )
    agent = created.to_list()[0]
    model.grid.place_agent(agent, (1, 1))
    agent.memory = ShortTermMemory(agent=agent, n=5, display=False)

    # What should end up in memory for each tool call, in call order.
    expected_calls = [
        {"name": "move_one_step", "response": "agent moved to (3, 4)"},
        {"name": "arrest_citizen", "response": "Citizen 12 arrested"},
    ]
    # The raw tool-manager output additionally carries bookkeeping keys.
    fake_response = [
        {"tool_call_id": call_id, "role": "tool", **payload}
        for call_id, payload in zip(("1", "2"), expected_calls)
    ]

    async def fake_acall_tools(agent, llm_response):
        return fake_response

    monkeypatch.setattr(agent.tool_manager, "acall_tools", fake_acall_tools)

    await agent.aapply_plan(Plan(step=0, llm_plan="do something"))

    action_content = agent.memory.step_content.get("action")
    assert action_content is not None
    assert "tool_calls" in action_content
    stored_calls = action_content["tool_calls"]
    assert len(stored_calls) == 2
    for position, wanted in enumerate(expected_calls):
        assert stored_calls[position] == wanted


Expand Down
39 changes: 39 additions & 0 deletions tests/test_memory/test_memory_parent.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,45 @@ def test_memory_entry_str(self):
assert "Test content" in str_repr
assert "observation" in str_repr

def test_memory_entry_str_with_list_of_dicts(self):
    """str(MemoryEntry) shows each dict held in a top-level list value."""
    tool_results = [
        {"name": "move_one_step", "response": "moved"},
        {"name": "arrest_citizen", "response": "arrested"},
    ]
    entry = MemoryEntry(content={"action": tool_results}, step=1, agent=Mock())
    rendered = str(entry)
    # Both tool names must appear somewhere in the rendered text.
    for tool_name in ("move_one_step", "arrest_citizen"):
        assert tool_name in rendered

Copy link

Copilot AI Mar 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test uses content = {"action": [list of dicts]} where the list is a direct value of the "action" key. However, the actual memory entry produced by apply_plan() will have the structure {"action": {"tool_calls": [list of dicts]}} — the list is nested one level deeper. This means the test exercises the new elif isinstance(value, list) branch in __str__() but does not cover the actual data path, where the list is inside format_nested_dict and would fall through to the else (plain value) branch. Consider adding a test with the real structure {"action": {"tool_calls": [...]}} to verify the display works end-to-end.

Suggested change
def test_memory_entry_str_with_nested_tool_calls_list(self):
"""Test MemoryEntry string representation with nested tool_calls list under action."""
mock_agent = Mock()
content = {
"action": {
"tool_calls": [
{"name": "move_one_step", "response": "moved"},
{"name": "arrest_citizen", "response": "arrested"},
]
}
}
entry = MemoryEntry(content=content, step=1, agent=mock_agent)
str_repr = str(entry)
assert "move_one_step" in str_repr
assert "arrest_citizen" in str_repr

Copilot uses AI. Check for mistakes.
def test_memory_entry_str_with_list_of_strings(self):
    """str(MemoryEntry) shows each item of a list of plain strings."""
    entry = MemoryEntry(content={"tags": ["alpha", "beta"]}, step=1, agent=Mock())
    rendered = str(entry)
    for tag in ("alpha", "beta"):
        assert tag in rendered

def test_memory_entry_str_with_nested_tool_calls_list(self):
    """str(MemoryEntry) shows tool calls nested as action -> tool_calls -> list."""
    tool_results = [
        {"name": "move_one_step", "response": "moved"},
        {"name": "arrest_citizen", "response": "arrested"},
    ]
    nested_content = {"action": {"tool_calls": tool_results}}
    entry = MemoryEntry(content=nested_content, step=1, agent=Mock())
    rendered = str(entry)
    # Both tool names must appear even though the list sits one level deep.
    for tool_name in ("move_one_step", "arrest_citizen"):
        assert tool_name in rendered


class MemoryMock(Memory):
def __init__(
Expand Down
Loading