Skip to content

Commit 775a626

Browse files
committed
feat: add structured decision reasoning
1 parent 86f4d04 commit 775a626

File tree

2 files changed

+448
-0
lines changed

2 files changed

+448
-0
lines changed

mesa_llm/reasoning/decision.py

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
import json
2+
from typing import TYPE_CHECKING
3+
4+
from pydantic import BaseModel, Field
5+
6+
from mesa_llm.reasoning.reasoning import Observation, Plan, Reasoning
7+
8+
if TYPE_CHECKING:
9+
from mesa_llm.llm_agent import LLMAgent
10+
11+
12+
class DecisionOption(BaseModel):
    """
    One candidate action the agent weighed while making a decision.

    Instances are produced by the LLM as part of the strict-JSON
    ``DecisionOutput`` artifact, not constructed by application code.
    """

    # Exact identifier for this option; the system prompt instructs the LLM
    # that DecisionOutput.chosen_option must repeat one of these names.
    name: str
    # Human-readable explanation of what taking this option would do.
    description: str
    # Downsides / costs of this option relative to the alternatives.
    tradeoffs: list[str]
    # NOTE(review): no ge/le bounds are declared, so the scale of this score
    # is whatever the LLM chooses — only relative ordering is meaningful.
    score: float = Field(
        description="Relative evaluation score for this option in the current context."
    )
19+
20+
21+
class DecisionOutput(BaseModel):
    """
    Structured decision artifact the LLM must return as strict JSON.

    The field semantics mirror, one-for-one, the contract spelled out in
    ``DecisionReasoning.get_decision_system_prompt``; this model is passed as
    ``response_format`` so the provider enforces the schema.
    """

    # Current primary objective within the simulation.
    goal: str
    # Rules, resource limits, or environmental boundaries restricting actions.
    constraints: list[str]
    # Verified data grounded in the current observation or memory.
    known_facts: list[str]
    # Critical missing information for perfect decision-making.
    unknowns: list[str]
    # Inferences made to bridge the gap between known facts and unknowns.
    assumptions: list[str]
    # Distinct executable choices evaluated for this step.
    options: list[DecisionOption]
    # Per the system prompt, the exact `name` of one entry in `options`.
    chosen_option: str
    # Concise justification for the selected option.
    rationale: str
    # Certainty in the decision; pydantic enforces the [0.0, 1.0] range.
    confidence: float = Field(ge=0.0, le=1.0)
    # Potential negative outcomes or failure states of the chosen option.
    risks: list[str]
    # Single executable command, later resolved into tool calls by the
    # reasoning's execute_tool_call / aexecute_tool_call.
    next_action: str
33+
34+
35+
class DecisionReasoning(Reasoning):
    """
    Structured decision-making reasoning that asks the LLM for a strict JSON
    decision artifact (``DecisionOutput``) and then converts the selected
    ``next_action`` into tool calls.

    ``plan`` and ``aplan`` run the same pipeline (observe -> build request ->
    generate -> parse -> record -> execute); the request preparation and the
    rationale display publication are factored into private helpers so the
    sync and async paths cannot drift apart.
    """

    def __init__(self, agent: "LLMAgent"):
        super().__init__(agent=agent)

    def get_decision_system_prompt(self) -> str:
        """Return the system prompt that enforces the structured-JSON decision format."""
        return """
        You are an autonomous agent operating within a simulation environment.

        Your task is to analyze your current observation and memory to make a highly structured, optimal decision.
        Do not produce free-form chain-of-thought prose. You must evaluate the situation and return a strict JSON object matching the required schema.

        Your response must include:
        - goal: Your current primary objective within the simulation.
        - constraints: Any rules, resource limits, or environmental boundaries restricting your actions.
        - known_facts: Verified data strictly grounded in your current observation or historical memory.
        - unknowns: Critical missing information required for perfect decision-making.
        - assumptions: Logical inferences made to bridge the gap between known facts and unknowns.
        - options: A list of distinct, executable choices currently available to you. Each must include a name, description, tradeoffs, and a relative evaluation score.
        - chosen_option: The exact name of the best option selected from the list above.
        - rationale: A concise, logical justification for why this option was chosen over the alternatives.
        - confidence: A float between 0.0 and 1.0 representing your certainty in this decision.
        - risks: Potential negative outcomes or failure states associated with the chosen option.
        - next_action: A single, concrete, and strictly formatted executable command.

        Execution Requirements:
        1. Ground all known_facts entirely in the provided observation context. Do not hallucinate simulation state or capabilities.
        2. next_action must strictly match an available execution command. Do not invent tools.
        3. If information is heavily constrained or missing, explicitly reflect this by lowering the confidence score and detailing the danger in risks.
        """

    def get_decision_prompt(self, obs: Observation) -> list[str]:
        """Build the user prompt segments: memory, last communication, observation."""
        prompt_list = [self.agent.memory.get_prompt_ready()]
        last_communication = self.agent.memory.get_communication_history()

        if last_communication:
            prompt_list.append("last communication: \n" + str(last_communication))
        if obs:
            prompt_list.append("current observation: \n" + str(obs))

        return prompt_list

    def _prepare_request(
        self,
        obs: Observation,
        prompt: str | None,
        selected_tools: list[str] | None,
    ) -> tuple[list[str], list]:
        """
        Shared setup for plan()/aplan(): install the decision system prompt,
        assemble the prompt list, and fetch the tool schema.

        Raises:
            ValueError: when neither `prompt` nor `agent.step_prompt` is set.
        """
        self.agent.llm.system_prompt = self.get_decision_system_prompt()
        prompt_list = self.get_decision_prompt(obs)

        if prompt is not None:
            prompt_list.append(prompt)
        elif self.agent.step_prompt is not None:
            prompt_list.append(self.agent.step_prompt)
        else:
            raise ValueError("No prompt provided and agent.step_prompt is None.")

        selected_tools_schema = self.agent.tool_manager.get_all_tools_schema(
            selected_tools
        )
        return prompt_list, selected_tools_schema

    def _publish_rationale(self, formatted_response: dict) -> None:
        """Surface the decision rationale in the step display, when one is attached."""
        if hasattr(self.agent, "_step_display_data"):
            self.agent._step_display_data["plan_content"] = formatted_response[
                "rationale"
            ]

    def plan(
        self,
        prompt: str | None = None,
        obs: Observation | None = None,
        ttl: int = 1,
        selected_tools: list[str] | None = None,
    ) -> Plan:
        """
        Plan the next action through a structured decision artifact.

        Args:
            prompt: Explicit step prompt; falls back to `agent.step_prompt`.
            obs: Pre-computed observation; generated from the agent if None.
            ttl: Time-to-live forwarded to the resulting plan.
            selected_tools: Restrict the tool schema to these tools (all if None).
        """
        if obs is None:
            obs = self.agent.generate_obs()

        prompt_list, selected_tools_schema = self._prepare_request(
            obs, prompt, selected_tools
        )

        # tool_choice="none": the model must answer with the JSON artifact,
        # not a direct tool call; the schema is still provided for grounding.
        rsp = self.agent.llm.generate(
            prompt=prompt_list,
            tool_schema=selected_tools_schema,
            tool_choice="none",
            response_format=DecisionOutput,
        )

        formatted_response = json.loads(rsp.choices[0].message.content)
        self.agent.memory.add_to_memory(type="decision", content=formatted_response)
        self._publish_rationale(formatted_response)

        return self.execute_tool_call(
            formatted_response["next_action"],
            selected_tools=selected_tools,
            ttl=ttl,
        )

    async def aplan(
        self,
        prompt: str | None = None,
        obs: Observation | None = None,
        ttl: int = 1,
        selected_tools: list[str] | None = None,
    ) -> Plan:
        """
        Asynchronous version of plan() method for parallel planning.

        Mirrors plan() exactly, using the agent's async observation, LLM and
        memory entry points.
        """
        if obs is None:
            obs = await self.agent.agenerate_obs()

        prompt_list, selected_tools_schema = self._prepare_request(
            obs, prompt, selected_tools
        )

        rsp = await self.agent.llm.agenerate(
            prompt=prompt_list,
            tool_schema=selected_tools_schema,
            tool_choice="none",
            response_format=DecisionOutput,
        )

        formatted_response = json.loads(rsp.choices[0].message.content)
        await self.agent.memory.aadd_to_memory(
            type="decision", content=formatted_response
        )
        self._publish_rationale(formatted_response)

        return await self.aexecute_tool_call(
            formatted_response["next_action"],
            selected_tools=selected_tools,
            ttl=ttl,
        )

0 commit comments

Comments
 (0)