Skip to content

Commit 98e0e87

Browse files
committed
Merge branch 'main' into fix/episodic-memory
2 parents ac90308 + 4ea91ed commit 98e0e87

28 files changed

+2190
-184
lines changed

.github/workflows/release.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ jobs:
3737
run: python -m build
3838
- name: Upload package as artifact to GitHub
3939
if: github.repository == 'mesa/mesa-llm' && startsWith(github.ref, 'refs/tags')
40-
uses: actions/upload-artifact@v6
40+
uses: actions/upload-artifact@v7
4141
with:
4242
name: package
4343
path: dist/

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ ci:
55
repos:
66
- repo: https://github.com/astral-sh/ruff-pre-commit
77
# Ruff version.
8-
rev: v0.14.14
8+
rev: v0.15.4
99
hooks:
1010
# Run the linter with fix argument.
1111
- id: ruff-check

mesa_llm/memory/lt_memory.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,14 +63,16 @@ def _update_long_term_memory(self):
6363
Update the long term memory by summarizing the short term memory with a LLM
6464
"""
6565
prompt = self._build_consolidation_prompt()
66-
self.long_term_memory = self.llm.generate(prompt)
66+
response = self.llm.generate(prompt)
67+
self.long_term_memory = response.choices[0].message.content
6768

6869
async def _aupdate_long_term_memory(self):
6970
"""
7071
Asynchronous version of _update_long_term_memory
7172
"""
7273
prompt = self._build_consolidation_prompt()
73-
self.long_term_memory = await self.llm.agenerate(prompt)
74+
response = await self.llm.agenerate(prompt)
75+
self.long_term_memory = response.choices[0].message.content
7476

7577
def process_step(self, pre_step: bool = False):
7678
"""

mesa_llm/memory/memory.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,12 @@ def add_to_memory(self, type: str, content: dict):
137137
"""
138138
Add a new entry to the memory
139139
"""
140+
if not isinstance(content, dict):
141+
raise TypeError(
142+
"Expected 'content' to be dict, "
143+
f"got {content.__class__.__name__}: {content!r}"
144+
)
145+
140146
if type == "observation":
141147
# Only store changed parts of observation
142148
changed_parts = {

mesa_llm/memory/st_lt_memory.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -88,14 +88,16 @@ def _update_long_term_memory(self):
8888
Update the long term memory by summarizing the short term memory with a LLM
8989
"""
9090
prompt = self._build_consolidation_prompt()
91-
self.long_term_memory = self.llm.generate(prompt)
91+
response = self.llm.generate(prompt)
92+
self.long_term_memory = response.choices[0].message.content
9293

9394
async def _aupdate_long_term_memory(self):
9495
"""
9596
Async Function to update long term memory
9697
"""
9798
prompt = self._build_consolidation_prompt()
98-
self.long_term_memory = await self.llm.agenerate(prompt)
99+
response = await self.llm.agenerate(prompt)
100+
self.long_term_memory = response.choices[0].message.content
99101

100102
def _process_step_core(self, pre_step: bool):
101103
"""
@@ -191,10 +193,10 @@ def format_short_term(self) -> str:
191193
return "\n".join(lines)
192194

193195
def get_prompt_ready(self) -> str:
194-
return [
195-
f"Short term memory:\n {self.format_short_term()}",
196-
f"Long term memory: \n{self.format_long_term()}",
197-
]
196+
return (
197+
f"Short term memory:\n {self.format_short_term()}\n\n"
198+
f"Long term memory: \n{self.format_long_term()}"
199+
)
198200

199201
def get_communication_history(self) -> str:
200202
"""

mesa_llm/memory/st_memory.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ class ShortTermMemory(Memory):
1313
1414
Attributes:
1515
agent : the agent that the memory belongs to
16-
n : number of short-term memories to remember
16+
n : positive number of short-term memories to remember
1717
display : whether to display the memory
1818
llm_model : the model to use for the summarization
1919
"""
@@ -24,12 +24,16 @@ def __init__(
2424
n: int = 5,
2525
display: bool = True,
2626
):
27+
if n < 1:
28+
raise ValueError("n must be >= 1 for ShortTermMemory")
29+
2730
super().__init__(
2831
agent=agent,
2932
display=display,
3033
)
3134
self.n = n
32-
self.short_term_memory = deque()
35+
self.short_term_memory = deque(maxlen=self.n)
36+
self._current_step_entry: MemoryEntry | None = None
3337

3438
async def aprocess_step(self, pre_step: bool = False):
3539
"""
@@ -40,35 +44,36 @@ async def aprocess_step(self, pre_step: bool = False):
4044
def process_step(self, pre_step: bool = False):
4145
"""
4246
Process the step of the agent :
43-
- Add the new entry to the short term memory
47+
- Capture pre-step content into the current in-progress step entry
48+
- Merge current and post-step content into one finalized entry
4449
- Display the new entry
4550
"""
4651

47-
# Add the new entry to the short term memory
52+
# Save a temporary pre-step snapshot. This entry is not persisted in deque.
4853
if pre_step:
49-
new_entry = MemoryEntry(
54+
self._current_step_entry = MemoryEntry(
5055
agent=self.agent,
5156
content=self.step_content,
5257
step=None,
5358
)
54-
self.short_term_memory.append(new_entry)
5559
self.step_content = {}
5660
return
5761

58-
elif not self.short_term_memory[-1].content.get("step", None):
59-
pre_step = self.short_term_memory.pop()
60-
self.step_content.update(pre_step.content)
62+
new_entry = None
63+
if self._current_step_entry is not None:
64+
merged_content = dict(self.step_content)
65+
merged_content.update(self._current_step_entry.content)
6166
new_entry = MemoryEntry(
6267
agent=self.agent,
63-
content=self.step_content,
68+
content=merged_content,
6469
step=self.agent.model.steps,
6570
)
66-
6771
self.short_term_memory.append(new_entry)
72+
self._current_step_entry = None
6873
self.step_content = {}
6974

7075
# Display the new entry
71-
if self.display:
76+
if self.display and new_entry is not None:
7277
new_entry.display()
7378

7479
def format_short_term(self) -> str:

mesa_llm/module_llm.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,25 @@ def __init__(
3737
Initialize the LLM module
3838
3939
Args:
40-
llm_model: The model to use for the LLM in the format of {provider}/{LLM}
40+
llm_model: The model to use for the LLM in the format
41+
"{provider}/{model}" (for example, "openai/gpt-4o").
4142
api_base: The API base to use if the LLM provider is Ollama
4243
system_prompt: The system prompt to use for the LLM
44+
45+
Raises:
46+
ValueError: If llm_model is not in the expected "{provider}/{model}"
47+
format, or if the provider API key is missing.
4348
"""
4449
self.api_base = api_base
4550
self.llm_model = llm_model
4651
self.system_prompt = system_prompt
52+
53+
if "/" not in llm_model:
54+
raise ValueError(
55+
f"Invalid model format '{llm_model}'. "
56+
"Expected '{provider}/{model}', e.g. 'openai/gpt-4o'."
57+
)
58+
4759
provider = self.llm_model.split("/")[0].upper()
4860

4961
if provider in ["OLLAMA", "OLLAMA_CHAT"]:

mesa_llm/reasoning/cot.py

Lines changed: 38 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ class CoTReasoning(Reasoning):
1414
- **agent** (LLMAgent reference)
1515
1616
Methods:
17-
- **plan(prompt, obs=None, ttl=1, selected_tools=None)** → *Plan* - Generate synchronous plan with CoT reasoning
18-
- **async aplan(prompt, obs=None, ttl=1, selected_tools=None)** → *Plan* - Generate asynchronous plan with CoT reasoning
17+
- **plan(obs, ttl=1, prompt=None, selected_tools=None)** → *Plan* - Generate synchronous plan with CoT reasoning
18+
- **async aplan(obs, ttl=1, prompt=None, selected_tools=None)** → *Plan* - Generate asynchronous plan with CoT reasoning
1919
2020
Reasoning Format:
2121
Thought 1: [Initial reasoning based on observation]
@@ -87,9 +87,9 @@ def get_cot_system_prompt(self, obs: Observation) -> str:
8787

8888
def plan(
8989
self,
90-
obs: Observation,
91-
ttl: int = 1,
9290
prompt: str | None = None,
91+
obs: Observation | None = None,
92+
ttl: int = 1,
9393
selected_tools: list[str] | None = None,
9494
) -> Plan:
9595
"""
@@ -102,12 +102,17 @@ def plan(
102102
else:
103103
raise ValueError("No prompt provided and agent.step_prompt is None.")
104104

105+
if obs is None:
106+
obs = self.agent.generate_obs()
107+
105108
step = obs.step + 1
106109
llm = self.agent.llm
107110
obs_str = str(obs)
108111

109112
# Add current observation to memory (for record)
110-
self.agent.memory.add_to_memory(type="Observation", content=obs_str)
113+
self.agent.memory.add_to_memory(
114+
type="Observation", content={"content": obs_str}
115+
)
111116
system_prompt = self.get_cot_system_prompt(obs)
112117

113118
llm.system_prompt = system_prompt
@@ -118,7 +123,9 @@ def plan(
118123
)
119124

120125
chaining_message = rsp.choices[0].message.content
121-
self.agent.memory.add_to_memory(type="Plan", content=chaining_message)
126+
self.agent.memory.add_to_memory(
127+
type="Plan", content={"content": chaining_message}
128+
)
122129

123130
# Pass plan content to agent for display
124131
if hasattr(self.agent, "_step_display_data"):
@@ -131,25 +138,41 @@ def plan(
131138
tool_choice="required",
132139
)
133140
response_message = rsp.choices[0].message
134-
cot_plan = Plan(step=step, llm_plan=response_message, ttl=1)
141+
cot_plan = Plan(step=step, llm_plan=response_message, ttl=ttl)
135142

136-
self.agent.memory.add_to_memory(type="Plan-Execution", content=str(cot_plan))
143+
self.agent.memory.add_to_memory(
144+
type="Plan-Execution", content={"content": str(cot_plan)}
145+
)
137146

138147
return cot_plan
139148

140149
async def aplan(
141150
self,
142-
prompt: str,
143-
obs: Observation,
151+
prompt: str | None = None,
152+
obs: Observation | None = None,
144153
ttl: int = 1,
145154
selected_tools: list[str] | None = None,
146155
) -> Plan:
147156
"""
148157
Asynchronous version of plan() method for parallel planning.
149158
"""
159+
# If no prompt is provided, use the agent's default step prompt
160+
if prompt is None:
161+
if self.agent.step_prompt is not None:
162+
prompt = self.agent.step_prompt
163+
else:
164+
raise ValueError("No prompt provided and agent.step_prompt is None.")
165+
166+
if obs is None:
167+
obs = await self.agent.agenerate_obs()
168+
150169
step = obs.step + 1
151170
llm = self.agent.llm
152171

172+
obs_str = str(obs)
173+
await self.agent.memory.aadd_to_memory(
174+
type="Observation", content={"content": obs_str}
175+
)
153176
system_prompt = self.get_cot_system_prompt(obs)
154177
llm.system_prompt = system_prompt
155178

@@ -160,7 +183,9 @@ async def aplan(
160183
)
161184

162185
chaining_message = rsp.choices[0].message.content
163-
await self.agent.memory.aadd_to_memory(type="Plan", content=chaining_message)
186+
await self.agent.memory.aadd_to_memory(
187+
type="Plan", content={"content": chaining_message}
188+
)
164189

165190
# Pass plan content to agent for display
166191
if hasattr(self.agent, "_step_display_data"):
@@ -173,10 +198,10 @@ async def aplan(
173198
tool_choice="required",
174199
)
175200
response_message = rsp.choices[0].message
176-
cot_plan = Plan(step=step, llm_plan=response_message, ttl=1)
201+
cot_plan = Plan(step=step, llm_plan=response_message, ttl=ttl)
177202

178203
await self.agent.memory.aadd_to_memory(
179-
type="Plan-Execution", content=str(cot_plan)
204+
type="Plan-Execution", content={"content": str(cot_plan)}
180205
)
181206

182207
return cot_plan

0 commit comments

Comments (0)