feat: add clone method to session; remove refs to model_opts in session

jakelorocco · jakelorocco · commit b956f8dac907 · 2025-10-06T09:38:52.000-04:00
diff --git a/docs/tutorial.md b/docs/tutorial.md
@@ -944,6 +944,23 @@ or the entire last turn (user query + assistant response):
 print(m.ctx.last_turn())
 ```
 
+You can also use `session.clone()` to create a copy of a session that captures its context at a given point in time. This allows you to make multiple generation requests that each start from the same context:
+```python
+m = start_session(ctx=ChatContext())
+m.instruct("Multiply 2x2.")
+
+m1 = m.clone()
+m2 = m.clone()
+
+# Need to run this code in an async event loop.
+co1 = m1.ainstruct("Multiply that by 3")
+co2 = m2.ainstruct("Multiply that by 5")
+
+print(await co1)  # 12
+print(await co2)  # 20
+```
+In the above example, both requests have `Multiply 2x2` and the LLM's response to that (presumably `4`) in their context. Because each request runs on its own clone of the session, the two requests operate independently on that context and get the correct answers to 4 x 3 and 4 x 5.
+
 ## Chapter 8: Implementing Agents
 
 > **Definition:**  An *agent* is a generative program in which an LLM determines the control flow of the program.
@@ -1323,13 +1340,13 @@ Mellea supports asynchronous behavior in several ways: asynchronous functions an
 
 ### Asynchronous Functions:
 `MelleaSession`s have asynchronous functions that work just like regular async functions in python. These async session functions mirror their synchronous counterparts:
-```
+```python
 m = start_session()
 result = await m.ainstruct("Write your instruction here!")
 ```
 
 However, if you want to run multiple async functions at the same time, you need to be careful with your context. By default, `MelleaSession`s use a `SimpleContext` that has no history. This will work just fine when running multiple async requests at once:
-```
+```python
 m = start_session()
 coroutines = []
 
@@ -1340,7 +1357,7 @@ results = await asyncio.gather(*coroutines)
 ```
 
 If you try to use a `ChatContext`, you will need to await between each request so that the context can be properly modified:
-```
+```python
 m = start_session(ctx=ChatContext())
 
 result = await m.ainstruct("Write a short fairy tale.")
@@ -1351,7 +1368,7 @@ print(main_character)
 ```
 
 Otherwise, you're requests will use outdated contexts that don't have the messages you expect. For example,
-```
+```python
 m = start_session(ctx=ChatContext())
 
 co1 = m.ainstruct("Write a very long math problem.")  # Start first request.
@@ -1360,8 +1377,12 @@ co2 = m.ainstruct("Solve the math problem.")  # Start second request with an emp
 results = await asyncio.gather(co1, co2)
 for result in results:
     print(result)  # Neither request had anything in its context.
+
+print(m.ctx)  # Only shows the operations from the second request.
 ```
 
+Additionally, see [Chapter 7: Context Management](#chapter-7-on-context-management) for an example of how to use `session.clone()` to avoid these context issues.
+
 ### Asynchronicity in Synchronous Functions
 Mellea utilizes asynchronicity internally. When you call `m.instruct`, you are using synchronous code that executes an asynchronous request to an LLM to generate the result. For a single request, this won't cause any differences in execution speed.
 
diff --git a/mellea/stdlib/session.py b/mellea/stdlib/session.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import contextvars
+from copy import copy
 from typing import Any, Literal, overload
 
 from PIL import Image as PILImage
@@ -176,11 +177,10 @@ def __init__(self, backend: Backend, ctx: Context | None = None):
         Args:
             backend (Backend): This is always required.
             ctx (Context): The way in which the model's context will be managed. By default, each interaction with the model is a stand-alone interaction, so we use SimpleContext as the default.
-            model_options (Optional[dict]): model options, which will upsert into the model/backend's defaults.
         """
         self.backend = backend
         self.ctx: Context = ctx if ctx is not None else SimpleContext()
-        self._backend_stack: list[tuple[Backend, dict | None]] = []
+        self._backend_stack: list[Backend] = []
         self._session_logger = FancyLogger.get_logger()
         self._context_token = None
 
@@ -196,14 +196,10 @@ def __exit__(self, exc_type, exc_val, exc_tb):
             _context_session.reset(self._context_token)
             self._context_token = None
 
-    def _push_model_state(self, new_backend: Backend, new_model_opts: dict):
-        """The backend and model options used within a `Context` can be temporarily changed. This method changes the model's backend and model_opts, while saving the current settings in the `self._backend_stack`.
-
-        Question: should this logic be moved into context? I really want to keep `Session` as simple as possible... see true motivation in the docstring for the class.
-        """
-        self._backend_stack.append((self.backend, self.model_options))
+    def _push_model_state(self, new_backend: Backend):
+        """Temporarily switch this session to `new_backend`, saving the current backend on `self._backend_stack` so that `_pop_model_state` can restore it."""
+        self._backend_stack.append(self.backend)
         self.backend = new_backend
-        self.opts = new_model_opts
 
     def _pop_model_state(self) -> bool:
         """Pops the model state.
@@ -214,13 +210,43 @@ def _pop_model_state(self) -> bool:
         Question: should this logic be moved into context? I really want to keep `Session` as simple as possible... see true motivation in the docstring for the class.
         """
         try:
-            b, b_model_opts = self._backend_stack.pop()
+            b = self._backend_stack.pop()
             self.backend = b
-            self.model_options = b_model_opts
             return True
         except Exception:
             return False
 
+    def __copy__(self):
+        new = MelleaSession(backend=self.backend, ctx=self.ctx)  # same backend and context objects, not copies
+        new._backend_stack = self._backend_stack.copy()  # separate list, so later pushes/pops don't affect the other session
+        new._session_logger = self._session_logger
+        # Explicitly don't copy over the _context_token; the new session keeps the None that __init__ assigns.
+
+        return new
+
+    def clone(self):
+        """Return a copy of this session that starts from the current context, so several generation requests can branch from the same point in time.
+
+        Returns:
+            A new session that shares this session's context, backend, and session logger, and has its own copy of the backend stack.
+
+        Examples:
+            >>> from mellea import start_session
+            >>> from mellea.stdlib.base import ChatContext
+            >>> m = start_session(ctx=ChatContext())
+            >>> m.instruct("What is 2x2?")
+            >>> m1 = m.clone()
+            >>> out = m1.instruct("Multiply that by 2")
+            >>> print(out)
+            8
+            >>>
+            >>> m2 = m.clone()
+            >>> out = m2.instruct("Multiply that by 3")
+            >>> print(out)
+            12
+        """
+        return copy(self)
+
     def reset(self):
         """Reset the context state."""
         self.ctx = self.ctx.reset_to_new()
diff --git a/test/stdlib_basics/test_session.py b/test/stdlib_basics/test_session.py
@@ -3,6 +3,7 @@
 
 import pytest
 
+from mellea.backends.ollama import OllamaModelBackend
 from mellea.backends.types import ModelOption
 from mellea.stdlib.base import ChatContext, ModelOutputThunk
 from mellea.stdlib.chat import Message
@@ -99,6 +100,55 @@ async def test_async_without_waiting_with_chat_context(m_session):
     ctx = m_session.ctx
     assert len(ctx.view_for_generation()) == 2
 
+def test_session_copy_with_context_ops(m_session):
+    out = m_session.instruct("What is 2x2?")
+    main_ctx = m_session.ctx
+
+    m1 = m_session.clone()
+    out1 = m1.instruct("Multiply by 3.")
+
+    m2 = m_session.clone()
+    out2 = m2.instruct("Multiply by 4.")
+
+    # Generating on the clones must leave the original session's context untouched and give each clone its own context.
+    assert m_session.ctx is main_ctx
+    assert m_session.ctx is not m1.ctx
+    assert m_session.ctx is not m2.ctx
+    assert m1.ctx is not m2.ctx
+
+    # Each context's newest node should hold the output of the latest instruct call on that session.
+    assert m_session.ctx.node_data is out
+    assert m1.ctx.node_data is out1
+    assert m2.ctx.node_data is out2
+
+    # Both clones still branch off the original context; two hops back, presumably because each instruct adds two nodes (TODO confirm).
+    assert m1.ctx.previous_node.previous_node is m_session.ctx
+    assert m2.ctx.previous_node.previous_node is m_session.ctx
+
+def test_session_copy_with_backend_stack(m_session):
+    # A clone shares the backend and session logger but gets its own backend-stack list.
+    m1 = m_session.clone()
+    assert m1.backend is m_session.backend
+    assert m1._session_logger is m_session._session_logger
+    assert m1._backend_stack is not m_session._backend_stack
+
+    # Assert that pushing to a backend stack doesn't change it for sessions previously cloned from it.
+    new_backend = OllamaModelBackend()
+    m_session._push_model_state(new_backend=new_backend)
+    assert len(m_session._backend_stack) == 1
+    assert len(m1._backend_stack) == 0
+    assert m1.backend is not m_session.backend
+
+    # A session cloned after the push starts with a copy of the one-entry backend stack.
+    m2 = m_session.clone()
+    assert len(m2._backend_stack) == 1
+
+    # The lists are still distinct, so popping on the clone leaves the original's stack alone.
+    assert m2._backend_stack is not m_session._backend_stack
+    assert m2._pop_model_state()
+    assert len(m2._backend_stack) == 0
+    assert len(m_session._backend_stack) == 1
+    assert m2.backend is m1.backend  # the pop restored the pre-push backend, which m1 still uses
 
 if __name__ == "__main__":
     pytest.main([__file__])