
Commit 717bb1e

Feat: add agent run cache (#10)
* add initial cache functionality
* add caching
* remove resume=True in example
* clean up agent cache attributes
1 parent 70a5388 commit 717bb1e

File tree

5 files changed: +704 -14 lines changed


docs/guides/caching.md

Lines changed: 79 additions & 0 deletions
@@ -0,0 +1,79 @@
# Caching and Resumption

Stirrup automatically caches agent state on interruptions, allowing you to resume long-running tasks.

## Enabling Resume

Pass `resume=True` to `session()`:

```python
from stirrup import Agent
from stirrup.clients.chat_completions_client import ChatCompletionsClient
from stirrup.tools import DEFAULT_TOOLS

client = ChatCompletionsClient(model="gpt-5")
agent = Agent(client=client, name="researcher", tools=DEFAULT_TOOLS, max_turns=50)

async with agent.session(output_dir="./output", resume=True) as session:
    await session.run("Analyze all datasets in the data folder")
```

## How It Works

1. **On interruption** (Ctrl+C, error, or max turns): Stirrup saves conversation state and execution environment files to `~/.cache/stirrup/<task_hash>/` (a sketch of the underlying pattern follows the example below)

2. **On the next run with `resume=True`**: If a cache exists for the same prompt, the agent restores that state and continues from the last turn

3. **On successful completion**: The cache is automatically cleared (configurable via `clear_cache_on_success`)

```
# First run (interrupted at turn 15)
$ python my_agent.py
^C
Cached state for task abc123...

# Second run (resumes from turn 15)
$ python my_agent.py
Resuming from cached state at turn 15
```
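
Aside (illustrative, not part of `caching.md`): step 1 works because the session installs a SIGINT handler that turns Ctrl+C into a `KeyboardInterrupt`, so the context manager's exit hook still runs and can persist state before cleanup. A minimal sketch of that pattern in plain Python; `Session` and the `print` stand-in are hypothetical, not Stirrup APIs:

```python
import signal


def _raise_keyboard_interrupt(signum, frame):
    # Turn SIGINT into an exception so context managers unwind normally.
    raise KeyboardInterrupt("interrupted - state will be cached")


class Session:
    def __enter__(self):
        # Remember the previous handler so it can be restored on exit.
        self._original_sigint = signal.getsignal(signal.SIGINT)
        signal.signal(signal.SIGINT, _raise_keyboard_interrupt)
        return self

    def __exit__(self, exc_type, exc, tb):
        try:
            if exc_type is not None:
                print("caching state before exit")  # stand-in for CacheManager.save_state(...)
        finally:
            signal.signal(signal.SIGINT, self._original_sigint)
        return False  # propagate the interrupt to the caller
```

Per the `agent.py` diff further down, Stirrup installs this handler only in the root agent's session (`current_depth == 0`), so nested sub-agents do not fight over the process-wide handler.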

## What Gets Cached

- Conversation messages and history (see the `CacheState` sketch below)
- Current turn number
- Tool metadata
- All files in the execution environment
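
Aside (illustrative, not part of `caching.md`): the conversation part of this list corresponds to the `CacheState` snapshot taken at the top of each turn in `Agent.run()` (see the `agent.py` diff below). A rough sketch of its shape; the field names come from that diff, but the exact types in `stirrup.core.cache` are assumptions:

```python
# Sketch only - mirrors the CacheState fields used in agent.py below.
from dataclasses import dataclass
from typing import Any


@dataclass
class CacheStateSketch:
    msgs: list[Any]                     # current conversation messages
    full_msg_history: list[list[Any]]   # per-turn message groups
    turn: int                           # turn number to resume from
    run_metadata: dict[str, list[Any]]  # tool metadata collected so far
    task_hash: str                      # hash of the initial prompt
    agent_name: str                     # agent that produced the snapshot
```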
## Preserving Caches on Success

By default, caches are cleared on successful completion. To preserve them for inspection or debugging:

```python
async with agent.session(
    resume=True,
    clear_cache_on_success=False,  # Keep cache after success
) as session:
    await session.run("Analyze the data")
```

## Managing Caches

```python
from stirrup.core.cache import CacheManager

cache_manager = CacheManager()

# List all caches
for task_hash in cache_manager.list_caches():
    info = cache_manager.get_cache_info(task_hash)
    print(f"{task_hash}: turn {info['turn']}")

# Clear a specific cache
cache_manager.clear_cache("abc123def456")
```
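
Combining the calls above also gives a simple bulk cleanup. A sketch, assuming `list_caches()` and `clear_cache()` behave exactly as shown in the snippet above:

```python
# Sketch: remove every cached task using the CacheManager API shown above.
from stirrup.core.cache import CacheManager

cache_manager = CacheManager()
for task_hash in cache_manager.list_caches():
    cache_manager.clear_cache(task_hash)
```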

## Notes

- The cache key is computed from the initial prompt, so the same prompt maps to the same cache (illustrated below)
- Caches are stored locally in `~/.cache/stirrup/`
- Caches are automatically cleared on successful completion (by default)
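
Aside (illustrative): because the key is a hash of the initial prompt, identical prompts resolve to the same cache entry. A small illustration, assuming `compute_task_hash` (imported in the `agent.py` diff below) accepts the same string prompt that `run()` does and hashes it deterministically:

```python
from stirrup.core.cache import compute_task_hash

key_a = compute_task_hash("Analyze all datasets in the data folder")
key_b = compute_task_hash("Analyze all datasets in the data folder")
key_c = compute_task_hash("Summarize the data folder")

assert key_a == key_b  # same prompt -> same cache entry
assert key_a != key_c  # different prompt -> different cache entry
```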

mkdocs.yml

Lines changed: 1 addition & 0 deletions
```diff
@@ -80,6 +80,7 @@ nav:
 - Creating Tools: guides/tools.md
 - Tool Providers: guides/tool-providers.md
 - Code Execution: guides/code-execution.md
+- Caching: guides/caching.md
 - Skills: guides/skills.md
 - Sub-Agents: guides/sub-agents.md
 - MCP Integration: guides/mcp.md
```

src/stirrup/core/agent.py

Lines changed: 140 additions & 14 deletions
```diff
@@ -5,6 +5,7 @@
 import json
 import logging
 import re
+import signal
 from contextlib import AsyncExitStack
 from dataclasses import dataclass, field
 from itertools import chain, takewhile
@@ -20,6 +21,7 @@
     CONTEXT_SUMMARIZATION_CUTOFF,
     FINISH_TOOL_NAME,
 )
+from stirrup.core.cache import CacheManager, CacheState, compute_task_hash
 from stirrup.core.models import (
     AssistantMessage,
     ChatMessage,
@@ -226,13 +228,19 @@ def __init__(
         self._pending_output_dir: Path | None = None
         self._pending_input_files: str | Path | list[str | Path] | None = None
         self._pending_skills_dir: Path | None = None
+        self._resume: bool = False
+        self._clear_cache_on_success: bool = True

         # Instance-scoped state (populated during __aenter__, isolated per agent instance)
         self._active_tools: dict[str, Tool] = {}
         self._last_finish_params: Any = None  # FinishParams type parameter
         self._last_run_metadata: dict[str, list[Any]] = {}
         self._transferred_paths: list[str] = []  # Paths transferred to parent (for subagents)

+        # Cache state for resumption (set during run(), used in __aexit__ for caching on interrupt)
+        self._current_task_hash: str | None = None
+        self._current_run_state: CacheState | None = None
+
     @property
     def name(self) -> str:
         """The name of this agent."""
@@ -263,6 +271,8 @@ def session(
         output_dir: Path | str | None = None,
         input_files: str | Path | list[str | Path] | None = None,
         skills_dir: Path | str | None = None,
+        resume: bool = False,
+        clear_cache_on_success: bool = True,
     ) -> Self:
         """Configure a session and return self for use as async context manager.

@@ -278,6 +288,13 @@
             skills_dir: Directory containing skill definitions to load and make available
                 to the agent. Skills are uploaded to the execution environment
                 and their metadata is included in the system prompt.
+            resume: If True, attempt to resume from cached state if available.
+                The cache is identified by hashing the init_msgs passed to run().
+                Cached state includes message history, current turn, and execution
+                environment files from a previous interrupted run.
+            clear_cache_on_success: If True (default), automatically clear the cache
+                when the agent completes successfully. Set to False
+                to preserve caches for inspection or debugging.

         Returns:
             Self, for use with `async with agent.session(...) as session:`
@@ -294,8 +311,18 @@
         self._pending_output_dir = Path(output_dir) if output_dir else None
         self._pending_input_files = input_files
         self._pending_skills_dir = Path(skills_dir) if skills_dir else None
+        self._resume = resume
+        self._clear_cache_on_success = clear_cache_on_success
         return self

+    def _handle_interrupt(self, _signum: int, _frame: object) -> None:
+        """Handle SIGINT to ensure caching before exit.
+
+        Converts the signal to a KeyboardInterrupt exception so that __aexit__
+        is properly called and can cache the state before cleanup.
+        """
+        raise KeyboardInterrupt("Agent interrupted - state will be cached")
+
     def _resolve_input_files(self, input_files: str | Path | list[str | Path]) -> list[Path]:
         """Resolve input file paths, expanding globs and normalizing to Path objects.

@@ -632,6 +659,11 @@ async def __aenter__(self) -> Self:
             # depth is already set (0 for main agent, passed in for sub-agents)
             self._logger.__enter__()

+            # Set up signal handler for graceful caching on interrupt (root agent only)
+            if current_depth == 0:
+                self._original_sigint = signal.getsignal(signal.SIGINT)
+                signal.signal(signal.SIGINT, self._handle_interrupt)
+
             return self

         except Exception:
@@ -653,6 +685,47 @@ async def __aexit__(
         state = _SESSION_STATE.get()

         try:
+            # Cache state on non-success exit (only at root level)
+            should_cache = (
+                state.depth == 0
+                and (exc_type is not None or self._last_finish_params is None)
+                and self._current_task_hash is not None
+                and self._current_run_state is not None
+            )
+
+            logger.debug(
+                "[%s __aexit__] Cache decision: should_cache=%s, depth=%d, exc_type=%s, "
+                "finish_params=%s, task_hash=%s, run_state=%s",
+                self._name,
+                should_cache,
+                state.depth,
+                exc_type,
+                self._last_finish_params is not None,
+                self._current_task_hash,
+                self._current_run_state is not None,
+            )
+
+            if should_cache:
+                cache_manager = CacheManager(clear_on_success=self._clear_cache_on_success)
+
+                exec_env_dir = state.exec_env.temp_dir if state.exec_env else None
+
+                # Explicit checks to keep type checker happy - should_cache condition guarantees these
+                if self._current_task_hash is None or self._current_run_state is None:
+                    raise ValueError("Cache state is unexpectedly None after should_cache check")
+
+                # Temporarily block SIGINT during cache save to prevent interruption
+                original_handler = signal.getsignal(signal.SIGINT)
+                signal.signal(signal.SIGINT, signal.SIG_IGN)
+                try:
+                    cache_manager.save_state(
+                        self._current_task_hash,
+                        self._current_run_state,
+                        exec_env_dir,
+                    )
+                finally:
+                    signal.signal(signal.SIGINT, original_handler)
+                self._logger.info(f"Cached state for task {self._current_task_hash}")
             # Save files from finish_params.paths based on depth
             if state.output_dir and self._last_finish_params and state.exec_env:
                 paths = getattr(self._last_finish_params, "paths", None)
```
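
Aside (not part of the diff): the `SIG_IGN` swap above exists so a second Ctrl+C cannot corrupt the cache write; the previous handler is restored in the `finally` block. A generic, self-contained sketch of that pattern, with the helper name `run_uninterruptible` chosen purely for illustration:

```python
import signal
from typing import Callable


def run_uninterruptible(save_fn: Callable[[], None]) -> None:
    """Run save_fn while ignoring SIGINT, then restore the previous handler."""
    original_handler = signal.getsignal(signal.SIGINT)
    signal.signal(signal.SIGINT, signal.SIG_IGN)  # a Ctrl+C during the save is ignored entirely
    try:
        save_fn()
    finally:
        signal.signal(signal.SIGINT, original_handler)
```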
```diff
@@ -707,6 +780,11 @@ async def __aexit__(
                 state.depth,
             )
         finally:
+            # Restore original signal handler (root agent only)
+            if hasattr(self, "_original_sigint"):
+                signal.signal(signal.SIGINT, self._original_sigint)
+                del self._original_sigint
+
             # Exit logger context
             self._logger.finish_params = self._last_finish_params
             self._logger.run_metadata = self._last_run_metadata
@@ -870,23 +948,59 @@ async def run(
             ])

         """
-        msgs: list[ChatMessage] = []

-        # Build the complete system prompt (base + input files + user instructions)
-        full_system_prompt = self._build_system_prompt()
-        msgs.append(SystemMessage(content=full_system_prompt))
+        # Compute task hash for caching/resume
+        task_hash = compute_task_hash(init_msgs)
+        self._current_task_hash = task_hash
+
+        # Initialize cache manager
+        cache_manager = CacheManager(clear_on_success=self._clear_cache_on_success)
+        start_turn = 0
+        resumed = False
+
+        # Try to resume from cache if requested
+        if self._resume:
+            state = _SESSION_STATE.get()
+            cached = cache_manager.load_state(task_hash)
+            if cached:
+                # Restore files to exec env
+                if state.exec_env and state.exec_env.temp_dir:
+                    cache_manager.restore_files(task_hash, state.exec_env.temp_dir)
+
+                # Restore state
+                msgs = cached.msgs
+                full_msg_history = cached.full_msg_history
+                run_metadata = cached.run_metadata
+                start_turn = cached.turn
+                resumed = True
+                self._logger.info(f"Resuming from cached state at turn {start_turn}")
+            else:
+                self._logger.info(f"No cache found for task {task_hash}, starting fresh")

-        if isinstance(init_msgs, str):
-            msgs.append(UserMessage(content=init_msgs))
-        else:
-            msgs.extend(init_msgs)
+        if not resumed:
+            msgs: list[ChatMessage] = []
+
+            # Build the complete system prompt (base + input files + user instructions)
+            full_system_prompt = self._build_system_prompt()
+            msgs.append(SystemMessage(content=full_system_prompt))
+
+            if isinstance(init_msgs, str):
+                msgs.append(UserMessage(content=init_msgs))
+            else:
+                msgs.extend(init_msgs)
+
+            # Local metadata storage - isolated per run() invocation for thread safety
+            run_metadata: dict[str, list[Any]] = {}
+
+            full_msg_history: list[list[ChatMessage]] = []

         # Set logger depth if provided (for sub-agent runs)
         if depth is not None:
             self._logger.depth = depth

-        # Log the task at run start
-        self._logger.task_message(msgs[-1].content)
+        # Log the task at run start (only if not resuming)
+        if not resumed:
+            self._logger.task_message(msgs[-1].content)

         # Show warnings (top-level only, if logger supports it)
         if self._logger.depth == 0 and isinstance(self._logger, AgentLogger):
@@ -897,9 +1011,6 @@
         # Use logger callback if available and not overridden
         step_callback = self._logger.on_step

-        # Local metadata storage - isolated per run() invocation for thread safety
-        run_metadata: dict[str, list[Any]] = {}
-
         full_msg_history: list[list[ChatMessage]] = []
         finish_params: FinishParams | None = None

@@ -908,7 +1019,16 @@
         total_input_tokens = 0
         total_output_tokens = 0

-        for i in range(self._max_turns):
+        for i in range(start_turn, self._max_turns):
+            # Capture current state for potential caching (before any async work)
+            self._current_run_state = CacheState(
+                msgs=list(msgs),
+                full_msg_history=[list(group) for group in full_msg_history],
+                turn=i,
+                run_metadata=dict(run_metadata),
+                task_hash=task_hash,
+                agent_name=self._name,
+            )
             if self._max_turns - i <= 30 and i != 0:
                 num_turns_remaining_msg = _num_turns_remaining_msg(self._max_turns - i)
                 msgs.append(num_turns_remaining_msg)
@@ -976,6 +1096,12 @@
         self._last_finish_params = finish_params
         self._last_run_metadata = run_metadata

+        # Clear cache on successful completion (finish_params is set)
+        if finish_params is not None and cache_manager.clear_on_success:
+            cache_manager.clear_cache(task_hash)
+            self._current_task_hash = None
+            self._current_run_state = None
+
         return finish_params, full_msg_history, run_metadata

     def to_tool(
```

0 commit comments
