|
| 1 | +--- |
| 2 | +title: 'Agent Execution Retrospective (D-Mail)' |
| 3 | +docname: 'agent_retrospective' |
| 4 | +order: 8 |
| 5 | +id: 'f3a1' |
| 6 | +--- |
| 7 | +## Overview |
| 8 | + |
| 9 | +Execution retrospective lets an agent "rewind" its conversation to an earlier checkpoint |
| 10 | +when it realizes it has been going in circles or took a wrong path. Inspired by kimi-cli's |
| 11 | +D-Mail mechanism, this capability injects visible `[CHECKPOINT N]` markers before each step. |
| 12 | +When the agent calls `execution_retrospective(checkpoint_id, guidance)`, the message context |
| 13 | +is truncated to the point just before that checkpoint, and the guidance is injected as a message from the |
| 14 | +agent's "future self". |
| 15 | + |
| 16 | +Key properties: |
| 17 | +- **Only the message buffer is rewound** — execution history (steps, token usage) is preserved |
| 18 | +- **Side effects are NOT undone** — file changes, API calls remain; guidance should account for them |
| 19 | +- **Checkpoint markers are visible to the LLM** — the agent can reference them by ID |
| 20 | +- **`onRewind` callback** — extension point for user-defined self-improvement (logging, memory, prompt tuning) |
| 21 | + |
| 22 | +This significantly reduces wasted context and steps, while keeping runs observable, by: |
| 23 | +- Cutting dead-end exploration from the context window |
| 24 | +- Providing focused guidance to the agent's "past self" |
| 25 | +- Preserving full execution history for observability |
| 26 | + |
| 27 | +Key concepts: |
| 28 | +- `UseExecutionRetrospective`: Capability that adds checkpoint markers, rewind logic, and system prompt instructions |
| 29 | +- `RetrospectivePolicy`: Configuration (maxRewinds, systemPromptInstructions) |
| 30 | +- `onRewind`: User callback invoked on every rewind with the result and agent state |
| 31 | +- `AgentConsoleLogger`: Shows checkpoint injection, tool calls, and step progression |
| 32 | + |
| 33 | +## Example |
| 34 | + |
| 35 | +```php |
| 36 | +<?php |
| 37 | +require 'examples/boot.php'; |
| 38 | + |
| 39 | +use Cognesy\Agents\Builder\AgentBuilder; |
| 40 | +use Cognesy\Agents\Capability\Bash\UseBash; |
| 41 | +use Cognesy\Agents\Capability\Core\UseContextConfig; |
| 42 | +use Cognesy\Agents\Capability\Core\UseGuards; |
| 43 | +use Cognesy\Agents\Capability\Core\UseLLMConfig; |
| 44 | +use Cognesy\Agents\Capability\Retrospective\ExecutionRetrospectiveResult; |
| 45 | +use Cognesy\Agents\Capability\Retrospective\RetrospectivePolicy; |
| 46 | +use Cognesy\Agents\Capability\Retrospective\UseExecutionRetrospective; |
| 47 | +use Cognesy\Agents\Data\AgentState; |
| 48 | +use Cognesy\Agents\Events\Support\AgentConsoleLogger; |
| 49 | +use Cognesy\Messages\Messages; |
| 50 | + |
| 51 | +// Track rewinds for observability |
| 52 | +$rewindLog = []; |
| 53 | + |
| 54 | +// Create console logger for execution visibility |
| 55 | +$logger = new AgentConsoleLogger( |
| 56 | + useColors: true, |
| 57 | + showTimestamps: true, |
| 58 | + showContinuation: true, |
| 59 | + showToolArgs: true, |
| 60 | +); |
| 61 | + |
| 62 | +// Configure working directory — point at the Instructor codebase root (so `bd` works) |
| 63 | +$workDir = dirname(__DIR__, 3); |
| 64 | + |
| 65 | +// Build agent with bash + retrospective capabilities |
| 66 | +// Note: The system prompt gives NO instructions about `bd` — the agent must explore it. |
| 67 | +// The massive --help output becomes wasted context once the agent knows the right command. |
| 68 | +// UseExecutionRetrospective automatically appends retrospective instructions |
| 69 | +// to the system prompt via BeforeExecution hook — no manual prompt setup needed. |
| 70 | +$agent = AgentBuilder::base() |
| 71 | + ->withCapability(new UseLLMConfig(model: 'gpt-5.2')) |
| 72 | + ->withCapability(new UseContextConfig( |
| 73 | + systemPrompt: <<<'SYSTEM' |
| 74 | + You are a CLI automation agent. You accomplish tasks using bash commands. |
| 75 | + Always limit command output — use --limit, | head -20, etc. |
| 76 | +
|
| 77 | + WORKFLOW — you always work in two passes: |
| 78 | + Pass 1: Explore the tool (--help, trial runs). Once you get the result, do NOT answer. |
| 79 | + Instead call execution_retrospective to rewind with the exact command as guidance. |
| 80 | + Pass 2: After rewind, guidance from your future self is in the conversation. |
| 81 | + Trust it. Run the command from guidance. Answer. Done. |
| 82 | + Do NOT explore again. Do NOT call execution_retrospective again. |
| 83 | + SYSTEM, |
| 84 | + )) |
| 85 | + ->withCapability(new UseBash(baseDir: $workDir)) |
| 86 | + ->withCapability(new UseExecutionRetrospective( |
| 87 | + policy: new RetrospectivePolicy( |
| 88 | + maxRewinds: 1, // matches the two-pass workflow above: explore once, rewind once |
| 89 | + systemPromptInstructions: <<<'PROMPT' |
| 90 | + ## Execution Retrospective (IMPORTANT) |
| 91 | +
|
| 92 | + The conversation contains [CHECKPOINT N] markers before each step. You have the |
| 93 | + `execution_retrospective` tool available. |
| 94 | +
|
| 95 | + To rewind, call it with the checkpoint ID to return to and guidance for your past self. |
| 96 | +
|
| 97 | + After a rewind, guidance from your future self appears as an assistant message. |
| 98 | + If you see such guidance: trust it, run the command it specifies, answer. Done. |
| 99 | + Do NOT read --help. Do NOT explore. Do NOT call execution_retrospective again. |
| 100 | + PROMPT, |
| 101 | + ), |
| 102 | + onRewind: function (ExecutionRetrospectiveResult $result, AgentState $state) use (&$rewindLog): void { // by-reference capture so entries reach the outer log |
| 103 | + $rewindLog[] = [ |
| 104 | + 'checkpoint' => $result->checkpointId, |
| 105 | + 'guidance' => $result->guidance, |
| 106 | + 'step' => $state->stepCount(), |
| 107 | + ]; |
| 108 | + echo "\n ** REWIND to checkpoint {$result->checkpointId}: {$result->guidance}\n\n"; |
| 109 | + }, |
| 110 | + )) |
| 111 | + ->withCapability(new UseGuards(maxSteps: 20, maxTokens: 65536, maxExecutionTime: 180)) |
| 112 | + ->build() |
| 113 | + ->wiretap($logger->wiretap()); |
| 114 | + |
| 115 | +// Task: List issues using the `bd` CLI — with no reliable prior knowledge. |
| 116 | +// The question offers only an unverified guess at the command, so the agent must still explore via --help and trial/error. |
| 117 | +// |
| 118 | +// Expected flow: |
| 119 | +// Phase 1 (steps 1-3): Agent explores `bd` (--help, list --help, maybe a wrong attempt) |
| 120 | +// → Context now polluted with massive help output |
| 121 | +// Phase 2 (step 4): Agent successfully runs `bd list` |
| 122 | +// Phase 3 (step 5): Agent recognizes exploration waste → calls execution_retrospective |
| 123 | +// → Rewinds to checkpoint 1 with guidance: "Run `bd list` to list issues" |
| 124 | +// Phase 4 (step 6): With clean context, agent one-shots `bd list` and responds |
| 125 | +// ~6 steps total, but context is clean after rewind |
| 126 | +$question = <<<'QUESTION' |
| 127 | +List the 5 most recent open issues tracked in this project. |
| 128 | +I believe the command is `bd issues --open --limit 5`. |
| 129 | +QUESTION; |
| 130 | + |
| 131 | +$state = AgentState::empty()->withMessages( |
| 132 | + Messages::fromString($question) |
| 133 | +); |
| 134 | + |
| 135 | +echo "=== Agent Execution Log ===\n"; |
| 136 | +echo "Task: List issues using unknown CLI tool (bd)\n\n"; |
| 137 | + |
| 138 | +// Execute agent until completion |
| 139 | +$finalState = $agent->execute($state); |
| 140 | + |
| 141 | +echo "\n=== Result ===\n"; |
| 142 | +$answer = $finalState->finalResponse()->toString() ?: 'No answer'; |
| 143 | +echo "Answer: {$answer}\n"; |
| 144 | +echo "Steps: {$finalState->stepCount()}\n"; |
| 145 | +echo "Tokens: {$finalState->usage()->total()}\n"; |
| 146 | +echo "Status: {$finalState->status()->value}\n"; |
| 147 | + |
| 148 | +if ($rewindLog !== []) { |
| 149 | + echo "\n=== Rewind Log ===\n"; |
| 150 | + foreach ($rewindLog as $i => $entry) { |
| 151 | + printf("Rewind #%d: checkpoint=%s, at step=%s\n", $i + 1, $entry['checkpoint'], $entry['step']); // $i is a 0-based list key; report 1-based |
| 152 | + echo " Guidance: {$entry['guidance']}\n"; |
| 153 | + } |
| 154 | +} else { |
| 155 | + echo "\nNo rewinds occurred — agent completed on first attempt.\n"; |
| 156 | +} |
| 157 | + |
| 158 | +// Assertions |
| 159 | +assert($finalState->stepCount() >= 1, 'Expected at least 1 step'); |
| 160 | +assert($finalState->usage()->total() > 0, 'Expected token usage > 0'); |
| 161 | +?> |
| 162 | +``` |
0 commit comments