Merged
20 changes: 20 additions & 0 deletions .changeset/think-lifecycle-hooks.md
@@ -0,0 +1,20 @@
---
"@cloudflare/think": patch
"agents": patch
---

Think now owns the inference loop with lifecycle hooks at every stage.

**Breaking:** `onChatMessage()`, `assembleContext()`, and `getMaxSteps()` are removed. Use lifecycle hooks and the `maxSteps` property instead. If you need full custom inference, extend `Agent` directly.

**New lifecycle hooks:** `beforeTurn`, `beforeToolCall`, `afterToolCall`, `onStepFinish`, `onChunk` — fire on every turn from all entry paths (WebSocket, `chat()`, `saveMessages`, auto-continuation).

**`beforeTurn(ctx)`** receives the assembled system prompt, messages, tools, and model. Return a `TurnConfig` to override any part — model, system prompt, messages, tools, activeTools, toolChoice, maxSteps, providerOptions.

**`maxSteps`** is now a property (default 10) instead of a method. Override per-turn via `TurnConfig.maxSteps`.

**MCP tools auto-merged** — no need to manually merge `this.mcp.getAITools()` in `getTools()`.

**Dynamic context blocks:** `Session.addContext()` and `Session.removeContext()` allow adding/removing context blocks after session initialization (e.g., from extensions).

**Extension manifest expanded** with `context` (namespaced context block declarations) and `hooks` fields.
21 changes: 15 additions & 6 deletions examples/assistant/src/server.ts
@@ -9,12 +9,18 @@
import { createWorkersAI } from "workers-ai-provider";
import { routeAgentRequest, callable } from "agents";
import { Think, Session } from "@cloudflare/think";
import type {
TurnContext,
TurnConfig,
ChatResponseResult
} from "@cloudflare/think";
import { tool } from "ai";
import type { LanguageModel, ToolSet } from "ai";
import { z } from "zod";

export class MyAssistant extends Think<Env> {
waitForMcpConnections = { timeout: 5000 };
override maxSteps = 5;

getModel(): LanguageModel {
return createWorkersAI({ binding: this.env.AI })(
@@ -50,11 +56,7 @@ Always respond concisely.`
}

getTools(): ToolSet {
return {
getWeather: tool({
description: "Get the current weather for a city",
inputSchema: z.object({
@@ -108,8 +110,15 @@ Always respond concisely.`
};
}

// Lifecycle hooks — log tool usage and turn completion
beforeTurn(ctx: TurnContext): TurnConfig | void {
console.log(
`Turn starting: ${Object.keys(ctx.tools).length} tools, continuation=${ctx.continuation}`
);
}

onChatResponse(result: ChatResponseResult): void {
console.log(`Turn ${result.status}: ${result.message.parts.length} parts`);
}

onStart() {
4 changes: 0 additions & 4 deletions package-lock.json

Some generated files are not rendered by default.

79 changes: 79 additions & 0 deletions packages/agents/src/experimental/memory/session/context.ts
@@ -166,6 +166,85 @@ export class ContextBlocks {
this.loaded = true;
}

/**
* Dynamically register a new context block after initialization.
* Used by extensions to contribute context at runtime.
*
* If blocks have already been loaded, the new block's provider is
* initialized and loaded immediately. The snapshot is NOT updated
* automatically — call `refreshSystemPrompt()` to rebuild.
*/
async addBlock(config: ContextConfig): Promise<ContextBlock> {
if (!this.loaded) await this.load();

if (this.configs.some((c) => c.label === config.label)) {
throw new Error(`Block "${config.label}" already exists`);
}

this.configs.push(config);

if (config.provider?.init) {
config.provider.init(config.label);
}

const content = config.provider
? ((await config.provider.get()) ?? "")
: "";

const skill = config.provider ? isSkillProvider(config.provider) : false;
const searchable = config.provider
? isSearchProvider(config.provider)
: false;
const writable = config.provider
? isWritableProvider(config.provider) ||
(skill && !!(config.provider as SkillProvider).set) ||
(searchable && !!(config.provider as SearchProvider).set)
: false;

const block: ContextBlock = {
label: config.label,
description: config.description,
content,
tokens: estimateStringTokens(content),
maxTokens: config.maxTokens,
writable,
isSkill: skill,
isSearchable: searchable
};

this.blocks.set(config.label, block);
return block;
}

/**
* Remove a dynamically registered context block.
* Used during extension unload cleanup.
*
* Returns true if the block existed and was removed.
* The snapshot is NOT updated automatically — call
* `refreshSystemPrompt()` to rebuild.
*
* Note: loaded skills for this block are cleaned up from the
* tracking set but the skill unload callback is NOT fired
* (history reclamation is skipped — appropriate for full
* extension removal).
*/
removeBlock(label: string): boolean {
const idx = this.configs.findIndex((c) => c.label === label);
if (idx === -1) return false;

this.configs.splice(idx, 1);
this.blocks.delete(label);

for (const id of this._loadedSkills) {
if (id.startsWith(`${label}:`)) {
this._loadedSkills.delete(id);
}
}

return true;
}

/**
* Get a block by label.
*/
44 changes: 44 additions & 0 deletions packages/agents/src/experimental/memory/session/session.ts
@@ -440,6 +440,50 @@ export class Session {
return this.context.appendToBlock(label, content);
}

/**
* Dynamically register a new context block after session initialization.
* Used by extensions to contribute context blocks at runtime.
*
* The block's provider is initialized and loaded immediately.
* Call `refreshSystemPrompt()` afterward to include the new block
* in the system prompt.
*
* Note: When called without a provider, auto-wires to SQLite via
* AgentContextProvider. Requires the session to have been created
* via `Session.create(agent)` (not the direct constructor).
*/
async addContext(
label: string,
options?: SessionContextOptions
): Promise<ContextBlock> {
this._ensureReady();
const opts = options ?? {};
let provider = opts.provider;
if (!provider) {
const key = this._sessionId ? `${label}_${this._sessionId}` : label;
provider = new AgentContextProvider(this._agent!, key);
}
return this.context.addBlock({
label,
description: opts.description,
maxTokens: opts.maxTokens,
provider
});
}

/**
* Remove a dynamically registered context block.
* Used during extension unload cleanup.
*
* Returns true if the block existed and was removed.
* Call `refreshSystemPrompt()` afterward to rebuild the prompt
* without the removed block.
*/
removeContext(label: string): boolean {
this._ensureReady();
return this.context.removeBlock(label);
}

// ── Skills ───────────────────────────────────────────────────

/**
95 changes: 72 additions & 23 deletions packages/think/README.md
@@ -63,33 +63,67 @@ export class MyAgent extends Think<Env> {

## Think

### Configuration

| Method / Property | Default | Description |
| -------------------- | -------------------------------- | ----------------------------------------------- |
| `getModel()` | throws | Return the `LanguageModel` to use |
| `getSystemPrompt()` | `"You are a helpful assistant."` | System prompt (fallback when no context blocks) |
| `getTools()` | `{}` | AI SDK `ToolSet` for the agentic loop |
| `maxSteps` | `10` | Max tool-call rounds per turn (property) |
| `configureSession()` | identity | Add context blocks, compaction, search, skills |
| `getExtensions()` | `[]` | Sandboxed extension declarations (load order) |
| `extensionLoader` | `undefined` | `WorkerLoader` binding — enables extensions |

### Lifecycle hooks

Think owns the `streamText` call. Hooks fire on every turn regardless of entry path (WebSocket, `chat()`, `saveMessages`, auto-continuation).

| Hook | When it fires | Return |
| ------------------------ | ------------------------------------------- | ------------------------------ |
| `beforeTurn(ctx)` | Before `streamText` — see assembled context | `TurnConfig` overrides or void |
| `beforeToolCall(ctx)` | When model calls a tool (observation only) | `ToolCallDecision` or void |
| `afterToolCall(ctx)` | After tool execution | void |
| `onStepFinish(ctx)` | After each step completes | void |
| `onChunk(ctx)` | Per streaming chunk (high-frequency) | void |
| `onChatResponse(result)` | After turn completes + message persisted | void |
| `onChatError(error)` | On error during a turn | error to propagate |

#### beforeTurn example

```ts
export class MyAgent extends Think<Env> {
  getModel() { ... }

  // Switch to a cheaper model for continuation turns
  beforeTurn(ctx: TurnContext) {
    if (ctx.continuation) {
      return { model: this.cheapModel };
    }
  }
}
```

#### TurnConfig — what you can override per-turn

```ts
interface TurnConfig {
  model?: LanguageModel; // override model
  system?: string; // override system prompt
  messages?: ModelMessage[]; // override assembled messages
  tools?: ToolSet; // extra tools to merge (additive)
  activeTools?: string[]; // limit which tools the model can call
  toolChoice?: ToolChoice; // force a specific tool
  maxSteps?: number; // override maxSteps for this turn
  providerOptions?: Record<string, unknown>;
}
```
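Conceptually, a `TurnConfig` overlays the assembled defaults: scalar fields replace their defaults while `tools` merges additively. A self-contained sketch of that overlay, with simplified stand-in types (not the package's actual merge code):

```typescript
// Simplified stand-ins for the real types, for illustration only.
interface TurnConfigSketch {
  model?: string;
  system?: string;
  maxSteps?: number;
  tools?: Record<string, unknown>;
}

interface TurnDefaults {
  model: string;
  system: string;
  maxSteps: number;
  tools: Record<string, unknown>;
}

// Overlay a beforeTurn override onto the assembled defaults:
// scalar fields replace, tools merge additively.
function applyTurnConfig(
  defaults: TurnDefaults,
  override?: TurnConfigSketch
): TurnDefaults {
  if (!override) return defaults;
  return {
    model: override.model ?? defaults.model,
    system: override.system ?? defaults.system,
    maxSteps: override.maxSteps ?? defaults.maxSteps,
    tools: { ...defaults.tools, ...override.tools }
  };
}
```

With `{ model: "cheap", tools: { extra } }`, the turn runs on the cheap model but still sees every default tool plus `extra`.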
### Client tools

Think supports client-defined tools that execute in the browser. The client sends tool schemas in the chat request body, and Think merges them with server tools automatically.
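For example, a chat request body carrying a client tool schema might look like this (the `search` tool and the message content are illustrative):

```typescript
// Illustrative chat request body: server tools come from getTools(),
// while clientTools are declared by the browser and executed there.
const body = {
  messages: [{ role: "user", content: "Find recent Workers news" }],
  clientTools: [{ name: "search", description: "Search the web" }]
};
```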

When the LLM calls a client tool, the tool call chunk is sent to the client. The client executes it and sends back `CF_AGENT_TOOL_RESULT`. Think applies the result, persists the updated message, broadcasts `CF_AGENT_MESSAGE_UPDATED`, and optionally auto-continues the conversation.
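The coalescing behavior can be pictured with a generic trailing-debounce helper (a sketch of the pattern, not the package's implementation):

```typescript
// Coalesce a burst of calls into one trailing invocation:
// each call resets the timer, so only the last one fires fn.
function coalesce(fn: () => void, waitMs: number): () => void {
  let timer: ReturnType<typeof setTimeout> | undefined;
  return () => {
    if (timer !== undefined) clearTimeout(timer);
    timer = setTimeout(() => {
      timer = undefined;
      fn(); // one continuation turn for the whole burst
    }, waitMs);
  };
}
```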

Tool approval flows are also supported via `CF_AGENT_TOOL_APPROVAL`.

@@ -109,6 +143,20 @@

#### Dynamic context blocks

Context blocks can also be added at runtime (e.g., by extensions):

```ts
await session.addContext("notes", { description: "User notes" });
await session.refreshSystemPrompt(); // rebuild the prompt

session.removeContext("notes");
await session.refreshSystemPrompt();
```

#### Skills

Skills support load/unload for explicit context management.

@@ -126,7 +174,7 @@ configureSession(session: Session) {

### MCP integration

Think inherits MCP client support from the Agent base class. MCP tools are automatically merged into every turn. Set `waitForMcpConnections` to ensure MCP servers are connected before the inference loop runs:

```ts
export class MyAgent extends Think<Env> {
  waitForMcpConnections = { timeout: 5000 };
}
```

@@ -171,10 +219,11 @@ export class MyAgent extends Think<Env, MyConfig> {

- **WebSocket protocol** — wire-compatible with `useAgentChat` from `@cloudflare/ai-chat`
- **Built-in workspace** — every agent gets `this.workspace` with file tools auto-wired
- **Lifecycle hooks** — `beforeTurn`, `onStepFinish`, `onChunk`, `onChatResponse` fire on every turn
- **Stream resumption** — page refresh replays buffered chunks via `ResumableStream`
- **Client tools** — accept tool schemas from clients, handle results and approvals
- **Auto-continuation** — debounce-based continuation after tool results
- **MCP integration** — MCP tools auto-merged, wait for connections before inference
- **Abort/cancel** — pass an `AbortSignal` or send a cancel message
- **Multi-tab broadcast** — all connected clients see the stream (resume-aware exclusions)
- **Partial persistence** — on error, the partial assistant message is saved