feat(langchain_v1): Implement Context Editing Middleware (#33267)

nfcampos · sydney-runkle · web-flow · commit f30813928354 · 2025-10-06T10:34:04.000-04:00
Brings functionality similar to Anthropic's context editing to all chat models. See https://docs.claude.com/en/docs/build-with-claude/context-editing for the Anthropic feature this mirrors. --------- Co-authored-by: Sydney Runkle <54324534+sydney-runkle@users.noreply.github.com>
diff --git a/libs/langchain_v1/langchain/agents/middleware/__init__.py b/libs/langchain_v1/langchain/agents/middleware/__init__.py
@@ -1,5 +1,10 @@
 """Middleware plugins for agents."""
 
+from .call_tracking import ModelCallLimitMiddleware
+from .context_editing import (
+    ClearToolUsesEdit,
+    ContextEditingMiddleware,
+)
 from .human_in_the_loop import HumanInTheLoopMiddleware
 from .model_fallback import ModelFallbackMiddleware
 from .pii import PIIDetectionError, PIIMiddleware
@@ -24,8 +29,11 @@
     "AgentState",
     # should move to langchain-anthropic if we decide to keep it
     "AnthropicPromptCachingMiddleware",
+    "ClearToolUsesEdit",
+    "ContextEditingMiddleware",
     "HumanInTheLoopMiddleware",
     "LLMToolSelectorMiddleware",
+    "ModelCallLimitMiddleware",
     "ModelFallbackMiddleware",
     "ModelRequest",
     "PIIDetectionError",
diff --git a/libs/langchain_v1/langchain/agents/middleware/context_editing.py b/libs/langchain_v1/langchain/agents/middleware/context_editing.py
@@ -0,0 +1,245 @@
+"""Context editing middleware.
+
+This middleware mirrors Anthropic's context editing capabilities by clearing
+older tool results once the conversation grows beyond a configurable token
+threshold. The implementation is intentionally model-agnostic so it can be used
+with any LangChain chat model.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Callable, Iterable, Sequence
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Literal
+
+from langchain_core.messages import (
+    AIMessage,
+    AnyMessage,
+    BaseMessage,
+    SystemMessage,
+    ToolMessage,
+)
+from langchain_core.messages.utils import count_tokens_approximately
+from typing_extensions import Protocol
+
+from langchain.agents.middleware.types import AgentMiddleware, AgentState, ModelRequest
+
+if TYPE_CHECKING:
+    from langgraph.runtime import Runtime
+
+# Default text substituted for the content of a cleared tool result.
+DEFAULT_TOOL_PLACEHOLDER = "[cleared]"
+
+
+# Callable that receives a sequence of messages and returns how many tokens
+# they occupy; edit strategies use it to measure the conversation size.
+TokenCounter = Callable[[Sequence[BaseMessage]], int]
+
+
+class ContextEdit(Protocol):
+    """Structural interface that every context editing strategy satisfies."""
+
+    def apply(self, messages: list[AnyMessage], *, count_tokens: TokenCounter) -> None:
+        """Edit the conversation history in place.
+
+        Args:
+            messages: Message list to modify; implementations mutate it directly.
+            count_tokens: Callable that returns the token count of a message sequence.
+        """
+        ...
+
+
+@dataclass(slots=True)
+class ClearToolUsesEdit(ContextEdit):
+    """Clear older tool results once the conversation exceeds a token threshold.
+
+    Tool messages are cleared oldest-first, the ``keep`` most recent ones are never
+    touched, and cleared messages are tagged in ``response_metadata`` so that they
+    are skipped the next time the edit runs.
+    """
+
+    trigger: int = 100_000
+    """Token count that triggers the edit."""
+
+    clear_at_least: int = 0
+    """Minimum number of tokens to reclaim when the edit runs."""
+
+    keep: int = 3
+    """Number of most recent tool results that must be preserved."""
+
+    clear_tool_inputs: bool = False
+    """Whether to clear the originating tool call parameters on the AI message."""
+
+    exclude_tools: Sequence[str] = ()
+    """List of tool names to exclude from clearing."""
+
+    placeholder: str = DEFAULT_TOOL_PLACEHOLDER
+    """Placeholder text inserted for cleared tool outputs."""
+
+    def apply(self, messages: list[AnyMessage], *, count_tokens: TokenCounter) -> None:
+        """Apply the clear-tool-uses strategy to ``messages`` in place.
+
+        Args:
+            messages: Conversation history; cleared entries are replaced by copies.
+            count_tokens: Callable used to measure the size of the conversation.
+        """
+        tokens = count_tokens(messages)
+        if tokens <= self.trigger:
+            return
+
+        candidates = [
+            (idx, msg) for idx, msg in enumerate(messages) if isinstance(msg, ToolMessage)
+        ]
+        # Protect the `keep` newest tool results; a non-positive `keep` protects none.
+        candidates = candidates[: max(len(candidates) - self.keep, 0)]
+        excluded_tools = set(self.exclude_tools)
+
+        for idx, tool_message in candidates:
+            if tool_message.response_metadata.get("context_editing", {}).get("cleared"):
+                continue
+
+            located = self._find_preceding_ai_message(messages, idx)
+            if located is None:
+                continue
+            ai_idx, ai_message = located
+
+            tool_call = next(
+                (c for c in ai_message.tool_calls if c.get("id") == tool_message.tool_call_id),
+                None,
+            )
+            # Only clear results that can be paired with the call that produced them.
+            if tool_call is None:
+                continue
+
+            if (tool_message.name or tool_call["name"]) in excluded_tools:
+                continue
+
+            messages[idx] = tool_message.model_copy(
+                update={
+                    "artifact": None,
+                    "content": self.placeholder,
+                    "response_metadata": {
+                        **tool_message.response_metadata,
+                        "context_editing": {"cleared": True, "strategy": "clear_tool_uses"},
+                    },
+                }
+            )
+
+            if self.clear_tool_inputs:
+                # Write back by position: an equality-based ``list.index`` lookup can
+                # land on an earlier AI message that merely compares equal.
+                messages[ai_idx] = self._build_cleared_tool_input_message(
+                    ai_message,
+                    tool_message.tool_call_id,
+                )
+
+            if self.clear_at_least > 0:
+                reclaimed = tokens - count_tokens(messages)
+                if reclaimed >= self.clear_at_least:
+                    break
+
+    @staticmethod
+    def _find_preceding_ai_message(
+        messages: Sequence[AnyMessage],
+        before: int,
+    ) -> tuple[int, AIMessage] | None:
+        """Return ``(index, message)`` of the closest AI message before ``before``."""
+        for position in range(before - 1, -1, -1):
+            candidate = messages[position]
+            if isinstance(candidate, AIMessage):
+                return position, candidate
+        return None
+
+    def _build_cleared_tool_input_message(
+        self,
+        message: AIMessage,
+        tool_call_id: str,
+    ) -> AIMessage:
+        """Return a copy of ``message`` with the args of ``tool_call_id`` emptied."""
+        updated_tool_calls = []
+        cleared_any = False
+        for tool_call in message.tool_calls:
+            updated_call = dict(tool_call)
+            if updated_call.get("id") == tool_call_id:
+                updated_call["args"] = {}
+                cleared_any = True
+            updated_tool_calls.append(updated_call)
+
+        metadata = dict(getattr(message, "response_metadata", {}))
+        context_entry = dict(metadata.get("context_editing", {}))
+        if cleared_any:
+            # Record which tool call inputs were cleared, kept sorted and de-duplicated.
+            cleared_ids = set(context_entry.get("cleared_tool_inputs", []))
+            cleared_ids.add(tool_call_id)
+            context_entry["cleared_tool_inputs"] = sorted(cleared_ids)
+            metadata["context_editing"] = context_entry
+
+        return message.model_copy(
+            update={"tool_calls": updated_tool_calls, "response_metadata": metadata}
+        )
+
+
+class ContextEditingMiddleware(AgentMiddleware):
+    """Agent middleware that trims the model input by running context edits.
+
+    The configured edits are applied in order to the request's messages before the
+    model is invoked. ``ClearToolUsesEdit`` is the strategy provided here and is
+    modelled on Anthropic's ``clear_tool_uses_20250919`` behaviour.
+    """
+
+    edits: list[ContextEdit]
+    token_count_method: Literal["approximate", "model"]
+
+    def __init__(
+        self,
+        *,
+        edits: Iterable[ContextEdit] | None = None,
+        token_count_method: Literal["approximate", "model"] = "approximate",  # noqa: S107
+    ) -> None:
+        """Create the middleware.
+
+        Args:
+            edits: Edit strategies, applied in the given order. When `None` or an
+                empty sequence, one default-configured `ClearToolUsesEdit` is used.
+            token_count_method: `"approximate"` counts with
+                `count_tokens_approximately`; `"model"` asks the request's chat
+                model through `get_num_tokens_from_messages`.
+        """
+        super().__init__()
+        self.token_count_method = token_count_method
+        self.edits = list(edits or (ClearToolUsesEdit(),))
+
+    def modify_model_request(
+        self,
+        request: ModelRequest,
+        state: AgentState,  # noqa: ARG002
+        runtime: Runtime,  # noqa: ARG002
+    ) -> ModelRequest:
+        """Run every configured edit over ``request.messages`` in place."""
+        if not request.messages:
+            return request
+
+        if self.token_count_method != "approximate":  # noqa: S105
+            # Model-based counting also includes the system prompt and the tools.
+            prompt = request.system_prompt
+            prefix = [SystemMessage(content=prompt)] if prompt else []
+
+            def count_tokens(messages: Sequence[BaseMessage]) -> int:
+                combined = prefix + list(messages)
+                return request.model.get_num_tokens_from_messages(combined, request.tools)
+        else:
+
+            def count_tokens(messages: Sequence[BaseMessage]) -> int:
+                return count_tokens_approximately(messages)
+
+        # Edits run in their configured order and replace entries of the list directly.
+        for strategy in self.edits:
+            strategy.apply(request.messages, count_tokens=count_tokens)
+
+        return request
+
+
+# Public API of this module. Both names are also imported and listed in
+# ``__all__`` by the parent ``langchain.agents.middleware`` package, so new
+# public names added here may need to be exported there as well.
+__all__ = ["ClearToolUsesEdit", "ContextEditingMiddleware"]
diff --git a/libs/langchain_v1/tests/unit_tests/agents/test_context_editing_middleware.py b/libs/langchain_v1/tests/unit_tests/agents/test_context_editing_middleware.py