fix(langchain): further tool call limit optimizations (#9338)

christian-bromann · web-flow · commit 5d43b2133902 · 2025-11-08T11:06:51.000-08:00
diff --git a/libs/langchain/src/agents/middleware/tests/toolCallLimit.test.ts b/libs/langchain/src/agents/middleware/tests/toolCallLimit.test.ts
@@ -544,6 +544,30 @@ describe("toolCallLimitMiddleware", () => {
   });
 
   describe("Error Behavior", () => {
+    it("should throw an error if run limit exceeds thread limit", () => {
+      expect(() =>
+        toolCallLimitMiddleware({
+          threadLimit: 2,
+          runLimit: 3, // deliberately above threadLimit
+          exitBehavior: "error",
+        })
+      ).toThrow(
+        "runLimit (3) cannot exceed threadLimit (2). The run limit should be less than or equal to the thread limit."
+      );
+    });
+
+    it("should raise if invalid exit behavior is provided", () => {
+      expect(() =>
+        toolCallLimitMiddleware({
+          threadLimit: 2,
+          runLimit: 1,
+          exitBehavior: "invalid" as any, // cast lets a value outside the enum past the type checker
+        })
+      ).toThrow(
+        "Invalid enum value. Expected 'continue' | 'error' | 'end', received 'invalid'"
+      );
+    });
+
     it("should throw ToolCallLimitExceededError when exitBehavior is error", async () => {
       const middleware = toolCallLimitMiddleware({
         threadLimit: 2,
@@ -651,6 +675,49 @@ describe("toolCallLimitMiddleware", () => {
         );
       }
     });
+
+    it("should run remaining tools until limit is exceeded", async () => {
+      const middleware = toolCallLimitMiddleware({
+        threadLimit: 3,
+        runLimit: 2, // lower than the three tool calls requested in the first model turn
+        exitBehavior: "continue",
+      });
+
+      const model = new FakeToolCallingChatModel({
+        responses: [
+          new AIMessage({
+            content: "",
+            tool_calls: [
+              { id: "1", name: "search", args: { query: "test1" } },
+              { id: "2", name: "search", args: { query: "test2" } },
+              { id: "3", name: "calculator", args: { expression: "1+1" } }, // expected to be blocked (see assertions below)
+            ],
+          }),
+          new AIMessage({
+            content: "",
+            tool_calls: [{ id: "4", name: "search", args: { query: "test3" } }],
+          }),
+          new AIMessage("Should not reach here"),
+        ],
+      });
+
+      const agent = createAgent({
+        model,
+        tools: [searchTool, calculatorTool],
+        middleware: [middleware],
+      });
+
+      const result = await agent.invoke({
+        messages: [new HumanMessage("Search and calculate")],
+      });
+
+      const lastMessage = result.messages[result.messages.length - 1];
+      expect(lastMessage.content).toContain(
+        "Tool call limit exceeded. Do not make additional tool calls."
+      );
+      expect(searchToolMock).toHaveBeenCalledTimes(2); // only two search calls are expected to execute
+      expect(calculatorToolMock).toHaveBeenCalledTimes(0); // the calculator call is expected never to execute
+    });
   });
 
   describe("Combined Thread and Run Limits", () => {
@@ -715,7 +782,7 @@ describe("toolCallLimitMiddleware", () => {
 
       const middleware = toolCallLimitMiddleware({
         threadLimit: 2, // Will hit this
-        runLimit: 10, // Won't hit this
+        runLimit: 2, // Capped at threadLimit (runLimit may not exceed it)
         exitBehavior: "end",
       });
 
diff --git a/libs/langchain/src/agents/middleware/toolCallLimit.ts b/libs/langchain/src/agents/middleware/toolCallLimit.ts
@@ -1,4 +1,5 @@
 import { AIMessage, ToolMessage } from "@langchain/core/messages";
+import { z as z4 } from "zod/v4";
 import { z } from "zod/v3";
 import type { InferInteropZodInput } from "@langchain/core/utils/types";
 import type { ToolCall } from "@langchain/core/messages/tool";
@@ -12,42 +13,19 @@ import { createMiddleware } from "../middleware.js";
  * that the model has no notion of.
  *
  * @param toolName - Tool name being limited (if specific tool), or undefined for all tools.
- * @param threadCount - Current thread tool call count.
- * @param runCount - Current run tool call count.
- * @param threadLimit - Thread tool call limit (if set).
- * @param runLimit - Run tool call limit (if set).
- * @returns A concise message. If only run limit is exceeded (not thread limit),
- * returns a simple "limit exceeded" message without instructing model to stop.
- * If thread limit is exceeded, includes instruction not to call again.
+ * @returns A concise message telling the model not to call the named tool again or, when no tool name is given, not to make any further tool calls.
  */
-function buildToolMessageContent(
-  toolName: string | undefined,
-  threadCount: number,
-  runCount: number,
-  threadLimit: number | undefined,
-  runLimit: number | undefined
-): string {
-  // Check if thread limit is exceeded
-  const threadExceeded = threadLimit !== undefined && threadCount > threadLimit;
-  // Check if only run limit is exceeded (not thread limit)
-  const onlyRunExceeded =
-    runLimit !== undefined && runCount > runLimit && !threadExceeded;
-
-  if (onlyRunExceeded) {
-    // Run limit exceeded but thread limit not exceeded - simpler message
-    if (toolName) {
-      return `Tool call limit exceeded for '${toolName}'.`;
-    }
-    return "Tool call limit exceeded.";
-  }
-
-  // Thread limit exceeded (or both) - include instruction not to call again
+function buildToolMessageContent(toolName: string | undefined): string {
+  // Same wording whichever limit (run or thread) was hit: always tell the model to stop calling.
   if (toolName) {
     return `Tool call limit exceeded. Do not call '${toolName}' again.`;
   }
   return "Tool call limit exceeded. Do not make additional tool calls.";
 }
 
+const VALID_EXIT_BEHAVIORS = ["continue", "error", "end"] as const; // `as const` keeps the literal tuple type passed to z.enum()
+const DEFAULT_EXIT_BEHAVIOR = "continue"; // applied when `exitBehavior` is omitted
+
 /**
  * Build the final AI message content for 'end' behavior.
  *
@@ -84,6 +62,13 @@ function buildFinalAIMessageContent(
   return `${toolDesc} call limit reached: ${limitsText}.`;
 }
 
+/**
+ * Schema for the exit behavior: one of VALID_EXIT_BEHAVIORS, falling back to DEFAULT_EXIT_BEHAVIOR when undefined.
+ */
+const exitBehaviorSchema = z
+  .enum(VALID_EXIT_BEHAVIORS)
+  .default(DEFAULT_EXIT_BEHAVIOR);
+
 /**
  * Exception raised when tool call limits are exceeded.
  *
@@ -162,8 +147,10 @@ export const ToolCallLimitOptionsSchema = z.object({
    * - "end": Stop execution immediately, injecting a ToolMessage and an AI message
    *   for the single tool call that exceeded the limit. Raises NotImplementedError
    *   if there are multiple tool calls.
+   *
+   * @default "continue"
    */
-  exitBehavior: z.enum(["continue", "error", "end"]).default("continue"),
+  exitBehavior: exitBehaviorSchema,
 });
 
 export type ToolCallLimitConfig = InferInteropZodInput<
@@ -202,7 +189,7 @@ const DEFAULT_TOOL_COUNT_KEY = "__all__";
  *   - "error": Raise a ToolCallLimitExceededError exception
  *   - "end": Stop execution immediately with a ToolMessage + AI message for the single tool call that exceeded the limit. Raises NotImplementedError if there are multiple tool calls.
  *
- * @throws {Error} If both limits are undefined.
+ * @throws {Error} If both limits are undefined, if exitBehavior is invalid, or if runLimit exceeds threadLimit.
  * @throws {NotImplementedError} If exitBehavior is "end" and there are multiple tool calls.
  *
  * @example Continue execution with blocked tools (default)
@@ -271,9 +258,27 @@ export function toolCallLimitMiddleware(options: ToolCallLimitConfig) {
   }
 
   /**
-   * Apply default for exitBehavior
+   * Apply the default for exitBehavior and validate it up front, so an invalid value throws a readable error when the middleware is created
    */
-  const exitBehavior = options.exitBehavior ?? "continue";
+  const exitBehavior = options.exitBehavior ?? DEFAULT_EXIT_BEHAVIOR;
+  const parseResult = exitBehaviorSchema.safeParse(exitBehavior);
+  if (!parseResult.success) {
+    throw new Error(z4.prettifyError(parseResult.error).slice(2));
+  }
+
+  /**
+   * Validate that runLimit does not exceed threadLimit
+   */
+  if (
+    options.threadLimit !== undefined &&
+    options.runLimit !== undefined &&
+    options.runLimit > options.threadLimit
+  ) {
+    throw new Error(
+      `runLimit (${options.runLimit}) cannot exceed threadLimit (${options.threadLimit}). ` +
+        "The run limit should be less than or equal to the thread limit."
+    );
+  }
 
   /**
    * Generate the middleware name based on the tool name
@@ -358,7 +363,7 @@ export function toolCallLimitMiddleware(options: ToolCallLimitConfig) {
           return {
             allowed,
             blocked,
-            finalThreadCount: tempThreadCount + blocked.length,
+            finalThreadCount: tempThreadCount,
             finalRunCount: tempRunCount + blocked.length,
           };
         };
@@ -387,7 +392,9 @@ export function toolCallLimitMiddleware(options: ToolCallLimitConfig) {
           );
 
         /**
-         * Update counts to include ALL tool call attempts (both allowed and blocked)
+         * Update counts:
+         * - Thread count includes only allowed calls (blocked calls don't count towards thread-level tracking)
+         * - Run count includes blocked calls since they were attempted in this run
          */
         threadCounts[countKey] = finalThreadCount;
         runCounts[countKey] = finalRunCount;
@@ -409,8 +416,10 @@ export function toolCallLimitMiddleware(options: ToolCallLimitConfig) {
          * Handle different exit behaviors
          */
         if (exitBehavior === "error") {
+          // Use hypothetical thread count to show which limit was exceeded
+          const hypotheticalThreadCount = finalThreadCount + blocked.length;
           throw new ToolCallLimitExceededError(
-            finalThreadCount,
+            hypotheticalThreadCount,
             finalRunCount,
             options.threadLimit,
             options.runLimit,
@@ -421,13 +430,7 @@ export function toolCallLimitMiddleware(options: ToolCallLimitConfig) {
         /**
          * Build tool message content (sent to model - no thread/run details)
          */
-        const toolMsgContent = buildToolMessageContent(
-          options.toolName,
-          finalThreadCount,
-          finalRunCount,
-          options.threadLimit,
-          options.runLimit
-        );
+        const toolMsgContent = buildToolMessageContent(options.toolName);
 
         /**
          * Inject artificial error ToolMessages for blocked tool calls
@@ -485,9 +488,12 @@ export function toolCallLimitMiddleware(options: ToolCallLimitConfig) {
 
           /**
            * Build final AI message content (displayed to user - includes thread/run details)
+           * Use hypothetical thread count (what it would have been if call wasn't blocked)
+           * to show which limit was actually exceeded
            */
+          const hypotheticalThreadCount = finalThreadCount + blocked.length;
           const finalMsgContent = buildFinalAIMessageContent(
-            finalThreadCount,
+            hypotheticalThreadCount,
             finalRunCount,
             options.threadLimit,
             options.runLimit,