Skip to content

Commit 07b9121

Browse files
fix(summarization): resolve fraction trigger bug by using model profile for maxInputTokens (#189)
* fix(summarization): resolve fraction trigger bug by using model profile for maxInputTokens. The fraction-based trigger in createSummarizationMiddleware was always ineffective because maxInputTokens was never passed to the internal functions that needed it. Fixes #185. * fix type
1 parent d7cc7f4 commit 07b9121

File tree

2 files changed

+206
-28
lines changed

2 files changed

+206
-28
lines changed

libs/deepagents/src/middleware/summarization.test.ts

Lines changed: 128 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,21 @@ import type {
88
} from "../backends/protocol.js";
99
import { createMockBackend } from "./test.js";
1010

11-
// Mock the OpenAI module with a class constructor
12-
vi.mock("@langchain/openai", () => {
11+
// Mock the initChatModel function from langchain/chat_models/universal
12+
vi.mock("langchain/chat_models/universal", () => {
1313
return {
14-
ChatOpenAI: class MockChatOpenAI {
15-
constructor(_config: any) {}
16-
async invoke(_messages: any) {
17-
return {
18-
content: "This is a summary of the conversation.",
19-
};
20-
}
14+
initChatModel: async (_modelName: string) => {
15+
return {
16+
async invoke(_messages: any) {
17+
return {
18+
content: "This is a summary of the conversation.",
19+
};
20+
},
21+
// Mock profile with maxInputTokens for testing
22+
profile: {
23+
maxInputTokens: 128000,
24+
},
25+
};
2126
},
2227
};
2328
});
@@ -130,6 +135,120 @@ describe("createSummarizationMiddleware", () => {
130135
});
131136
});
132137

138+
describe("fraction trigger", () => {
139+
it("should trigger summarization when token count exceeds fraction of maxInputTokens", async () => {
140+
const mockBackend = createMockBackend();
141+
142+
// Create a mock model with profile containing low maxInputTokens
143+
const mockModelWithProfile = {
144+
profile: {
145+
maxInputTokens: 200, // Low threshold for testing (100 tokens = 50%)
146+
},
147+
async invoke(_messages: any) {
148+
return {
149+
content: "This is a summary of the conversation.",
150+
};
151+
},
152+
};
153+
154+
const middleware = createSummarizationMiddleware({
155+
model: mockModelWithProfile as any,
156+
backend: mockBackend,
157+
trigger: { type: "fraction", value: 0.5 }, // 50% of maxInputTokens
158+
keep: { type: "messages", value: 2 },
159+
});
160+
161+
// Create messages with enough content to exceed 100 tokens (50% of 200)
162+
const messages = Array.from(
163+
{ length: 10 },
164+
(_, i) =>
165+
new HumanMessage({
166+
content: `Message ${i} with some extra content to increase token count`,
167+
}),
168+
);
169+
170+
// @ts-expect-error - typing issue
171+
const result = await middleware.beforeModel?.({ messages });
172+
173+
expect(result).toBeDefined();
174+
expect(result?.messages).toBeDefined();
175+
// Should have summary message + 2 preserved messages
176+
expect(result?.messages.length).toBe(3);
177+
});
178+
179+
it("should not trigger fraction-based summarization when model has no profile", async () => {
180+
const mockBackend = createMockBackend();
181+
182+
// Create a mock model WITHOUT a profile (no maxInputTokens)
183+
const mockModelWithoutProfile = {
184+
async invoke(_messages: any) {
185+
return {
186+
content: "This is a summary of the conversation.",
187+
};
188+
},
189+
// No profile property
190+
};
191+
192+
const middleware = createSummarizationMiddleware({
193+
model: mockModelWithoutProfile as any,
194+
backend: mockBackend,
195+
trigger: { type: "fraction", value: 0.5 },
196+
keep: { type: "messages", value: 2 },
197+
// maxInputTokens is NOT provided and model has no profile
198+
});
199+
200+
// Create messages with content
201+
const messages = Array.from(
202+
{ length: 10 },
203+
(_, i) =>
204+
new HumanMessage({
205+
content: `Message ${i} with some extra content`,
206+
}),
207+
);
208+
209+
// @ts-expect-error - typing issue
210+
const result = await middleware.beforeModel?.({ messages });
211+
212+
// Without maxInputTokens (no explicit option and no model profile), fraction trigger should not fire
213+
expect(result).toBeUndefined();
214+
});
215+
216+
it("should not trigger when token count is below fraction threshold", async () => {
217+
const mockBackend = createMockBackend();
218+
219+
// Create a mock model with high maxInputTokens in profile
220+
const mockModelWithHighLimit = {
221+
profile: {
222+
maxInputTokens: 100000, // Very high threshold
223+
},
224+
async invoke(_messages: any) {
225+
return {
226+
content: "This is a summary of the conversation.",
227+
};
228+
},
229+
};
230+
231+
const middleware = createSummarizationMiddleware({
232+
model: mockModelWithHighLimit as any,
233+
backend: mockBackend,
234+
trigger: { type: "fraction", value: 0.9 }, // 90% of maxInputTokens
235+
keep: { type: "messages", value: 2 },
236+
});
237+
238+
// Create just a few short messages
239+
const messages = [
240+
new HumanMessage({ content: "Hello" }),
241+
new AIMessage({ content: "Hi" }),
242+
];
243+
244+
// @ts-expect-error - typing issue
245+
const result = await middleware.beforeModel?.({ messages });
246+
247+
// Token count is far below 90% of 100000, so should not trigger
248+
expect(result).toBeUndefined();
249+
});
250+
});
251+
133252
describe("keep policy", () => {
134253
it("should preserve specified number of recent messages", async () => {
135254
const mockBackend = createMockBackend();

libs/deepagents/src/middleware/summarization.ts

Lines changed: 78 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ import {
5353
} from "langchain";
5454
import { getBufferString } from "@langchain/core/messages";
5555
import type { BaseChatModel } from "@langchain/core/language_models/chat_models";
56-
import { ChatOpenAI } from "@langchain/openai";
56+
import { initChatModel } from "langchain/chat_models/universal";
5757

5858
import type { BackendProtocol, BackendFactory } from "../backends/protocol.js";
5959
import type { StateBackend } from "../backends/state.js";
@@ -259,14 +259,44 @@ export function createSummarizationMiddleware(
259259
return `${historyPathPrefix}/${id}.md`;
260260
}
261261

262+
/**
263+
* Cached resolved model to avoid repeated initChatModel calls
264+
*/
265+
let cachedModel: BaseChatModel | undefined = undefined;
266+
262267
/**
263268
* Resolve the chat model.
269+
* Uses initChatModel to support any model provider from a string name.
270+
* The resolved model is cached for subsequent calls.
264271
*/
265-
function getChatModel(): BaseChatModel {
272+
async function getChatModel(): Promise<BaseChatModel> {
273+
if (cachedModel) {
274+
return cachedModel;
275+
}
276+
266277
if (typeof model === "string") {
267-
return new ChatOpenAI({ modelName: model });
278+
cachedModel = await initChatModel(model);
279+
} else {
280+
cachedModel = model;
281+
}
282+
return cachedModel;
283+
}
284+
285+
/**
286+
* Get the max input tokens from the resolved model's profile.
287+
* Similar to Python's _get_profile_limits.
288+
*/
289+
function getMaxInputTokens(resolvedModel: BaseChatModel): number | undefined {
290+
const profile = resolvedModel.profile;
291+
if (
292+
profile &&
293+
typeof profile === "object" &&
294+
"maxInputTokens" in profile &&
295+
typeof profile.maxInputTokens === "number"
296+
) {
297+
return profile.maxInputTokens;
268298
}
269-
return model;
299+
return undefined;
270300
}
271301

272302
/**
@@ -533,9 +563,10 @@ export function createSummarizationMiddleware(
533563
/**
534564
* Create summary of messages.
535565
*/
536-
async function createSummary(messages: BaseMessage[]): Promise<string> {
537-
const chatModel = getChatModel();
538-
566+
async function createSummary(
567+
messages: BaseMessage[],
568+
chatModel: BaseChatModel,
569+
): Promise<string> {
539570
// Trim messages if too long
540571
let messagesToSummarize = messages;
541572
const tokens = countTokensApproximately(messages);
@@ -605,29 +636,49 @@ ${summary}
605636
return undefined;
606637
}
607638

608-
// Step 1: Truncate args if configured
639+
/**
640+
* Resolve the chat model and get max input tokens from profile
641+
*/
642+
const resolvedModel = await getChatModel();
643+
const maxInputTokens = getMaxInputTokens(resolvedModel);
644+
645+
/**
646+
* Step 1: Truncate args if configured
647+
*/
609648
const { messages: truncatedMessages, modified: argsWereTruncated } =
610-
truncateArgs(messages);
649+
truncateArgs(messages, maxInputTokens);
611650

612-
// Step 2: Check if summarization should happen
651+
/**
652+
* Step 2: Check if summarization should happen
653+
*/
613654
const totalTokens = countTokensApproximately(truncatedMessages);
614655
const shouldDoSummarization = shouldSummarize(
615656
truncatedMessages,
616657
totalTokens,
658+
maxInputTokens,
617659
);
618660

619-
// If only truncation happened (no summarization)
661+
/**
662+
* If only truncation happened (no summarization)
663+
*/
620664
if (argsWereTruncated && !shouldDoSummarization) {
621665
return { messages: truncatedMessages };
622666
}
623667

624-
// If no truncation and no summarization
668+
/**
669+
* If no truncation and no summarization
670+
*/
625671
if (!shouldDoSummarization) {
626672
return undefined;
627673
}
628674

629-
// Step 3: Perform summarization
630-
const cutoffIndex = determineCutoffIndex(truncatedMessages);
675+
/**
676+
* Step 3: Perform summarization
677+
*/
678+
const cutoffIndex = determineCutoffIndex(
679+
truncatedMessages,
680+
maxInputTokens,
681+
);
631682
if (cutoffIndex <= 0) {
632683
if (argsWereTruncated) {
633684
return { messages: truncatedMessages };
@@ -638,7 +689,9 @@ ${summary}
638689
const messagesToSummarize = truncatedMessages.slice(0, cutoffIndex);
639690
const preservedMessages = truncatedMessages.slice(cutoffIndex);
640691

641-
// Offload to backend first
692+
/**
693+
* Offload to backend first
694+
*/
642695
const resolvedBackend = getBackend(state);
643696
const filePath = await offloadToBackend(
644697
resolvedBackend,
@@ -647,14 +700,20 @@ ${summary}
647700
);
648701

649702
if (filePath === null) {
650-
// Offloading failed - don't proceed with summarization
703+
/**
704+
* Offloading failed - don't proceed with summarization
705+
*/
651706
return undefined;
652707
}
653708

654-
// Generate summary
655-
const summary = await createSummary(messagesToSummarize);
709+
/**
710+
* Generate summary
711+
*/
712+
const summary = await createSummary(messagesToSummarize, resolvedModel);
656713

657-
// Build summary message
714+
/**
715+
* Build summary message
716+
*/
658717
const summaryMessage = buildSummaryMessage(summary, filePath);
659718

660719
return {

0 commit comments

Comments (0)