Skip to content

Commit 2d9db93

Browse files
mkreyman and claude
committed
Fix token limit enforcement in context_get to prevent MCP protocol errors
- Added calculateSafeItemCount() helper function to determine safe result size
- Implemented automatic response truncation when approaching 25,000 token limit
- Enhanced pagination metadata with truncated/truncatedCount fields
- Improved warning messages with specific pagination instructions
- Added comprehensive unit tests for token limit enforcement
- Bumped version to 0.10.1
- Updated CHANGELOG.md with fix details

This prevents "response exceeds maximum allowed tokens" errors that users were experiencing with large result sets like "*test*" pattern queries.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
1 parent d42ba7a commit 2d9db93

File tree

4 files changed

+271
-13
lines changed

4 files changed

+271
-13
lines changed

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
## [0.10.1] - 2025-01-11
11+
1012
### Fixed
1113

14+
- **Token Limit Enforcement** - Fixed MCP protocol token limit errors
15+
16+
- Added automatic response truncation when approaching 25,000 token limit
17+
- Implemented `calculateSafeItemCount()` helper to determine safe result size
18+
- Enhanced pagination metadata with `truncated` and `truncatedCount` fields
19+
- Improved warning messages with specific pagination instructions
20+
- Prevents "response exceeds maximum allowed tokens" errors from MCP clients
21+
1222
- **Pagination Defaults in context_get** - Improved consistency
1323
- Added proper validation of pagination parameters at handler level
1424
- Default limit of 100 items now properly applied when not specified

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "mcp-memory-keeper",
3-
"version": "0.10.0",
3+
"version": "0.10.1",
44
"description": "MCP server for persistent context management in AI coding assistants",
55
"main": "dist/index.js",
66
"bin": {
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
import { describe, it, expect } from '@jest/globals';
2+
3+
// Helper functions from the main index.ts file
4+
function estimateTokens(text: string): number {
5+
return Math.ceil(text.length / 4);
6+
}
7+
8+
function calculateSafeItemCount(items: any[], tokenLimit: number): number {
9+
if (items.length === 0) return 0;
10+
11+
let safeCount = 0;
12+
let currentTokens = 0;
13+
14+
// Include base response structure in token calculation
15+
const baseResponse = {
16+
items: [],
17+
pagination: {
18+
total: 0,
19+
returned: 0,
20+
offset: 0,
21+
hasMore: false,
22+
nextOffset: null,
23+
totalCount: 0,
24+
page: 1,
25+
pageSize: 0,
26+
totalPages: 1,
27+
hasNextPage: false,
28+
hasPreviousPage: false,
29+
previousOffset: null,
30+
totalSize: 0,
31+
averageSize: 0,
32+
defaultsApplied: {},
33+
truncated: false,
34+
truncatedCount: 0,
35+
},
36+
};
37+
38+
// Estimate tokens for base response structure
39+
const baseTokens = estimateTokens(JSON.stringify(baseResponse, null, 2));
40+
currentTokens = baseTokens;
41+
42+
// Add items one by one until we approach the token limit
43+
for (let i = 0; i < items.length; i++) {
44+
const itemTokens = estimateTokens(JSON.stringify(items[i], null, 2));
45+
46+
// Leave some buffer (10%) to account for formatting and additional metadata
47+
if (currentTokens + itemTokens > tokenLimit * 0.9) {
48+
break;
49+
}
50+
51+
currentTokens += itemTokens;
52+
safeCount++;
53+
}
54+
55+
// Always return at least 1 item if any exist, even if it exceeds limit
56+
// This prevents infinite loops and ensures progress
57+
return Math.max(safeCount, items.length > 0 ? 1 : 0);
58+
}
59+
60+
describe('Token Limit Enforcement Unit Tests', () => {
61+
describe('calculateSafeItemCount', () => {
62+
it('should return 0 for empty items array', () => {
63+
const result = calculateSafeItemCount([], 20000);
64+
expect(result).toBe(0);
65+
});
66+
67+
it('should return at least 1 item if any exist', () => {
68+
const largeItem = {
69+
key: 'large.item',
70+
value: 'X'.repeat(100000), // Very large item
71+
category: 'test',
72+
priority: 'high',
73+
};
74+
75+
const result = calculateSafeItemCount([largeItem], 20000);
76+
expect(result).toBe(1);
77+
});
78+
79+
it('should truncate items when approaching token limit', () => {
80+
// Create multiple medium-sized items
81+
const items = [];
82+
for (let i = 0; i < 50; i++) {
83+
items.push({
84+
key: `item.${i}`,
85+
value:
86+
'This is a medium-sized test value that contains enough text to trigger token limit enforcement when many items are returned together. '.repeat(
87+
20
88+
),
89+
category: 'test',
90+
priority: 'high',
91+
});
92+
}
93+
94+
const result = calculateSafeItemCount(items, 20000);
95+
expect(result).toBeLessThan(50);
96+
expect(result).toBeGreaterThan(0);
97+
});
98+
99+
it('should handle small items that all fit within limit', () => {
100+
const items = [];
101+
for (let i = 0; i < 10; i++) {
102+
items.push({
103+
key: `small.item.${i}`,
104+
value: 'Small value',
105+
category: 'test',
106+
priority: 'high',
107+
});
108+
}
109+
110+
const result = calculateSafeItemCount(items, 20000);
111+
expect(result).toBe(10);
112+
});
113+
114+
it('should respect token limit with buffer', () => {
115+
// Create items that would exceed token limit
116+
const items = [];
117+
const itemValue = 'X'.repeat(2000); // 2KB item that will definitely cause truncation
118+
119+
for (let i = 0; i < 100; i++) {
120+
items.push({
121+
key: `large.buffer.item.${i}`,
122+
value: itemValue,
123+
category: 'test',
124+
priority: 'high',
125+
});
126+
}
127+
128+
const result = calculateSafeItemCount(items, 20000);
129+
130+
// Should be significantly less than all items due to token limits
131+
expect(result).toBeLessThan(100);
132+
expect(result).toBeGreaterThan(0);
133+
134+
// Verify that the result respects the buffer by checking actual tokens
135+
const actualTokens = result * estimateTokens(JSON.stringify(items[0], null, 2));
136+
expect(actualTokens).toBeLessThan(20000 * 0.9); // Should be under 90% of limit
137+
});
138+
});
139+
140+
describe('estimateTokens', () => {
141+
it('should estimate tokens correctly', () => {
142+
const text = 'This is a test string';
143+
const tokens = estimateTokens(text);
144+
expect(tokens).toBe(Math.ceil(text.length / 4));
145+
});
146+
147+
it('should handle empty strings', () => {
148+
const tokens = estimateTokens('');
149+
expect(tokens).toBe(0);
150+
});
151+
152+
it('should handle large strings', () => {
153+
const largeText = 'X'.repeat(10000);
154+
const tokens = estimateTokens(largeText);
155+
expect(tokens).toBe(2500); // 10000 / 4
156+
});
157+
});
158+
});

src/index.ts

Lines changed: 102 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,59 @@ function calculateResponseMetrics(items: any[]): {
184184
return { totalSize, estimatedTokens, averageSize };
185185
}
186186

187+
// Helper to calculate how many items can fit within token limit
188+
function calculateSafeItemCount(items: any[], tokenLimit: number): number {
189+
if (items.length === 0) return 0;
190+
191+
let safeCount = 0;
192+
let currentTokens = 0;
193+
194+
// Include base response structure in token calculation
195+
const baseResponse = {
196+
items: [],
197+
pagination: {
198+
total: 0,
199+
returned: 0,
200+
offset: 0,
201+
hasMore: false,
202+
nextOffset: null,
203+
totalCount: 0,
204+
page: 1,
205+
pageSize: 0,
206+
totalPages: 1,
207+
hasNextPage: false,
208+
hasPreviousPage: false,
209+
previousOffset: null,
210+
totalSize: 0,
211+
averageSize: 0,
212+
defaultsApplied: {},
213+
truncated: false,
214+
truncatedCount: 0,
215+
},
216+
};
217+
218+
// Estimate tokens for base response structure
219+
const baseTokens = estimateTokens(JSON.stringify(baseResponse, null, 2));
220+
currentTokens = baseTokens;
221+
222+
// Add items one by one until we approach the token limit
223+
for (let i = 0; i < items.length; i++) {
224+
const itemTokens = estimateTokens(JSON.stringify(items[i], null, 2));
225+
226+
// Leave some buffer (10%) to account for formatting and additional metadata
227+
if (currentTokens + itemTokens > tokenLimit * 0.9) {
228+
break;
229+
}
230+
231+
currentTokens += itemTokens;
232+
safeCount++;
233+
}
234+
235+
// Always return at least 1 item if any exist, even if it exceeds limit
236+
// This prevents infinite loops and ensures progress
237+
return Math.max(safeCount, items.length > 0 ? 1 : 0);
238+
}
239+
187240
// Helper to parse relative time strings
188241
function parseRelativeTime(relativeTime: string): string | null {
189242
const now = new Date();
@@ -699,8 +752,22 @@ server.setRequestHandler(CallToolRequestSchema, async request => {
699752
const metrics = calculateResponseMetrics(result.items);
700753
const TOKEN_LIMIT = 20000; // Conservative limit to stay well under MCP's 25k limit
701754

702-
// Check if we're approaching token limits
755+
// Check if we're approaching token limits and enforce truncation
703756
const isApproachingLimit = metrics.estimatedTokens > TOKEN_LIMIT;
757+
let actualItems = result.items;
758+
let wasTruncated = false;
759+
let truncatedCount = 0;
760+
761+
if (isApproachingLimit) {
762+
// Calculate how many items we can safely return
763+
const safeItemCount = calculateSafeItemCount(result.items, TOKEN_LIMIT);
764+
765+
if (safeItemCount < result.items.length) {
766+
actualItems = result.items.slice(0, safeItemCount);
767+
wasTruncated = true;
768+
truncatedCount = result.items.length - safeItemCount;
769+
}
770+
}
704771

705772
// Calculate pagination metadata
706773
// Use the validated limit and offset from paginationValidation
@@ -709,9 +776,18 @@ server.setRequestHandler(CallToolRequestSchema, async request => {
709776
const currentPage =
710777
effectiveLimit > 0 ? Math.floor(effectiveOffset / effectiveLimit) + 1 : 1;
711778
const totalPages = effectiveLimit > 0 ? Math.ceil(result.totalCount / effectiveLimit) : 1;
712-
const hasNextPage = currentPage < totalPages;
779+
780+
// Update pagination to account for truncation
781+
const hasNextPage = wasTruncated || currentPage < totalPages;
713782
const hasPreviousPage = currentPage > 1;
714783

784+
// Calculate next offset accounting for truncation
785+
const nextOffset = hasNextPage
786+
? wasTruncated
787+
? effectiveOffset + actualItems.length
788+
: effectiveOffset + effectiveLimit
789+
: null;
790+
715791
// Track whether defaults were applied
716792
const defaultsApplied = {
717793
limit: rawLimit === undefined,
@@ -720,7 +796,7 @@ server.setRequestHandler(CallToolRequestSchema, async request => {
720796

721797
// Enhanced response format
722798
if (includeMetadata) {
723-
const itemsWithMetadata = result.items.map(item => ({
799+
const itemsWithMetadata = actualItems.map(item => ({
724800
key: item.key,
725801
value: item.value,
726802
category: item.category,
@@ -736,10 +812,10 @@ server.setRequestHandler(CallToolRequestSchema, async request => {
736812
items: itemsWithMetadata,
737813
pagination: {
738814
total: result.totalCount,
739-
returned: result.items.length,
815+
returned: actualItems.length,
740816
offset: effectiveOffset,
741817
hasMore: hasNextPage,
742-
nextOffset: hasNextPage ? effectiveOffset + effectiveLimit : null,
818+
nextOffset: nextOffset,
743819
// Extended pagination metadata
744820
totalCount: result.totalCount,
745821
page: currentPage,
@@ -755,13 +831,20 @@ server.setRequestHandler(CallToolRequestSchema, async request => {
755831
averageSize: metrics.averageSize,
756832
// Defaults applied
757833
defaultsApplied: defaultsApplied,
834+
// Truncation information
835+
truncated: wasTruncated,
836+
truncatedCount: truncatedCount,
758837
},
759838
};
760839

761840
// Add warning if approaching token limits
762841
if (isApproachingLimit) {
763-
response.pagination.warning =
764-
'Large result set. Consider using smaller limit or more specific filters.';
842+
if (wasTruncated) {
843+
response.pagination.warning = `Response truncated due to token limits. ${truncatedCount} items omitted. Use pagination with offset=${nextOffset} to retrieve remaining items.`;
844+
} else {
845+
response.pagination.warning =
846+
'Large result set. Consider using smaller limit or more specific filters.';
847+
}
765848
}
766849

767850
return {
@@ -776,20 +859,27 @@ server.setRequestHandler(CallToolRequestSchema, async request => {
776859

777860
// Return enhanced format for all queries to support pagination
778861
const response: any = {
779-
items: result.items,
862+
items: actualItems,
780863
pagination: {
781864
total: result.totalCount,
782-
returned: result.items.length,
865+
returned: actualItems.length,
783866
offset: effectiveOffset,
784867
hasMore: hasNextPage,
785-
nextOffset: hasNextPage ? effectiveOffset + effectiveLimit : null,
868+
nextOffset: nextOffset,
869+
// Truncation information
870+
truncated: wasTruncated,
871+
truncatedCount: truncatedCount,
786872
},
787873
};
788874

789875
// Add warning if approaching token limits
790876
if (isApproachingLimit) {
791-
response.pagination.warning =
792-
'Large result set. Consider using smaller limit or more specific filters.';
877+
if (wasTruncated) {
878+
response.pagination.warning = `Response truncated due to token limits. ${truncatedCount} items omitted. Use pagination with offset=${nextOffset} to retrieve remaining items.`;
879+
} else {
880+
response.pagination.warning =
881+
'Large result set. Consider using smaller limit or more specific filters.';
882+
}
793883
}
794884

795885
return {

0 commit comments

Comments
 (0)