Sliding window fix

cte · cte · commit 1d67f88aba2c · 2025-02-24T15:09:46.000-08:00
diff --git a/src/core/sliding-window/index.ts b/src/core/sliding-window/index.ts
@@ -55,13 +55,15 @@ export function truncateConversationIfNeeded(
 /**
  * Calculates the maximum allowed tokens for models that support prompt caching.
  *
- * The maximum is computed as the greater of (contextWindow - 40000) and 80% of the contextWindow.
+ * The maximum is computed as the greater of (contextWindow - buffer) and 80% of the contextWindow.
  *
  * @param {ModelInfo} modelInfo - The model information containing the context window size.
  * @returns {number} The maximum number of tokens allowed for prompt caching models.
  */
 function getMaxTokensForPromptCachingModels(modelInfo: ModelInfo): number {
-	return Math.max(modelInfo.contextWindow - 40_000, modelInfo.contextWindow * 0.8)
+	// The buffer needs to be at least as large as `modelInfo.maxTokens`.
+	const buffer = modelInfo.maxTokens ? Math.max(40_000, modelInfo.maxTokens) : 40_000
+	return Math.max(modelInfo.contextWindow - buffer, modelInfo.contextWindow * 0.8)
 }
 
 /**

Original file line number	Diff line number	Diff line change
`@@ -55,13 +55,15 @@ export function truncateConversationIfNeeded(`
`55`	`55`	`/**`
`56`	`56`	`* Calculates the maximum allowed tokens for models that support prompt caching.`
`57`	`57`	`*`
`58`		`- * The maximum is computed as the greater of (contextWindow - 40000) and 80% of the contextWindow.`
	`58`	`+ * The maximum is computed as the greater of (contextWindow - buffer) and 80% of the contextWindow.`
`59`	`59`	`*`
`60`	`60`	`* @param {ModelInfo} modelInfo - The model information containing the context window size.`
`61`	`61`	`* @returns {number} The maximum number of tokens allowed for prompt caching models.`
`62`	`62`	`*/`
`63`	`63`	`function getMaxTokensForPromptCachingModels(modelInfo: ModelInfo): number {`
`64`		`- return Math.max(modelInfo.contextWindow - 40_000, modelInfo.contextWindow * 0.8)`
	`64`	``+ // The buffer needs to be at least as large as `modelInfo.maxTokens`.``
	`65`	`+ const buffer = modelInfo.maxTokens ? Math.max(40_000, modelInfo.maxTokens) : 40_000`
	`66`	`+ return Math.max(modelInfo.contextWindow - buffer, modelInfo.contextWindow * 0.8)`
`65`	`67`	`}`
`66`	`68`
`67`	`69`	`/**`