File tree Expand file tree Collapse file tree 1 file changed +4
-2
lines changed
Expand file tree Collapse file tree 1 file changed +4
-2
lines changed Original file line number Diff line number Diff line change @@ -55,13 +55,15 @@ export function truncateConversationIfNeeded(
5555/**
5656 * Calculates the maximum allowed tokens for models that support prompt caching.
5757 *
58- * The maximum is computed as the greater of (contextWindow - 40000 ) and 80% of the contextWindow.
58+ * The maximum is computed as the greater of (contextWindow - buffer ) and 80% of the contextWindow.
5959 *
6060 * @param {ModelInfo } modelInfo - The model information containing the context window size.
6161 * @returns {number } The maximum number of tokens allowed for prompt caching models.
6262 */
6363function getMaxTokensForPromptCachingModels ( modelInfo : ModelInfo ) : number {
64- return Math . max ( modelInfo . contextWindow - 40_000 , modelInfo . contextWindow * 0.8 )
64+ // The buffer needs to be at least as large as `modelInfo.maxTokens`.
65+ const buffer = modelInfo . maxTokens ? Math . max ( 40_000 , modelInfo . maxTokens ) : 40_000
66+ return Math . max ( modelInfo . contextWindow - buffer , modelInfo . contextWindow * 0.8 )
6567}
6668
6769/**
You can’t perform that action at this time.
0 commit comments