@@ -25,11 +25,13 @@ describe("getModelMaxOutputTokens", () => {
 		expect(result).toBe(16384)
 	})
 
-	test("should return model maxTokens when not using claude-code provider", () => {
+	test("should return model maxTokens when not using claude-code provider and maxTokens is within 20% of context window", () => {
 		const settings: ProviderSettings = {
 			apiProvider: "anthropic",
 		}
 
+		// mockModel has maxTokens: 8192 and contextWindow: 200000
+		// 8192 is 4.096% of 200000, which is <= 20%, so it should use model.maxTokens
 		const result = getModelMaxOutputTokens({
 			modelId: "claude-3-5-sonnet-20241022",
 			model: mockModel,
@@ -115,7 +117,7 @@ describe("getModelMaxOutputTokens", () => {
 			contextWindow: 1_048_576,
 			supportsPromptCache: false,
 			supportsReasoningBudget: true,
-			maxTokens: 65_535,
+			maxTokens: 65_535, // 65_535 is ~6.25% of 1_048_576, which is <= 20%
 		}
 
 		const settings: ProviderSettings = {
@@ -124,7 +126,68 @@
 		}
 
 		const result = getModelMaxOutputTokens({ modelId: geminiModelId, model, settings })
-		expect(result).toBe(65_535) // Should use model.maxTokens, not ANTHROPIC_DEFAULT_MAX_TOKENS
+		expect(result).toBe(65_535) // Should use model.maxTokens since it's within 20% threshold
+	})
+
+	test("should clamp maxTokens to 20% of context window when maxTokens exceeds threshold", () => {
+		const model: ModelInfo = {
+			contextWindow: 100_000,
+			supportsPromptCache: false,
+			maxTokens: 50_000, // 50% of context window, exceeds 20% threshold
+		}
+
+		const settings: ProviderSettings = {
+			apiProvider: "openai",
+		}
+
+		const result = getModelMaxOutputTokens({
+			modelId: "gpt-4",
+			model,
+			settings,
+			format: "openai",
+		})
+		// Should clamp to 20% of context window: 100_000 * 0.2 = 20_000
+		expect(result).toBe(20_000)
+	})
+
+	test("should clamp maxTokens to 20% of context window for Anthropic models when maxTokens exceeds threshold", () => {
+		const model: ModelInfo = {
+			contextWindow: 100_000,
+			supportsPromptCache: true,
+			maxTokens: 50_000, // 50% of context window, exceeds 20% threshold
+		}
+
+		const settings: ProviderSettings = {
+			apiProvider: "anthropic",
+		}
+
+		const result = getModelMaxOutputTokens({
+			modelId: "claude-3-5-sonnet-20241022",
+			model,
+			settings,
+		})
+		// Should clamp to 20% of context window: 100_000 * 0.2 = 20_000
+		expect(result).toBe(20_000)
+	})
+
+	test("should use model.maxTokens when exactly at 20% threshold", () => {
+		const model: ModelInfo = {
+			contextWindow: 100_000,
+			supportsPromptCache: false,
+			maxTokens: 20_000, // Exactly 20% of context window
+		}
+
+		const settings: ProviderSettings = {
+			apiProvider: "openai",
+		}
+
+		const result = getModelMaxOutputTokens({
+			modelId: "gpt-4",
+			model,
+			settings,
+			format: "openai",
+		})
+		expect(result).toBe(20_000) // Should use model.maxTokens since it's exactly at 20%
 	})
 
 	test("should return modelMaxTokens from settings when reasoning budget is required", () => {
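The implementation of getModelMaxOutputTokens is not part of this diff; as a minimal sketch of the 20% clamping rule these tests assume (ModelInfoSketch, MAX_TOKENS_RATIO, and clampMaxOutputTokens are hypothetical names, and the real function also handles reasoning budgets and provider-specific defaults not shown here):

// Hypothetical sketch of the clamping rule the tests above exercise.
interface ModelInfoSketch {
	contextWindow: number
	maxTokens?: number
}

const MAX_TOKENS_RATIO = 0.2 // assumed threshold: 20% of the context window

function clampMaxOutputTokens(model: ModelInfoSketch): number | undefined {
	if (model.maxTokens === undefined) return undefined // fallback handling omitted
	const ceiling = Math.floor(model.contextWindow * MAX_TOKENS_RATIO)
	// Math.min keeps model.maxTokens when it is at or below the 20% ceiling
	// (the "within threshold" and "exactly at threshold" cases) and clamps it
	// to the ceiling otherwise (the "exceeds threshold" cases).
	return Math.min(model.maxTokens, ceiling)
}

clampMaxOutputTokens({ contextWindow: 100_000, maxTokens: 50_000 }) // 20_000 (clamped)
clampMaxOutputTokens({ contextWindow: 100_000, maxTokens: 20_000 }) // 20_000 (exactly at 20%)
clampMaxOutputTokens({ contextWindow: 200_000, maxTokens: 8_192 }) // 8_192 (within 20%)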