 import { convertToCoreMessages, streamText as _streamText, type Message } from 'ai';
-import { MAX_TOKENS, type FileMap } from './constants';
+import { MAX_TOKENS, PROVIDER_COMPLETION_LIMITS, isReasoningModel, type FileMap } from './constants';
 import { getSystemPrompt } from '~/lib/common/prompts/prompts';
 import { DEFAULT_MODEL, DEFAULT_PROVIDER, MODIFICATIONS_TAG_NAME, PROVIDER_LIST, WORK_DIR } from '~/utils/constants';
 import type { IProviderSetting } from '~/types/model';
@@ -26,6 +26,23 @@ export interface StreamingOptions extends Omit<Parameters<typeof _streamText>[0]

 const logger = createScopedLogger('stream-text');

+function getCompletionTokenLimit(modelDetails: any): number {
+  // 1. If the model specifies its own completion-token limit, use that
+  if (modelDetails.maxCompletionTokens && modelDetails.maxCompletionTokens > 0) {
+    return modelDetails.maxCompletionTokens;
+  }
+
+  // 2. Otherwise use the provider-specific default
+  const providerDefault = PROVIDER_COMPLETION_LIMITS[modelDetails.provider];
+
+  if (providerDefault) {
+    return providerDefault;
+  }
+
+  // 3. Final fallback to MAX_TOKENS, capped at a conservative limit for safety
+  return Math.min(MAX_TOKENS, 16384);
+}
+
 function sanitizeText(text: string): string {
   let sanitized = text.replace(/<div class=\"__boltThought__\">.*?<\/div>/s, '');
   sanitized = sanitized.replace(/<think>.*?<\/think>/s, '');
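The new imports rely on two exports from `./constants` that this diff does not show: a per-provider completion-token map and a reasoning-model detector. A minimal sketch of what they might look like, with the provider keys, limit values, and name-matching rule all being illustrative assumptions rather than the PR's actual definitions:

```ts
// Sketch only: the real './constants' definitions are not shown in this diff.
export const PROVIDER_COMPLETION_LIMITS: Record<string, number> = {
  OpenAI: 16384, // assumed value
  Anthropic: 8192, // assumed value
  Google: 8192, // assumed value
};

// Assumed heuristic: treat OpenAI o-series and GPT-5 family names as reasoning models.
export function isReasoningModel(modelName: string): boolean {
  return /^(o1|o3|o4|gpt-5)/i.test(modelName.trim());
}
```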
@@ -123,10 +140,10 @@ export async function streamText(props: {
     }
   }

-  const dynamicMaxTokens = modelDetails && modelDetails.maxTokenAllowed ? modelDetails.maxTokenAllowed : MAX_TOKENS;
+  const dynamicMaxTokens = modelDetails ? getCompletionTokenLimit(modelDetails) : Math.min(MAX_TOKENS, 16384);

-  // Ensure we never exceed reasonable token limits to prevent API errors
-  const safeMaxTokens = Math.min(dynamicMaxTokens, 100000); // Cap at 100k for safety
+  // Additional safety cap; should not be needed once completion limits are resolved correctly, but kept as a guard
+  const safeMaxTokens = Math.min(dynamicMaxTokens, 128000);

   logger.info(
     `Max tokens for model ${modelDetails.name} is ${safeMaxTokens} (capped from ${dynamicMaxTokens}) based on model limits`,
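To make the fallback chain concrete, here is how `getCompletionTokenLimit` would resolve a few hypothetical `modelDetails` values; the model names and providers are illustrative, and the step-2 result depends on what `PROVIDER_COMPLETION_LIMITS` actually contains:

```ts
// Hypothetical inputs; only the resolution order matters here.
getCompletionTokenLimit({ name: 'claude-sonnet', provider: 'Anthropic', maxCompletionTokens: 8192 });
// -> 8192 (step 1: the model declares its own completion limit)

getCompletionTokenLimit({ name: 'gpt-4o', provider: 'OpenAI' });
// -> PROVIDER_COMPLETION_LIMITS['OpenAI'] (step 2: provider default, when defined)

getCompletionTokenLimit({ name: 'local-model', provider: 'SomeNewProvider' });
// -> Math.min(MAX_TOKENS, 16384) (step 3: capped global fallback)
```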
@@ -204,18 +221,84 @@ export async function streamText(props: {

   logger.info(`Sending llm call to ${provider.name} with model ${modelDetails.name}`);

+  // DEBUG: Log reasoning model detection
+  const isReasoning = isReasoningModel(modelDetails.name);
+  logger.info(`DEBUG STREAM: Model "${modelDetails.name}" detected as reasoning model: ${isReasoning}`);
+
   // console.log(systemPrompt, processedMessages);

-  return await _streamText({
+  // Use maxCompletionTokens for reasoning models (o1, GPT-5), maxTokens for traditional models
+  const tokenParams = isReasoning ? { maxCompletionTokens: safeMaxTokens } : { maxTokens: safeMaxTokens };
+
+  // Filter out unsupported parameters for reasoning models
+  const filteredOptions =
+    isReasoning && options
+      ? Object.fromEntries(
+          Object.entries(options).filter(
+            ([key]) =>
+              ![
+                'temperature',
+                'topP',
+                'presencePenalty',
+                'frequencyPenalty',
+                'logprobs',
+                'topLogprobs',
+                'logitBias',
+              ].includes(key),
+          ),
+        )
+      : options || {};
+
+  // DEBUG: Log filtered options
+  logger.info(
+    `DEBUG STREAM: Options filtering for model "${modelDetails.name}":`,
+    JSON.stringify(
+      {
+        isReasoning,
+        originalOptions: options || {},
+        filteredOptions,
+        originalOptionsKeys: options ? Object.keys(options) : [],
+        filteredOptionsKeys: Object.keys(filteredOptions),
+        removedParams: options ? Object.keys(options).filter((key) => !(key in filteredOptions)) : [],
+      },
+      null,
+      2,
+    ),
+  );
+
+  const streamParams = {
     model: provider.getModelInstance({
       model: modelDetails.name,
       serverEnv,
       apiKeys,
       providerSettings,
     }),
     system: chatMode === 'build' ? systemPrompt : discussPrompt(),
-    maxTokens: safeMaxTokens,
+    ...tokenParams,
     messages: convertToCoreMessages(processedMessages as any),
-    ...options,
-  });
+    ...filteredOptions,
+
+    // Set temperature to 1 for reasoning models (required by OpenAI API)
+    ...(isReasoning ? { temperature: 1 } : {}),
+  };
+
+  // DEBUG: Log final streaming parameters
+  logger.info(
+    `DEBUG STREAM: Final streaming params for model "${modelDetails.name}":`,
+    JSON.stringify(
+      {
+        hasTemperature: 'temperature' in streamParams,
+        hasMaxTokens: 'maxTokens' in streamParams,
+        hasMaxCompletionTokens: 'maxCompletionTokens' in streamParams,
+        paramKeys: Object.keys(streamParams).filter((key) => !['model', 'messages', 'system'].includes(key)),
+        streamParams: Object.fromEntries(
+          Object.entries(streamParams).filter(([key]) => !['model', 'messages', 'system'].includes(key)),
+        ),
+      },
+      null,
+      2,
+    ),
+  );
+
+  return await _streamText(streamParams);
 }
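The net effect of the reasoning-model branch is easier to see collapsed into one place. The helper below is a hypothetical condensation of the logic above, not code from the PR, and it assumes the same list of unsupported sampling parameters:

```ts
// Hypothetical condensation of the branching above (not part of the PR).
function buildCompletionParams(
  isReasoning: boolean,
  safeMaxTokens: number,
  options: Record<string, unknown> = {},
) {
  if (!isReasoning) {
    // Traditional models: classic maxTokens, caller options passed through unchanged.
    return { maxTokens: safeMaxTokens, ...options };
  }

  // Reasoning models: maxCompletionTokens instead of maxTokens, the sampling
  // parameters those APIs reject are stripped, and temperature is pinned to 1.
  const unsupported = ['temperature', 'topP', 'presencePenalty', 'frequencyPenalty', 'logprobs', 'topLogprobs', 'logitBias'];
  const filtered = Object.fromEntries(Object.entries(options).filter(([key]) => !unsupported.includes(key)));

  return { maxCompletionTokens: safeMaxTokens, ...filtered, temperature: 1 };
}
```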