@@ -156,13 +156,14 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
156156 }
157157
158158 const isGrokXAI = this . _isGrokXAI ( this . options . openAiBaseUrl )
159+ const isNvidiaApi = this . _isNvidiaApi ( this . options . openAiBaseUrl )
159160
160161 const requestOptions : OpenAI . Chat . Completions . ChatCompletionCreateParamsStreaming = {
161162 model : modelId ,
162163 temperature : this . options . modelTemperature ?? ( deepseekReasoner ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0 ) ,
163164 messages : convertedMessages ,
164165 stream : true as const ,
165- ...( isGrokXAI ? { } : { stream_options : { include_usage : true } } ) ,
166+ ...( isGrokXAI || isNvidiaApi ? { } : { stream_options : { include_usage : true } } ) ,
166167 ...( reasoning && reasoning ) ,
167168 }
168169
@@ -317,6 +318,8 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
317318 if ( this . options . openAiStreamingEnabled ?? true ) {
318319 const isGrokXAI = this . _isGrokXAI ( this . options . openAiBaseUrl )
319320
321+ const isNvidiaApi = this . _isNvidiaApi ( this . options . openAiBaseUrl )
322+
320323 const requestOptions : OpenAI . Chat . Completions . ChatCompletionCreateParamsStreaming = {
321324 model : modelId ,
322325 messages : [
@@ -327,7 +330,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
327330 ...convertToOpenAiMessages ( messages ) ,
328331 ] ,
329332 stream : true ,
330- ...( isGrokXAI ? { } : { stream_options : { include_usage : true } } ) ,
333+ ...( isGrokXAI || isNvidiaApi ? { } : { stream_options : { include_usage : true } } ) ,
331334 reasoning_effort : modelInfo . reasoningEffort as "low" | "medium" | "high" | undefined ,
332335 temperature : undefined ,
333336 }
@@ -423,6 +426,12 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
423426 return urlHost . endsWith ( ".services.ai.azure.com" )
424427 }
425428
/**
 * Reports whether the configured base URL targets an NVIDIA-hosted endpoint.
 *
 * The host must be exactly "nvidia.com" or one of its subdomains (NVIDIA API
 * endpoints typically use integrate.api.nvidia.com or build.nvidia.com).
 * A plain substring test is deliberately avoided: it would also accept
 * unrelated hosts such as "notnvidia.com" or "nvidia.com.evil.example".
 *
 * @param baseUrl - The OpenAI-compatible base URL from the provider options.
 * @returns true when the URL's host belongs to the nvidia.com domain.
 */
private _isNvidiaApi(baseUrl?: string): boolean {
	// NOTE(review): _getUrlHost may return the host with an explicit ":port"
	// suffix — confirm. The port is stripped here so the suffix match below
	// still recognizes such hosts.
	const hostname = this._getUrlHost(baseUrl).toLowerCase().replace(/:\d+$/, "")
	return hostname === "nvidia.com" || hostname.endsWith(".nvidia.com")
}
434+
426435 /**
427436 * Adds max_completion_tokens to the request body if needed based on provider configuration
428437 * Note: max_tokens is deprecated in favor of max_completion_tokens as per OpenAI documentation
0 commit comments