@@ -15,12 +15,12 @@ import { createOpenAI } from '@ai-sdk/openai';
 import { createMistral } from '@ai-sdk/mistral';
 import { createOllama } from 'ollama-ai-provider';
 import { createOpenRouter } from '@openrouter/ai-sdk-provider';
-import { processMessages, toAIMessage } from './utils';
+import { markBedrockCacheBreakpoint, processMessages, toAIMessage } from './utils';
 import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock';
 import { fromNodeProviderChain } from '@aws-sdk/credential-providers';
 import { AnthropicLanguageModel } from './anthropic';
 import { DEFAULT_MAX_TOKEN_OUTPUT } from './constants.js';
-import { recordRequestTokenUsage, recordTokenUsage } from './extension.js';
+import { log, recordRequestTokenUsage, recordTokenUsage } from './extension.js';
 
 /**
  * Models used by chat participants and for vscode.lm.* API functionality.
@@ -195,7 +195,7 @@ abstract class AILanguageModel implements positron.ai.LanguageModelChatProvider
     public readonly name;
     public readonly provider;
     public readonly identifier;
-    public readonly maxOutputTokens;
+    public readonly maxOutputTokens: number;
     protected abstract model: ai.LanguageModelV1;
 
     capabilities = {
@@ -211,7 +211,26 @@ abstract class AILanguageModel implements positron.ai.LanguageModelChatProvider
         this.identifier = _config.id;
         this.name = _config.name;
         this.provider = _config.provider;
+        const maxOutputTokens = vscode.workspace.getConfiguration('positron.assistant').get('maxOutputTokens', {} as Record<string, number>);
         this.maxOutputTokens = _config.maxOutputTokens ?? DEFAULT_MAX_TOKEN_OUTPUT;
+
+        // Override maxOutputTokens if specified in the configuration
+        for (const [key, value] of Object.entries(maxOutputTokens)) {
+            if (_config.model.indexOf(key) !== -1 && value) {
+                let maxOutputTokens = value;
+                if (typeof maxOutputTokens !== 'number') {
+                    log.warn(`Invalid maxOutputTokens '${maxOutputTokens}' for ${key} (${_config.model}); ignoring`);
+                    continue;
+                }
+                if (maxOutputTokens < 512) {
+                    log.warn(`Specified maxOutputTokens '${maxOutputTokens}' for ${key} (${_config.model}) is too low; using 512 instead`);
+                    maxOutputTokens = 512;
+                }
+                log.debug(`Setting maxOutputTokens for ${key} (${_config.model}) to ${maxOutputTokens}`);
+                this.maxOutputTokens = maxOutputTokens;
+                break;
+            }
+        }
     }
 
     get providerName(): string {
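
The override above matches configuration keys as substrings of the configured model ID, so a single entry can cover several model variants. A hypothetical `settings.json` entry (the model keys and token counts here are invented for illustration):

```jsonc
{
    // Keys are matched as substrings of the model ID. Non-numeric values are
    // ignored, and numeric values below 512 are clamped up to 512.
    "positron.assistant.maxOutputTokens": {
        "claude-3-7-sonnet": 16384,
        "claude-sonnet-4": 32000
    }
}
```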
@@ -261,9 +280,44 @@ abstract class AILanguageModel implements positron.ai.LanguageModelChatProvider
         const processedMessages = processMessages(messages);
         // Only Anthropic currently supports experimental_content in tool
         // results.
-        const toolResultExperimentalContent = this.provider === 'anthropic';
-        // Convert messages to the Vercel AI format.
-        const aiMessages = toAIMessage(processedMessages, toolResultExperimentalContent);
+        const toolResultExperimentalContent = this.provider === 'anthropic' ||
+            this.model.modelId.startsWith('us.anthropic');
+
+        // Only select Bedrock models support cache breakpoints; specifically,
+        // the Claude 3.5 Sonnet models don't support them.
+        //
+        // Consider: it'd be more verbose, but we could include this information
+        // in the hardcoded model metadata in the model config.
+        const bedrockCacheBreakpoint = this.provider === 'bedrock' &&
+            !this.model.modelId.startsWith('us.anthropic.claude-3-5');
+
+        const aiMessages: ai.CoreMessage[] = [];
+
+        // The system message we will send to the model.
+        let systemMessage: string | undefined = modelOptions.system;
+
+        if (bedrockCacheBreakpoint && systemMessage) {
+            // Add the system prompt as the first message if we have a system
+            // prompt and cache breakpoints are enabled.
+            //
+            // This must be done in order to set a cache breakpoint for the
+            // system message. In general we prefer to send the system message
+            // using the 'system' option in streamText; see the
+            // CoreSystemMessage documentation for a detailed explanation.
+            const aiSystemMessage: ai.CoreSystemMessage = {
+                role: 'system',
+                content: systemMessage,
+            };
+            markBedrockCacheBreakpoint(aiSystemMessage);
+            aiMessages.push(aiSystemMessage);
+
+            // Consume the system message so it doesn't get sent a second time.
+            systemMessage = undefined;
+        }
+
+        // Convert all other messages to the Vercel AI format.
+        aiMessages.push(...toAIMessage(processedMessages, toolResultExperimentalContent,
+            bedrockCacheBreakpoint));
 
         if (options.tools && options.tools.length > 0) {
             tools = options.tools.reduce((acc: Record<string, ai.Tool>, tool: vscode.LanguageModelChatTool) => {
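
`markBedrockCacheBreakpoint` is imported from `./utils` and its implementation isn't part of this diff. A minimal sketch of what it plausibly does, assuming the AI SDK convention of attaching Bedrock cache points through per-message provider options:

```ts
import type { CoreMessage } from 'ai';

// Hypothetical reconstruction; the real helper lives in './utils'.
export function markBedrockCacheBreakpoint(message: CoreMessage): void {
    // Bedrock prompt caching is requested per message by attaching a
    // cachePoint marker to the provider-specific options.
    message.providerOptions = {
        ...message.providerOptions,
        bedrock: { cachePoint: { type: 'default' } },
    };
}
```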
@@ -275,43 +329,72 @@ abstract class AILanguageModel implements positron.ai.LanguageModelChatProvider
             }, {});
         }
 
+        const modelTools = this._config.toolCalls ? tools : undefined;
+        const requestId = (options.modelOptions as any)?.requestId;
+
+        log.info(`[vercel] Start request ${requestId} to ${this._config.name}: ${aiMessages.length} messages`);
+        log.debug(`[${this._config.name}] SEND ${aiMessages.length} messages, ${modelTools ? Object.keys(modelTools).length : 0} tools`);
+        if (modelTools) {
+            log.trace(`tools: ${modelTools ? Object.keys(modelTools).join(', ') : '(none)'}`);
+        }
+        if (systemMessage) {
+            log.trace(`system: ${systemMessage.length > 100 ? `${systemMessage.substring(0, 100)}...` : systemMessage} (${systemMessage.length} chars)`);
+        }
+        log.trace(`messages: ${JSON.stringify(aiMessages, null, 2)}`);
         const result = ai.streamText({
             model: this.model,
-            system: modelOptions.system ?? undefined,
+            system: systemMessage,
             messages: aiMessages,
             maxSteps: modelOptions.maxSteps ?? 50,
-            tools: this._config.toolCalls ? tools : undefined,
+            tools: modelTools,
             abortSignal: signal,
             maxTokens: modelOptions.maxTokens ?? this.maxOutputTokens,
         });
 
+        let accumulatedTextDeltas: string[] = [];
+
+        const flushAccumulatedTextDeltas = () => {
+            if (accumulatedTextDeltas.length > 0) {
+                const combinedText = accumulatedTextDeltas.join('');
+                log.trace(`[${this._config.name}] RECV text-delta (${accumulatedTextDeltas.length} parts): ${combinedText}`);
+                accumulatedTextDeltas = [];
+            }
+        };
+
         for await (const part of result.fullStream) {
             if (token.isCancellationRequested) {
                 break;
             }
 
             if (part.type === 'reasoning') {
+                flushAccumulatedTextDeltas();
+                log.trace(`[${this._config.name}] RECV reasoning: ${part.textDelta}`);
                 progress.report({
                     index: 0,
                     part: new vscode.LanguageModelTextPart(part.textDelta)
                 });
             }
 
             if (part.type === 'text-delta') {
+                accumulatedTextDeltas.push(part.textDelta);
                 progress.report({
                     index: 0,
                     part: new vscode.LanguageModelTextPart(part.textDelta)
                 });
             }
 
             if (part.type === 'tool-call') {
+                flushAccumulatedTextDeltas();
+                log.trace(`[${this._config.name}] RECV tool-call: ${part.toolCallId} (${part.toolName}) with args: ${JSON.stringify(part.args)}`);
                 progress.report({
                     index: 0,
                     part: new vscode.LanguageModelToolCallPart(part.toolCallId, part.toolName, part.args)
                 });
             }
 
             if (part.type === 'error') {
+                flushAccumulatedTextDeltas();
+                log.warn(`[${this._config.name}] RECV error: ${JSON.stringify(part.error)}`);
                 // TODO: Deal with various LLM providers' different error response formats
                 if (typeof part.error === 'string') {
                     throw new Error(part.error);
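
The `accumulatedTextDeltas` buffer exists purely for logging: tracing each streamed token individually would flood the log, so text deltas are buffered and emitted as one trace line whenever a non-text part (or the end of the stream) arrives. A self-contained sketch of the pattern with a stubbed stream and logger (all names illustrative):

```ts
type StreamPart =
    | { type: 'text-delta'; textDelta: string }
    | { type: 'tool-call'; toolName: string };

const trace = (msg: string) => console.log(msg);

async function* fakeStream(): AsyncGenerator<StreamPart> {
    yield { type: 'text-delta', textDelta: 'Hel' };
    yield { type: 'text-delta', textDelta: 'lo' };
    yield { type: 'tool-call', toolName: 'getWeather' };
}

async function run(): Promise<void> {
    let deltas: string[] = [];
    const flush = () => {
        if (deltas.length > 0) {
            trace(`RECV text-delta (${deltas.length} parts): ${deltas.join('')}`);
            deltas = [];
        }
    };
    for await (const part of fakeStream()) {
        if (part.type === 'text-delta') {
            deltas.push(part.textDelta);
        } else {
            flush(); // emit buffered text before logging the non-text part
            trace(`RECV ${part.type}: ${part.toolName}`);
        }
    }
    flush(); // emit any trailing text
}

run();
```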
@@ -324,19 +407,47 @@ abstract class AILanguageModel implements positron.ai.LanguageModelChatProvider
             }
         }
 
-        if (this._context) {
-            // ai-sdk provides token usage in the result but it's not clear how it is calculated
-            const usage = await result.usage;
-            const outputCount = usage.completionTokens;
-            const inputCount = usage.promptTokens;
-            const requestId = (options.modelOptions as any)?.requestId;
+        // Flush any remaining accumulated text deltas
+        flushAccumulatedTextDeltas();
+
+        // Log all the warnings from the response
+        result.warnings.then((warnings) => {
+            if (warnings) {
+                for (const warning of warnings) {
+                    log.warn(`[${this.model.modelId}] (${this.identifier}) warn: ${JSON.stringify(warning)}`);
+                }
+            }
+        });
+
+        // ai-sdk provides token usage in the result, but it's not clear how it is calculated
+        const usage = await result.usage;
+        const outputCount = usage.completionTokens;
+        const inputCount = usage.promptTokens;
+
+        if (requestId) {
+            recordRequestTokenUsage(requestId, this.provider, inputCount, outputCount);
+        }
 
+        if (this._context) {
             recordTokenUsage(this._context, this.provider, inputCount, outputCount);
+        }
 
-            if (requestId) {
-                recordRequestTokenUsage(requestId, this.provider, inputCount, outputCount);
-            }
+        const other = await result.providerMetadata;
 
+        log.info(`[vercel]: End request ${requestId}; usage: ${inputCount} input tokens, ${outputCount} output tokens`);
+
+        // Log Bedrock usage if available
+        if (other && other.bedrock && other.bedrock.usage) {
+            // Get the Bedrock usage object; it typically contains
+            // `cacheReadInputTokens` and `cacheWriteInputTokens`
+            const bedrockUsage = other.bedrock.usage as Record<string, any>;
+
+            // Add the input and output tokens to the usage object
+            bedrockUsage.inputTokens = inputCount;
+            bedrockUsage.outputTokens = outputCount;
+
+            // Log the Bedrock usage
+            log.debug(`[${this._config.name}]: Bedrock usage: ${JSON.stringify(bedrockUsage, null, 2)}`);
         }
     }
 
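
For reference, the debug line above prints an object shaped roughly like the following; the cache fields come from Bedrock's provider metadata, while `inputTokens` and `outputTokens` are merged in by the code above (values invented):

```ts
// Illustrative only; actual values depend on the request and cache state.
const exampleBedrockUsage = {
    cacheReadInputTokens: 11520, // prompt tokens served from the prompt cache
    cacheWriteInputTokens: 2048, // prompt tokens newly written to the cache
    inputTokens: 13981,          // usage.promptTokens from the ai-sdk result
    outputTokens: 412,           // usage.completionTokens from the ai-sdk result
};
```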
@@ -563,7 +674,7 @@ class VertexLanguageModel extends AILanguageModel implements positron.ai.Languag
 }
 
 export class AWSLanguageModel extends AILanguageModel implements positron.ai.LanguageModelChatProvider {
-    protected model;
+    protected model: ai.LanguageModelV1;
 
     static source: positron.ai.LanguageModelSource = {
         type: positron.PositronLanguageModelType.Chat,
@@ -583,13 +694,11 @@ export class AWSLanguageModel extends AILanguageModel implements positron.ai.Lan
         super(_config, _context);
 
         this.model = createAmazonBedrock({
-            bedrockOptions: {
-                // AWS_ACCESS_KEY_ID, AWS_SESSION_TOKEN, and AWS_SECRET_ACCESS_KEY must be set
-                // sets the AWS region where the models are available
-                region: process.env.AWS_REGION ?? 'us-east-1',
-                credentials: fromNodeProviderChain(),
-            }
-        })(this._config.model);
+            // Sets the AWS region where the models are available; credentials
+            // are resolved through the default Node.js provider chain.
+            region: process.env.AWS_REGION ?? 'us-east-1',
+            credentialProvider: fromNodeProviderChain(),
+        })(this._config.model) as ai.LanguageModelV1;
     }
 
     get providerName(): string {
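
Switching from a static `credentials` value to `credentialProvider: fromNodeProviderChain()` means credentials no longer have to come from environment variables alone; the default Node.js chain also consults the shared `~/.aws` config and credentials files (honoring `AWS_PROFILE`), SSO sessions, web identity tokens, and EC2/ECS instance metadata. A quick standalone way to check what the chain resolves to (a debugging snippet, not part of the extension):

```ts
import { fromNodeProviderChain } from '@aws-sdk/credential-providers';

async function checkCredentials(): Promise<void> {
    // fromNodeProviderChain() returns a provider function; invoking it
    // resolves credentials from the first source in the chain that succeeds.
    const credentials = await fromNodeProviderChain()();
    console.log(`Resolved access key: ${credentials.accessKeyId}`);
}

checkCredentials().catch((err) => console.error('No credentials resolved:', err));
```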
@@ -712,17 +821,17 @@ export const availableModels = new Map<string, { name: string; identifier: strin
     {
         name: 'Claude 4 Sonnet Bedrock',
         identifier: 'us.anthropic.claude-sonnet-4-20250514-v1:0',
-        maxOutputTokens: 64_000, // reference: https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-comparison-table
+        maxOutputTokens: 8_192, // use a more conservative value for Bedrock (up to 64K tokens available)
     },
     {
         name: 'Claude 4 Opus Bedrock',
         identifier: 'us.anthropic.claude-opus-4-20250514-v1:0',
-        maxOutputTokens: 32_000, // reference: https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-comparison-table
+        maxOutputTokens: 8_192, // use a more conservative value for Bedrock (up to 32K tokens available)
     },
     {
         name: 'Claude 3.7 Sonnet v1 Bedrock',
         identifier: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
-        maxOutputTokens: 64_000, // reference: https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-comparison-table
+        maxOutputTokens: 8_192, // use a more conservative value for Bedrock (up to 64K tokens available)
     },
     {
         name: 'Claude 3.5 Sonnet v2 Bedrock',