Commit eda3abf

Port Bedrock improvements from 2025.08 (#8931)
Addresses #8913. Brings forward these improvements:

- configurable max output tokens per provider (applies to all providers)
- ability to toggle followup generation on/off (applies to all providers)
- lower max output tokens for Bedrock Claude models
- cache breakpoints for Bedrock Claude

I've also made two additional logging improvements not included in 2025.08:

- all Anthropic usage stats logged in one entry after each request finishes (suggestion from @wch)
- we now log at the `[info]` level more often (at least twice every time you ask Assistant something). Formerly, it was very possible to see literally nothing in the Assistant logs at the default `[info]` level, which was disconcerting.

I would like to refactor the Bedrock bits so there are fewer model-specific switches in the general Vercel handling. That can be tracked in #8776.

### QA Notes

- Be sure to set the log level to Trace to see all the new logging.
- Turning on followup generation requires an IDE restart.
- If you want to test these changes in Desktop, you'll need the following guide: https://connect.posit.it/positron-wiki/positron-assistant.html#aws-bedrock-in-positron-desktop
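
As a rough illustration of the two new settings (the model ID substrings and token values below are only examples, not shipped defaults), a user could add something like this to `settings.json`:

```json
{
  "positron.assistant.maxOutputTokens": {
    "claude-sonnet-4": 4096,
    "claude-3-7-sonnet": 2048
  },
  "positron.assistant.followups.enable": false
}
```

Each key is matched as a substring of the model ID, so one entry can cover several variants of the same model; values below the schema minimum of 512 are raised to 512 when the provider is constructed.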
1 parent 3f5a43f commit eda3abf

File tree

8 files changed: +291, -532 lines


extensions/positron-assistant/package-lock.json

Lines changed: 61 additions & 485 deletions
Some generated files are not rendered by default.

extensions/positron-assistant/package.json

Lines changed: 15 additions & 1 deletion
@@ -202,6 +202,20 @@
       "type": "boolean",
       "default": false,
       "description": "%configuration.showTokenUsage.description%"
+    },
+    "positron.assistant.maxOutputTokens": {
+      "type": "object",
+      "default": {},
+      "description": "%configuration.maxOutputTokens.description%",
+      "additionalProperties": {
+        "type": "number",
+        "minimum": 512
+      }
+    },
+    "positron.assistant.followups.enable": {
+      "type": "boolean",
+      "default": true,
+      "description": "%configuration.followups.enable.description%"
     }
   }
 }
@@ -504,7 +518,7 @@
     "postinstall": "ts-node scripts/post-install.ts"
   },
   "devDependencies": {
-    "@ai-sdk/amazon-bedrock": "^1.1.6",
+    "@ai-sdk/amazon-bedrock": "2.2.12",
     "@ai-sdk/anthropic": "^1.0.5",
     "@ai-sdk/azure": "^1.1.9",
     "@ai-sdk/google": "^1.1.17",

extensions/positron-assistant/package.nls.json

Lines changed: 3 additions & 1 deletion
@@ -23,5 +23,7 @@
   "configuration.inlineCompletionExcludes.description": "A list of [glob patterns](https://aka.ms/vscode-glob-patterns) to exclude from inline completions.",
   "configuration.gitIntegration.description": "Enable Positron Assistant git integration.",
   "configuration.getTableSummary.description": "Enable Positron Assistant get table summary tool.",
-  "configuration.showTokenUsage.description": "Show token usage in the chat view for supported providers for the session and each message and response including prompts. Check with your provider for detailed usage."
+  "configuration.showTokenUsage.description": "Show token usage in the chat view for supported providers for the session and each message and response including prompts. Check with your provider for detailed usage.",
+  "configuration.maxOutputTokens.description": "Override the maximum output tokens for specific language models. The key selects a model ID (partial match supported); the value is the maximum output tokens for that model.",
+  "configuration.followups.enable.description": "Enable suggested followup prompts after chat responses."
 }

extensions/positron-assistant/src/anthropic.ts

Lines changed: 5 additions & 3 deletions
@@ -100,6 +100,7 @@ export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProv

         // Log request information - the request ID is only available upon connection.
         stream.on('connect', () => {
+            log.info(`[anthropic] Start request ${stream.request_id} to ${this._config.model}: ${anthropicMessages.length} messages`);
             if (log.logLevel <= vscode.LogLevel.Trace) {
                 log.trace(`[anthropic] SEND messages.stream [${stream.request_id}]: ${JSON.stringify(body, null, 2)}`);
             } else {
@@ -136,6 +137,7 @@ export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProv
             await stream.done();
         } catch (error) {
             if (error instanceof Anthropic.APIError) {
+                log.warn(`[anthropic] Error in messages.stream [${stream.request_id}]: ${error.message}`);
                 let data: any;
                 try {
                     data = JSON.parse(error.message);
@@ -146,6 +148,7 @@ export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProv
                     throw new Error(`Anthropic's API is temporarily overloaded.`);
                 }
             } else if (error instanceof Anthropic.AnthropicError) {
+                log.warn(`[anthropic] Error in messages.stream [${stream.request_id}]: ${error.message}`);
                 // This can happen if the API key was not persisted correctly.
                 if (error.message.startsWith('Could not resolve authentication method')) {
                     throw new Error('Something went wrong when storing the Anthropic API key. ' +
@@ -161,9 +164,8 @@ export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProv
             log.trace(`[anthropic] RECV messages.stream [${stream.request_id}]: ${JSON.stringify(message, null, 2)}`);
         } else {
             log.debug(
-                `[anthropic] RECV messages.stream [${stream.request_id}]: ` +
-                `usage: ${JSON.stringify(message.usage)}`
-            );
+                `[anthropic] RECV messages.stream [${stream.request_id}]`);
+            log.info(`[anthropic] Finished request ${stream.request_id}; usage: ${JSON.stringify(message.usage)}`);
         }

         // Record token usage

extensions/positron-assistant/src/completion.ts

Lines changed: 4 additions & 5 deletions
@@ -511,7 +511,7 @@ class OpenRouterCompletion extends FimPromptCompletion {
 }

 class AWSCompletion extends FimPromptCompletion {
-    protected model;
+    protected model: ai.LanguageModelV1;

     static source: positron.ai.LanguageModelSource = {
         type: positron.PositronLanguageModelType.Completion,
@@ -529,11 +529,10 @@ class AWSCompletion extends FimPromptCompletion {
     constructor(_config: ModelConfig) {
         super(_config);

+        // Cast to ai.LanguageModelV1 to satisfy base class type
         this.model = createAmazonBedrock({
-            bedrockOptions: {
-                credentials: fromNodeProviderChain(),
-            }
-        })(this._config.model);
+            credentialProvider: fromNodeProviderChain(),
+        })(this._config.model) as unknown as ai.LanguageModelV1;
     }
 }

extensions/positron-assistant/src/models.ts

Lines changed: 137 additions & 28 deletions
@@ -15,12 +15,12 @@ import { createOpenAI } from '@ai-sdk/openai';
 import { createMistral } from '@ai-sdk/mistral';
 import { createOllama } from 'ollama-ai-provider';
 import { createOpenRouter } from '@openrouter/ai-sdk-provider';
-import { processMessages, toAIMessage } from './utils';
+import { markBedrockCacheBreakpoint, processMessages, toAIMessage } from './utils';
 import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock';
 import { fromNodeProviderChain } from '@aws-sdk/credential-providers';
 import { AnthropicLanguageModel } from './anthropic';
 import { DEFAULT_MAX_TOKEN_OUTPUT } from './constants.js';
-import { recordRequestTokenUsage, recordTokenUsage } from './extension.js';
+import { log, recordRequestTokenUsage, recordTokenUsage } from './extension.js';

 /**
  * Models used by chat participants and for vscode.lm.* API functionality.
@@ -195,7 +195,7 @@ abstract class AILanguageModel implements positron.ai.LanguageModelChatProvider
     public readonly name;
     public readonly provider;
     public readonly identifier;
-    public readonly maxOutputTokens;
+    public readonly maxOutputTokens: number;
     protected abstract model: ai.LanguageModelV1;

     capabilities = {
@@ -211,7 +211,26 @@ abstract class AILanguageModel implements positron.ai.LanguageModelChatProvider
         this.identifier = _config.id;
         this.name = _config.name;
         this.provider = _config.provider;
+        const maxOutputTokens = vscode.workspace.getConfiguration('positron.assistant').get('maxOutputTokens', {} as Record<string, number>);
         this.maxOutputTokens = _config.maxOutputTokens ?? DEFAULT_MAX_TOKEN_OUTPUT;
+
+        // Override maxOutputTokens if specified in the configuration
+        for (const [key, value] of Object.entries(maxOutputTokens)) {
+            if (_config.model.indexOf(key) !== -1 && value) {
+                let maxOutputTokens = value;
+                if (typeof maxOutputTokens !== 'number') {
+                    log.warn(`Invalid maxOutputTokens '${maxOutputTokens}' for ${key} (${_config.model}); ignoring`);
+                    continue;
+                }
+                if (maxOutputTokens < 512) {
+                    log.warn(`Specified maxOutputTokens '${maxOutputTokens}' for ${key} (${_config.model}) is too low; using 512 instead`);
+                    maxOutputTokens = 512;
+                }
+                log.debug(`Setting maxOutputTokens for ${key} (${_config.model}) to ${maxOutputTokens}`);
+                this.maxOutputTokens = maxOutputTokens;
+                break;
+            }
+        }
     }

     get providerName(): string {
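
To make the override lookup above concrete, here is a small standalone sketch of the same matching and clamping behavior; the override map is a hypothetical user setting, and the model ID is one of the Bedrock IDs listed later in this file:

```typescript
// Illustration only: mirrors the substring match and 512-token floor used in the constructor above.
const overrides: Record<string, number> = { 'claude-sonnet-4': 4096 };
const modelId = 'us.anthropic.claude-sonnet-4-20250514-v1:0';

let maxOutputTokens: number | undefined;
for (const [key, value] of Object.entries(overrides)) {
    if (modelId.indexOf(key) !== -1 && typeof value === 'number') {
        maxOutputTokens = Math.max(value, 512); // values under 512 are bumped up to 512
        break;
    }
}
console.log(maxOutputTokens); // 4096
```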
@@ -261,9 +280,44 @@ abstract class AILanguageModel implements positron.ai.LanguageModelChatProvider
         const processedMessages = processMessages(messages);
         // Only Anthropic currently supports experimental_content in tool
         // results.
-        const toolResultExperimentalContent = this.provider === 'anthropic';
-        // Convert messages to the Vercel AI format.
-        const aiMessages = toAIMessage(processedMessages, toolResultExperimentalContent);
+        const toolResultExperimentalContent = this.provider === 'anthropic' ||
+            this.model.modelId.startsWith('us.anthropic');
+
+        // Only select Bedrock models support cache breakpoints; specifically,
+        // the Claude 3.5 Sonnet models don't support them.
+        //
+        // Consider: it'd be more verbose but we should consider including this information
+        // in the hardcoded model metadata in the model config.
+        const bedrockCacheBreakpoint = this.provider === 'bedrock' &&
+            !this.model.modelId.startsWith('us.anthropic.claude-3-5')
+
+        const aiMessages: ai.CoreMessage[] = [];
+
+        // The system message we will send to the model.
+        let systemMessage: string | undefined = modelOptions.system;
+
+        if (bedrockCacheBreakpoint && systemMessage) {
+            // Add the system prompt as the first message if we have a system
+            // prompt and cache breakpoints are enabled.
+            //
+            // This must be done in order to set a cache breakpoint for the
+            // system message. In general we prefer to send the system message
+            // using the 'system' option in streamText; see the
+            // CoreSystemMessage documentation for a detailed explanation.
+            const aiSystemMessage: ai.CoreSystemMessage = {
+                role: 'system',
+                content: systemMessage,
+            };
+            markBedrockCacheBreakpoint(aiSystemMessage);
+            aiMessages.push(aiSystemMessage);
+
+            // Consume the system message so it doesn't get sent a second time
+            systemMessage = undefined;
+        }
+
+        // Convert all other messages to the Vercel AI format.
+        aiMessages.push(...toAIMessage(processedMessages, toolResultExperimentalContent,
+            bedrockCacheBreakpoint));

         if (options.tools && options.tools.length > 0) {
             tools = options.tools.reduce((acc: Record<string, ai.Tool>, tool: vscode.LanguageModelChatTool) => {
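
`markBedrockCacheBreakpoint` is imported from `./utils` and its body is not part of this diff. As a minimal sketch, assuming the `@ai-sdk/amazon-bedrock` 2.x convention of attaching cache points through a message's provider options (the real helper may differ in details), it could look roughly like this:

```typescript
import type { CoreMessage } from 'ai';

// Hypothetical sketch; the actual implementation lives in ./utils.
export function markBedrockCacheBreakpoint(message: CoreMessage): void {
    // Bedrock prompt caching reads cache points from providerOptions.bedrock.cachePoint.
    message.providerOptions = {
        ...message.providerOptions,
        bedrock: { cachePoint: { type: 'default' } },
    };
}
```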
@@ -275,43 +329,72 @@ abstract class AILanguageModel implements positron.ai.LanguageModelChatProvider
             }, {});
         }

+        const modelTools = this._config.toolCalls ? tools : undefined;
+        const requestId = (options.modelOptions as any)?.requestId;
+
+        log.info(`[vercel] Start request ${requestId} to ${this._config.name}: ${aiMessages.length} messages`);
+        log.debug(`[${this._config.name}] SEND ${aiMessages.length} messages, ${modelTools ? Object.keys(modelTools).length : 0} tools`);
+        if (modelTools) {
+            log.trace(`tools: ${modelTools ? Object.keys(modelTools).join(', ') : '(none)'}`);
+        }
+        if (systemMessage) {
+            log.trace(`system: ${systemMessage.length > 100 ? `${systemMessage.substring(0, 100)}...` : systemMessage} (${systemMessage.length} chars)`);
+        }
+        log.trace(`messages: ${JSON.stringify(aiMessages, null, 2)}`);
         const result = ai.streamText({
             model: this.model,
-            system: modelOptions.system ?? undefined,
+            system: systemMessage,
             messages: aiMessages,
             maxSteps: modelOptions.maxSteps ?? 50,
-            tools: this._config.toolCalls ? tools : undefined,
+            tools: modelTools,
             abortSignal: signal,
             maxTokens: modelOptions.maxTokens ?? this.maxOutputTokens,
         });

+        let accumulatedTextDeltas: string[] = [];
+
+        const flushAccumulatedTextDeltas = () => {
+            if (accumulatedTextDeltas.length > 0) {
+                const combinedText = accumulatedTextDeltas.join('');
+                log.trace(`[${this._config.name}] RECV text-delta (${accumulatedTextDeltas.length} parts): ${combinedText}`);
+                accumulatedTextDeltas = [];
+            }
+        };
+
         for await (const part of result.fullStream) {
             if (token.isCancellationRequested) {
                 break;
             }

             if (part.type === 'reasoning') {
+                flushAccumulatedTextDeltas();
+                log.trace(`[${this._config.name}] RECV reasoning: ${part.textDelta}`);
                 progress.report({
                     index: 0,
                     part: new vscode.LanguageModelTextPart(part.textDelta)
                 });
             }

             if (part.type === 'text-delta') {
+                accumulatedTextDeltas.push(part.textDelta);
                 progress.report({
                     index: 0,
                     part: new vscode.LanguageModelTextPart(part.textDelta)
                 });
             }

             if (part.type === 'tool-call') {
+                flushAccumulatedTextDeltas();
+                log.trace(`[${this._config.name}] RECV tool-call: ${part.toolCallId} (${part.toolName}) with args: ${JSON.stringify(part.args)}`);
                 progress.report({
                     index: 0,
                     part: new vscode.LanguageModelToolCallPart(part.toolCallId, part.toolName, part.args)
                 });
             }

             if (part.type === 'error') {
+                flushAccumulatedTextDeltas();
+                log.warn(`[${this._config.name}] RECV error: ${JSON.stringify(part.error)}`);
                 // TODO: Deal with various LLM providers' different error response formats
                 if (typeof part.error === 'string') {
                     throw new Error(part.error);
@@ -324,19 +407,47 @@ abstract class AILanguageModel implements positron.ai.LanguageModelChatProvider
             }
         }

-        if (this._context) {
-            // ai-sdk provides token usage in the result but it's not clear how it is calculated
-            const usage = await result.usage;
-            const outputCount = usage.completionTokens;
-            const inputCount = usage.promptTokens;
-            const requestId = (options.modelOptions as any)?.requestId;
+        // Flush any remaining accumulated text deltas
+        flushAccumulatedTextDeltas();
+
+        // Log all the warnings from the response
+        result.warnings.then((warnings) => {
+            if (warnings) {
+                for (const warning of warnings) {
+                    log.warn(`[${this.model}] (${this.identifier}) warn: ${warning}`);
+                }
+            }
+        });
+
+        // ai-sdk provides token usage in the result but it's not clear how it is calculated
+        const usage = await result.usage;
+        const outputCount = usage.completionTokens;
+        const inputCount = usage.promptTokens;
+
+        if (requestId) {
+            recordRequestTokenUsage(requestId, this.provider, inputCount, outputCount);
+        }

+        if (this._context) {
            recordTokenUsage(this._context, this.provider, inputCount, outputCount);
+        }

-        if (requestId) {
-            recordRequestTokenUsage(requestId, this.provider, inputCount, outputCount);
-        }
+        const other = await result.providerMetadata;

+        log.info(`[vercel]: End request ${requestId}; usage: ${inputCount} input tokens, ${outputCount} output tokens`);
+
+        // Log Bedrock usage if available
+        if (other && other.bedrock && other.bedrock.usage) {
+            // Get the Bedrock usage object; it typically contains
+            // `cacheReadInputTokens` and `cacheWriteInputTokens`
+            const usage = other.bedrock.usage as Record<string, any>;
+
+            // Add the input and output tokens to the usage object
+            usage.inputTokens = inputCount;
+            usage.outputTokens = outputCount;
+
+            // Log the Bedrock usage
+            log.debug(`[${this._config.name}]: Bedrock usage: ${JSON.stringify(other.bedrock.usage, null, 2)}`);
         }
     }
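
For reference, the object logged here ends up with roughly this shape, based on the fields named in the comments above plus the two counts merged in; the cache fields are only present when Bedrock reports them:

```typescript
// Assumed shape of the logged Bedrock usage entry (illustrative, not a published type).
interface LoggedBedrockUsage {
    cacheReadInputTokens?: number;  // input tokens served from the prompt cache
    cacheWriteInputTokens?: number; // input tokens written to the prompt cache
    inputTokens: number;            // copied from result.usage.promptTokens
    outputTokens: number;           // copied from result.usage.completionTokens
}
```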

@@ -563,7 +674,7 @@ class VertexLanguageModel extends AILanguageModel implements positron.ai.Languag
 }

 export class AWSLanguageModel extends AILanguageModel implements positron.ai.LanguageModelChatProvider {
-    protected model;
+    protected model: ai.LanguageModelV1;

     static source: positron.ai.LanguageModelSource = {
         type: positron.PositronLanguageModelType.Chat,
@@ -583,13 +694,11 @@ export class AWSLanguageModel extends AILanguageModel implements positron.ai.Lan
         super(_config, _context);

         this.model = createAmazonBedrock({
-            bedrockOptions: {
-                // AWS_ACCESS_KEY_ID, AWS_SESSION_TOKEN, and AWS_SECRET_ACCESS_KEY must be set
-                // sets the AWS region where the models are available
-                region: process.env.AWS_REGION ?? 'us-east-1',
-                credentials: fromNodeProviderChain(),
-            }
-        })(this._config.model);
+            // AWS_ACCESS_KEY_ID, AWS_SESSION_TOKEN, and AWS_SECRET_ACCESS_KEY must be set
+            // sets the AWS region where the models are available
+            region: process.env.AWS_REGION ?? 'us-east-1',
+            credentialProvider: fromNodeProviderChain(),
+        })(this._config.model) as ai.LanguageModelV1;
     }

     get providerName(): string {
@@ -712,17 +821,17 @@ export const availableModels = new Map<string, { name: string; identifier: strin
     {
         name: 'Claude 4 Sonnet Bedrock',
         identifier: 'us.anthropic.claude-sonnet-4-20250514-v1:0',
-        maxOutputTokens: 64_000, // reference: https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-comparison-table
+        maxOutputTokens: 8_192, // use more conservative value for Bedrock (up to 64K tokens available)
     },
     {
         name: 'Claude 4 Opus Bedrock',
         identifier: 'us.anthropic.claude-opus-4-20250514-v1:0',
-        maxOutputTokens: 32_000, // reference: https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-comparison-table
+        maxOutputTokens: 8_192, // use more conservative value for Bedrock (up to 32K tokens available)
     },
     {
         name: 'Claude 3.7 Sonnet v1 Bedrock',
         identifier: 'us.anthropic.claude-3-7-sonnet-20250219-v1:0',
-        maxOutputTokens: 64_000, // reference: https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-comparison-table
+        maxOutputTokens: 8_192, // use more conservative value for Bedrock (up to 64K tokens available)
     },
     {
         name: 'Claude 3.5 Sonnet v2 Bedrock',
