Commit 7c05cce: integration

1 parent: 95da34d
File tree: 2 files changed (+75 / -2 lines)

- src/api/providers/lmstudio.ts
- src/api/providers/ollama.ts

src/api/providers/lmstudio.ts (57 additions, 0 deletions)
@@ -29,6 +29,47 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHandler {
 			...convertToOpenAiMessages(messages),
 		]
 
+		// -------------------------
+		// Track token usage
+		// -------------------------
+		// Build content blocks so we can estimate prompt token count using the shared utility.
+		const toContentBlocks = (
+			blocks: Anthropic.Messages.MessageParam[] | string,
+		): Anthropic.Messages.ContentBlockParam[] => {
+			if (typeof blocks === "string") {
+				return [{ type: "text", text: blocks }]
+			}
+
+			const result: Anthropic.Messages.ContentBlockParam[] = []
+			for (const msg of blocks) {
+				if (typeof msg.content === "string") {
+					result.push({ type: "text", text: msg.content })
+				} else if (Array.isArray(msg.content)) {
+					// Keep only the text blocks; this is just for counting purposes
+					for (const part of msg.content) {
+						if (part.type === "text") {
+							result.push({ type: "text", text: part.text })
+						}
+					}
+				}
+			}
+			return result
+		}
+
+		// Count prompt/input tokens (system prompt + user/assistant history)
+		let inputTokens = 0
+		try {
+			inputTokens = await this.countTokens([
+				{ type: "text", text: systemPrompt },
+				...toContentBlocks(messages),
+			])
+		} catch (err) {
+			console.error("[LmStudio] Failed to count input tokens:", err)
+			inputTokens = 0
+		}
+
+		let assistantText = ""
+
 		try {
 			// Create params object with optional draft model
 			const params: any = {
@@ -50,12 +91,28 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHandler {
 			for await (const chunk of results) {
 				const delta = chunk.choices[0]?.delta
 				if (delta?.content) {
+					assistantText += delta.content
 					yield {
 						type: "text",
 						text: delta.content,
 					}
 				}
 			}
+
+			// After streaming completes, estimate output tokens and yield usage metrics
+			let outputTokens = 0
+			try {
+				outputTokens = await this.countTokens([{ type: "text", text: assistantText }])
+			} catch (err) {
+				console.error("[LmStudio] Failed to count output tokens:", err)
+				outputTokens = 0
+			}
+
+			yield {
+				type: "usage",
+				inputTokens,
+				outputTokens,
+			} as const
 		} catch (error) {
 			// LM Studio doesn't return an error code/body for now
 			throw new Error(
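For context, a minimal sketch of how a caller might consume the new usage chunk. The handler's streaming method is assumed here to be createMessage(systemPrompt, messages) and the import path is a guess (neither is shown in this diff); the chunk shapes follow the yields above: text deltas stream while the model responds, then a single usage chunk arrives after streaming completes.

import { Anthropic } from "@anthropic-ai/sdk"
import { LmStudioHandler } from "./lmstudio" // path is an assumption

// Hypothetical consumer; createMessage and its signature are assumptions based on this diff.
async function collectWithUsage(
	handler: LmStudioHandler,
	systemPrompt: string,
	messages: Anthropic.Messages.MessageParam[],
): Promise<{ text: string; inputTokens: number; outputTokens: number }> {
	let text = ""
	let inputTokens = 0
	let outputTokens = 0

	for await (const chunk of handler.createMessage(systemPrompt, messages)) {
		if (chunk.type === "text") {
			// Text deltas arrive first, while the model streams its reply
			text += chunk.text
		} else if (chunk.type === "usage") {
			// A single usage chunk is emitted once streaming is done
			inputTokens = chunk.inputTokens
			outputTokens = chunk.outputTokens
		}
	}

	return { text, inputTokens, outputTokens }
}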

src/api/providers/ollama.ts (18 additions, 2 deletions)
@@ -11,6 +11,9 @@ import { DEEP_SEEK_DEFAULT_TEMPERATURE } from "./constants"
 import { XmlMatcher } from "../../utils/xml-matcher"
 import { BaseProvider } from "./base-provider"
 
+// Alias for the usage object returned in streaming chunks
+type CompletionUsage = OpenAI.Chat.Completions.ChatCompletionChunk["usage"]
+
 export class OllamaHandler extends BaseProvider implements SingleCompletionHandler {
 	protected options: ApiHandlerOptions
 	private client: OpenAI
@@ -37,6 +40,7 @@ export class OllamaHandler extends BaseProvider implements SingleCompletionHandler {
 			messages: openAiMessages,
 			temperature: this.options.modelTemperature ?? 0,
 			stream: true,
+			stream_options: { include_usage: true },
 		})
 		const matcher = new XmlMatcher(
 			"think",
@@ -46,18 +50,30 @@ export class OllamaHandler extends BaseProvider implements SingleCompletionHandler {
 					text: chunk.data,
 				}) as const,
 		)
+		let lastUsage: CompletionUsage | undefined
 		for await (const chunk of stream) {
 			const delta = chunk.choices[0]?.delta
 
 			if (delta?.content) {
-				for (const chunk of matcher.update(delta.content)) {
-					yield chunk
+				for (const matcherChunk of matcher.update(delta.content)) {
+					yield matcherChunk
 				}
 			}
+			if (chunk.usage) {
+				lastUsage = chunk.usage
+			}
 		}
 		for (const chunk of matcher.final()) {
 			yield chunk
 		}
+
+		if (lastUsage) {
+			yield {
+				type: "usage",
+				inputTokens: lastUsage?.prompt_tokens || 0,
+				outputTokens: lastUsage?.completion_tokens || 0,
+			}
+		}
 	}
 
 	override getModel(): { id: string; info: ModelInfo } {
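The ollama.ts change takes a different route: instead of estimating tokens locally, it asks the OpenAI-compatible endpoint to report usage via stream_options: { include_usage: true }. The server then appends a final chunk whose choices array is empty and whose usage field carries prompt_tokens and completion_tokens for the whole request, which is what the handler captures in lastUsage. A standalone sketch of that behavior against a local Ollama endpoint (the base URL, placeholder API key, and model name are assumptions):

import OpenAI from "openai"

// Ollama's OpenAI-compatible endpoint; any non-empty API key is accepted (assumed local setup).
const client = new OpenAI({ baseURL: "http://localhost:11434/v1", apiKey: "ollama" })

async function main() {
	const stream = await client.chat.completions.create({
		model: "llama3", // assumed model name
		messages: [{ role: "user", content: "Say hello" }],
		stream: true,
		stream_options: { include_usage: true },
	})

	for await (const chunk of stream) {
		const delta = chunk.choices[0]?.delta
		if (delta?.content) {
			process.stdout.write(delta.content)
		}
		// The final chunk has an empty choices array and reports usage for the whole request.
		if (chunk.usage) {
			console.log(`\nprompt_tokens=${chunk.usage.prompt_tokens} completion_tokens=${chunk.usage.completion_tokens}`)
		}
	}
}

main()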
