Commit 7c05cce: integration

1 parent: 95da34d
File tree: 2 files changed (+75 / -2 lines)

- src/api/providers/lmstudio.ts
- src/api/providers/ollama.ts

src/api/providers/lmstudio.ts (57 additions, 0 deletions)
@@ -29,6 +29,47 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHandler {
 			...convertToOpenAiMessages(messages),
 		]
 
+		// -------------------------
+		// Track token usage
+		// -------------------------
+		// Build content blocks so we can estimate prompt token count using the shared utility.
+		const toContentBlocks = (
+			blocks: Anthropic.Messages.MessageParam[] | string,
+		): Anthropic.Messages.ContentBlockParam[] => {
+			if (typeof blocks === "string") {
+				return [{ type: "text", text: blocks }]
+			}
+
+			const result: Anthropic.Messages.ContentBlockParam[] = []
+			for (const msg of blocks) {
+				if (typeof msg.content === "string") {
+					result.push({ type: "text", text: msg.content })
+				} else if (Array.isArray(msg.content)) {
+					// Keep only the text blocks; this is just for counting purposes
+					for (const part of msg.content) {
+						if (part.type === "text") {
+							result.push({ type: "text", text: part.text })
+						}
+					}
+				}
+			}
+			return result
+		}
+
+		// Count prompt/input tokens (system prompt + user/assistant history)
+		let inputTokens = 0
+		try {
+			inputTokens = await this.countTokens([
+				{ type: "text", text: systemPrompt },
+				...toContentBlocks(messages),
+			])
+		} catch (err) {
+			console.error("[LmStudio] Failed to count input tokens:", err)
+			inputTokens = 0
+		}
+
+		let assistantText = ""
+
 		try {
 			// Create params object with optional draft model
 			const params: any = {
@@ -50,12 +91,28 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHandler {
 			for await (const chunk of results) {
 				const delta = chunk.choices[0]?.delta
 				if (delta?.content) {
+					assistantText += delta.content
 					yield {
 						type: "text",
 						text: delta.content,
 					}
 				}
 			}
+
+			// After streaming completes, estimate output tokens and yield usage metrics
+			let outputTokens = 0
+			try {
+				outputTokens = await this.countTokens([{ type: "text", text: assistantText }])
+			} catch (err) {
+				console.error("[LmStudio] Failed to count output tokens:", err)
+				outputTokens = 0
+			}
+
+			yield {
+				type: "usage",
+				inputTokens,
+				outputTokens,
+			} as const
 		} catch (error) {
 			// LM Studio doesn't return an error code/body for now
 			throw new Error(
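For context, a minimal sketch of how a caller might consume the new usage chunk. The handler's streaming method is assumed here to be createMessage(systemPrompt, messages) and the import path is a guess (neither is shown in this diff); the chunk shapes follow the yields above: text deltas stream while the model responds, then a single usage chunk arrives after streaming completes.

import { Anthropic } from "@anthropic-ai/sdk"
import { LmStudioHandler } from "./lmstudio" // path is an assumption

// Hypothetical consumer; createMessage and its signature are assumptions based on this diff.
async function collectWithUsage(
	handler: LmStudioHandler,
	systemPrompt: string,
	messages: Anthropic.Messages.MessageParam[],
): Promise<{ text: string; inputTokens: number; outputTokens: number }> {
	let text = ""
	let inputTokens = 0
	let outputTokens = 0

	for await (const chunk of handler.createMessage(systemPrompt, messages)) {
		if (chunk.type === "text") {
			// Text deltas arrive first, while the model streams its reply
			text += chunk.text
		} else if (chunk.type === "usage") {
			// A single usage chunk is emitted once streaming is done
			inputTokens = chunk.inputTokens
			outputTokens = chunk.outputTokens
		}
	}

	return { text, inputTokens, outputTokens }
}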

src/api/providers/ollama.ts (18 additions, 2 deletions)
@@ -11,6 +11,9 @@ import { DEEP_SEEK_DEFAULT_TEMPERATURE } from "./constants"
 import { XmlMatcher } from "../../utils/xml-matcher"
 import { BaseProvider } from "./base-provider"
 
+// Alias for the usage object returned in streaming chunks
+type CompletionUsage = OpenAI.Chat.Completions.ChatCompletionChunk["usage"]
+
 export class OllamaHandler extends BaseProvider implements SingleCompletionHandler {
 	protected options: ApiHandlerOptions
 	private client: OpenAI
@@ -37,6 +40,7 @@ export class OllamaHandler extends BaseProvider implements SingleCompletionHandler {
 			messages: openAiMessages,
 			temperature: this.options.modelTemperature ?? 0,
 			stream: true,
+			stream_options: { include_usage: true },
 		})
 		const matcher = new XmlMatcher(
 			"think",
@@ -46,18 +50,30 @@ export class OllamaHandler extends BaseProvider implements SingleCompletionHandler {
 					text: chunk.data,
 				}) as const,
 		)
+		let lastUsage: CompletionUsage | undefined
 		for await (const chunk of stream) {
 			const delta = chunk.choices[0]?.delta
 
 			if (delta?.content) {
-				for (const chunk of matcher.update(delta.content)) {
-					yield chunk
+				for (const matcherChunk of matcher.update(delta.content)) {
+					yield matcherChunk
 				}
 			}
+			if (chunk.usage) {
+				lastUsage = chunk.usage
+			}
 		}
 		for (const chunk of matcher.final()) {
 			yield chunk
 		}
+
+		if (lastUsage) {
+			yield {
+				type: "usage",
+				inputTokens: lastUsage?.prompt_tokens || 0,
+				outputTokens: lastUsage?.completion_tokens || 0,
+			}
+		}
 	}
 
 	override getModel(): { id: string; info: ModelInfo } {
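The ollama.ts change takes a different route: instead of estimating tokens locally, it asks the OpenAI-compatible endpoint to report usage via stream_options: { include_usage: true }. The server then appends a final chunk whose choices array is empty and whose usage field carries prompt_tokens and completion_tokens for the whole request, which is what the handler captures in lastUsage. A standalone sketch of that behavior against a local Ollama endpoint (the base URL, placeholder API key, and model name are assumptions):

import OpenAI from "openai"

// Ollama's OpenAI-compatible endpoint; any non-empty API key is accepted (assumed local setup).
const client = new OpenAI({ baseURL: "http://localhost:11434/v1", apiKey: "ollama" })

async function main() {
	const stream = await client.chat.completions.create({
		model: "llama3", // assumed model name
		messages: [{ role: "user", content: "Say hello" }],
		stream: true,
		stream_options: { include_usage: true },
	})

	for await (const chunk of stream) {
		const delta = chunk.choices[0]?.delta
		if (delta?.content) {
			process.stdout.write(delta.content)
		}
		// The final chunk has an empty choices array and reports usage for the whole request.
		if (chunk.usage) {
			console.log(`\nprompt_tokens=${chunk.usage.prompt_tokens} completion_tokens=${chunk.usage.completion_tokens}`)
		}
	}
}

main()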
