Skip to content

Commit 61a251c

Browse files
committed
Fix for LMStudio
Implements a fix for LMStudio not showing token usage.
1 parent ee2033c commit 61a251c

File tree

1 file changed

+56
-0
lines changed

1 file changed

+56
-0
lines changed

src/api/providers/lmstudio.ts

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,47 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
2929
...convertToOpenAiMessages(messages),
3030
]
3131

32+
// -------------------------
33+
// Track token usage
34+
// -------------------------
35+
// Build content blocks so we can estimate prompt token count using the shared utility.
36+
const toContentBlocks = (
37+
blocks: Anthropic.Messages.MessageParam[] | string,
38+
): Anthropic.Messages.ContentBlockParam[] => {
39+
if (typeof blocks === "string") {
40+
return [{ type: "text", text: blocks }]
41+
}
42+
43+
const result: Anthropic.Messages.ContentBlockParam[] = []
44+
for (const msg of blocks) {
45+
if (typeof msg.content === "string") {
46+
result.push({ type: "text", text: msg.content })
47+
} else if (Array.isArray(msg.content)) {
48+
// Keep only text blocks for counting purposes (non-text parts are skipped)
49+
for (const part of msg.content) {
50+
if (part.type === "text") {
51+
result.push({ type: "text", text: part.text })
52+
}
53+
}
54+
}
55+
}
56+
return result
57+
}
58+
59+
// Count prompt/input tokens (system prompt + user/assistant history)
60+
let inputTokens = 0
61+
try {
62+
inputTokens = await this.countTokens([
63+
{ type: "text", text: systemPrompt },
64+
...toContentBlocks(messages),
65+
])
66+
} catch {
67+
// Fall back to 0 if counting fails
68+
inputTokens = 0
69+
}
70+
71+
let assistantText = ""
72+
3273
try {
3374
// Create params object with optional draft model
3475
const params: any = {
@@ -50,12 +91,27 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
5091
for await (const chunk of results) {
5192
const delta = chunk.choices[0]?.delta
5293
if (delta?.content) {
94+
assistantText += delta.content
5395
yield {
5496
type: "text",
5597
text: delta.content,
5698
}
5799
}
58100
}
101+
102+
// After streaming completes, estimate output tokens and yield usage metrics
103+
let outputTokens = 0
104+
try {
105+
outputTokens = await this.countTokens([{ type: "text", text: assistantText }])
106+
} catch {
107+
outputTokens = 0
108+
}
109+
110+
yield {
111+
type: "usage",
112+
inputTokens,
113+
outputTokens,
114+
} as const
59115
} catch (error) {
60116
// LM Studio doesn't return an error code/body for now
61117
throw new Error(

0 commit comments

Comments
 (0)