Commit 9402a4d

Lm studio and ollama usage fix (RooCodeInc#3707)
* integration
* Fix
1 parent f274a15 commit 9402a4d

2 files changed: 112 additions, 45 deletions


src/api/providers/lmstudio.ts

Lines changed: 94 additions & 43 deletions
@@ -25,57 +25,108 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
 	}

 	override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
-		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
-			{ role: "system", content: systemPrompt },
-			...convertToOpenAiMessages(messages),
-		]
+		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
+			{ role: "system", content: systemPrompt },
+			...convertToOpenAiMessages(messages),
+		]

-		try {
-			// Create params object with optional draft model
-			const params: any = {
-				model: this.getModel().id,
-				messages: openAiMessages,
-				temperature: this.options.modelTemperature ?? LMSTUDIO_DEFAULT_TEMPERATURE,
-				stream: true,
-			}
-
-			// Add draft model if speculative decoding is enabled and a draft model is specified
-			if (this.options.lmStudioSpeculativeDecodingEnabled && this.options.lmStudioDraftModelId) {
-				params.draft_model = this.options.lmStudioDraftModelId
-			}
+		// -------------------------
+		// Track token usage
+		// -------------------------
+		const toContentBlocks = (
+			blocks: Anthropic.Messages.MessageParam[] | string,
+		): Anthropic.Messages.ContentBlockParam[] => {
+			if (typeof blocks === "string") {
+				return [{ type: "text", text: blocks }]
+			}

-			const results = await this.client.chat.completions.create(params)
-
-			const matcher = new XmlMatcher(
-				"think",
-				(chunk) =>
-					({
-						type: chunk.matched ? "reasoning" : "text",
-						text: chunk.data,
-					}) as const,
-			)
-
-			// Stream handling
-			// @ts-ignore
-			for await (const chunk of results) {
-				const delta = chunk.choices[0]?.delta
-
-				if (delta?.content) {
-					for (const chunk of matcher.update(delta.content)) {
-						yield chunk
+			const result: Anthropic.Messages.ContentBlockParam[] = []
+			for (const msg of blocks) {
+				if (typeof msg.content === "string") {
+					result.push({ type: "text", text: msg.content })
+				} else if (Array.isArray(msg.content)) {
+					for (const part of msg.content) {
+						if (part.type === "text") {
+							result.push({ type: "text", text: part.text })
 						}
 					}
 				}
-			for (const chunk of matcher.final()) {
-				yield chunk
+			}
+			return result
+		}
+
+		let inputTokens = 0
+		try {
+			inputTokens = await this.countTokens([
+				{ type: "text", text: systemPrompt },
+				...toContentBlocks(messages),
+			])
+		} catch (err) {
+			console.error("[LmStudio] Failed to count input tokens:", err)
+			inputTokens = 0
+		}
+
+		let assistantText = ""
+
+		try {
+			const params: OpenAI.Chat.ChatCompletionCreateParamsStreaming & { draft_model?: string } = {
+				model: this.getModel().id,
+				messages: openAiMessages,
+				temperature: this.options.modelTemperature ?? LMSTUDIO_DEFAULT_TEMPERATURE,
+				stream: true,
+			}
+
+			if (this.options.lmStudioSpeculativeDecodingEnabled && this.options.lmStudioDraftModelId) {
+				params.draft_model = this.options.lmStudioDraftModelId
+			}
+
+			const results = await this.client.chat.completions.create(params)
+
+			const matcher = new XmlMatcher(
+				"think",
+				(chunk) =>
+					({
+						type: chunk.matched ? "reasoning" : "text",
+						text: chunk.data,
+					}) as const,
+			)
+
+			for await (const chunk of results) {
+				const delta = chunk.choices[0]?.delta
+
+				if (delta?.content) {
+					assistantText += delta.content
+					for (const processedChunk of matcher.update(delta.content)) {
+						yield processedChunk
+					}
 				}
-		} catch (error) {
-			// LM Studio doesn't return an error code/body for now
-			throw new Error(
-				"Please check the LM Studio developer logs to debug what went wrong. You may need to load the model with a larger context length to work with Roo Code's prompts.",
-			)
 			}
+
+			for (const processedChunk of matcher.final()) {
+				yield processedChunk
+			}
+
+
+			let outputTokens = 0
+			try {
+				outputTokens = await this.countTokens([{ type: "text", text: assistantText }])
+			} catch (err) {
+				console.error("[LmStudio] Failed to count output tokens:", err)
+				outputTokens = 0
+			}
+
+			yield {
+				type: "usage",
+				inputTokens,
+				outputTokens,
+			} as const
+		} catch (error) {
+			throw new Error(
+				"Please check the LM Studio developer logs to debug what went wrong. You may need to load the model with a larger context length to work with Roo Code's prompts.",
+			)
 		}
+	}
+

 	override getModel(): { id: string; info: ModelInfo } {
 		return {
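The LM Studio streaming endpoint does not return a usage object, so the updated handler estimates both sides locally: it counts the prompt up front with the provider's countTokens helper, accumulates the streamed reply in assistantText, counts that once the stream ends, and emits the result as a final usage chunk. A minimal consumer sketch, assuming only the chunk shapes produced by the diff above (the StreamChunk type and collectStream helper are hypothetical, not part of the repository):

// Hypothetical consumer of the handler's stream; the chunk shapes below
// mirror what createMessage() yields after this commit.
type StreamChunk =
	| { type: "text" | "reasoning"; text: string }
	| { type: "usage"; inputTokens: number; outputTokens: number }

async function collectStream(stream: AsyncIterable<StreamChunk>) {
	let text = ""
	let usage = { inputTokens: 0, outputTokens: 0 }
	for await (const chunk of stream) {
		if (chunk.type === "usage") {
			// These counts are the handler's local countTokens() estimates,
			// since LM Studio reports nothing in its streaming response.
			usage = { inputTokens: chunk.inputTokens, outputTokens: chunk.outputTokens }
		} else {
			text += chunk.text
		}
	}
	return { text, usage }
}

Because the output estimate needs the complete reply, the usage chunk is always the last thing the generator yields.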

src/api/providers/ollama.ts

Lines changed: 18 additions & 2 deletions
@@ -11,6 +11,9 @@ import { DEEP_SEEK_DEFAULT_TEMPERATURE } from "./constants"
 import { XmlMatcher } from "../../utils/xml-matcher"
 import { BaseProvider } from "./base-provider"

+// Alias for the usage object returned in streaming chunks
+type CompletionUsage = OpenAI.Chat.Completions.ChatCompletionChunk["usage"]
+
 export class OllamaHandler extends BaseProvider implements SingleCompletionHandler {
 	protected options: ApiHandlerOptions
 	private client: OpenAI
@@ -37,6 +40,7 @@ export class OllamaHandler extends BaseProvider implements SingleCompletionHandl
 			messages: openAiMessages,
 			temperature: this.options.modelTemperature ?? 0,
 			stream: true,
+			stream_options: { include_usage: true },
 		})
 		const matcher = new XmlMatcher(
 			"think",
@@ -46,18 +50,30 @@ export class OllamaHandler extends BaseProvider implements SingleCompletionHandl
 					text: chunk.data,
 				}) as const,
 		)
+		let lastUsage: CompletionUsage | undefined
 		for await (const chunk of stream) {
 			const delta = chunk.choices[0]?.delta

 			if (delta?.content) {
-				for (const chunk of matcher.update(delta.content)) {
-					yield chunk
+				for (const matcherChunk of matcher.update(delta.content)) {
+					yield matcherChunk
 				}
 			}
+			if (chunk.usage) {
+				lastUsage = chunk.usage
+			}
 		}
 		for (const chunk of matcher.final()) {
 			yield chunk
 		}
+
+		if (lastUsage) {
+			yield {
+				type: "usage",
+				inputTokens: lastUsage?.prompt_tokens || 0,
+				outputTokens: lastUsage?.completion_tokens || 0,
+			}
+		}
 	}

 	override getModel(): { id: string; info: ModelInfo } {
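On the Ollama side, the only change to the request itself is stream_options: { include_usage: true }. With the OpenAI-compatible chat API this asks the server to append one extra final chunk whose choices array is empty and whose usage field carries prompt_tokens and completion_tokens; the handler keeps the last such value in lastUsage and re-emits it as a usage chunk after the text stream finishes (the if (lastUsage) guard covers servers that do not honor the flag). A standalone sketch of the same pattern with the openai npm package against Ollama's OpenAI-compatible endpoint; the base URL, API key placeholder, and model name are assumptions, not taken from this commit:

import OpenAI from "openai"

// Ollama's /v1 endpoint ignores the API key, but the client requires a value.
const client = new OpenAI({ baseURL: "http://localhost:11434/v1", apiKey: "ollama" })

type CompletionUsage = OpenAI.Chat.Completions.ChatCompletionChunk["usage"]

async function main() {
	const stream = await client.chat.completions.create({
		model: "llama3.1", // hypothetical locally pulled model
		messages: [{ role: "user", content: "Say hello in one sentence." }],
		stream: true,
		stream_options: { include_usage: true }, // request the trailing usage chunk
	})

	let reply = ""
	let lastUsage: CompletionUsage | undefined
	for await (const chunk of stream) {
		reply += chunk.choices[0]?.delta?.content ?? ""
		if (chunk.usage) {
			lastUsage = chunk.usage // only populated on the final chunk
		}
	}
	console.log(reply)
	console.log("input tokens:", lastUsage?.prompt_tokens, "output tokens:", lastUsage?.completion_tokens)
}

main().catch(console.error)

Reading usage from the stream avoids re-tokenizing the prompt and reply locally, which is what the LM Studio handler above has to do because its server reports no usage at all.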
