@@ -25,57 +25,108 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHandler {
 	}
 
 	override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
-		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
-			{ role: "system", content: systemPrompt },
-			...convertToOpenAiMessages(messages),
-		]
+		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
+			{ role: "system", content: systemPrompt },
+			...convertToOpenAiMessages(messages),
+		]
 
-		try {
-			// Create params object with optional draft model
-			const params: any = {
-				model: this.getModel().id,
-				messages: openAiMessages,
-				temperature: this.options.modelTemperature ?? LMSTUDIO_DEFAULT_TEMPERATURE,
-				stream: true,
-			}
-
-			// Add draft model if speculative decoding is enabled and a draft model is specified
-			if (this.options.lmStudioSpeculativeDecodingEnabled && this.options.lmStudioDraftModelId) {
-				params.draft_model = this.options.lmStudioDraftModelId
-			}
+		// -------------------------
+		// Track token usage
+		// -------------------------
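+		// toContentBlocks normalizes a prompt string or an Anthropic message
+		// array into text-only content blocks for countTokens(); non-text
+		// parts (e.g. images) are skipped.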
+		const toContentBlocks = (
+			blocks: Anthropic.Messages.MessageParam[] | string,
+		): Anthropic.Messages.ContentBlockParam[] => {
+			if (typeof blocks === "string") {
+				return [{ type: "text", text: blocks }]
+			}
 
-			const results = await this.client.chat.completions.create(params)
-
-			const matcher = new XmlMatcher(
-				"think",
-				(chunk) =>
-					({
-						type: chunk.matched ? "reasoning" : "text",
-						text: chunk.data,
-					}) as const,
-			)
-
-			// Stream handling
-			// @ts-ignore
-			for await (const chunk of results) {
-				const delta = chunk.choices[0]?.delta
-
-				if (delta?.content) {
-					for (const chunk of matcher.update(delta.content)) {
-						yield chunk
+			const result: Anthropic.Messages.ContentBlockParam[] = []
+			for (const msg of blocks) {
+				if (typeof msg.content === "string") {
+					result.push({ type: "text", text: msg.content })
+				} else if (Array.isArray(msg.content)) {
+					for (const part of msg.content) {
+						if (part.type === "text") {
+							result.push({ type: "text", text: part.text })
 						}
 					}
 				}
-			for (const chunk of matcher.final()) {
-				yield chunk
+			}
+			return result
+		}
+
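+		// Count the prompt tokens locally; the usage chunk emitted at the end
+		// of the stream is built from these local counts.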
+		let inputTokens = 0
+		try {
+			inputTokens = await this.countTokens([
+				{ type: "text", text: systemPrompt },
+				...toContentBlocks(messages),
+			])
+		} catch (err) {
+			console.error("[LmStudio] Failed to count input tokens:", err)
+			inputTokens = 0
+		}
+
+		let assistantText = ""
+
+		try {
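+			// draft_model is an LM Studio-specific extension, absent from the
+			// OpenAI request types, hence the intersection type below.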
+			const params: OpenAI.Chat.ChatCompletionCreateParamsStreaming & { draft_model?: string } = {
+				model: this.getModel().id,
+				messages: openAiMessages,
+				temperature: this.options.modelTemperature ?? LMSTUDIO_DEFAULT_TEMPERATURE,
+				stream: true,
+			}
+
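+			// Add draft model if speculative decoding is enabled and a draft model is specified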
+			if (this.options.lmStudioSpeculativeDecodingEnabled && this.options.lmStudioDraftModelId) {
+				params.draft_model = this.options.lmStudioDraftModelId
+			}
+
+			const results = await this.client.chat.completions.create(params)
+
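+			// Route <think>…</think> spans to "reasoning" chunks and all other
+			// streamed text to plain "text" chunks.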
+			const matcher = new XmlMatcher(
+				"think",
+				(chunk) =>
+					({
+						type: chunk.matched ? "reasoning" : "text",
+						text: chunk.data,
+					}) as const,
+			)
+
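+			// Stream the completion, accumulating the raw assistant text so the
+			// output tokens can be counted once the stream finishes.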
+			for await (const chunk of results) {
+				const delta = chunk.choices[0]?.delta
+
+				if (delta?.content) {
+					assistantText += delta.content
+					for (const processedChunk of matcher.update(delta.content)) {
+						yield processedChunk
+					}
 				}
-		} catch (error) {
-			// LM Studio doesn't return an error code/body for now
-			throw new Error(
-				"Please check the LM Studio developer logs to debug what went wrong. You may need to load the model with a larger context length to work with Roo Code's prompts.",
-			)
 			}
+
+			for (const processedChunk of matcher.final()) {
+				yield processedChunk
+			}
+
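+			// Count the completion tokens from the accumulated assistant text.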
+			let outputTokens = 0
+			try {
+				outputTokens = await this.countTokens([{ type: "text", text: assistantText }])
+			} catch (err) {
+				console.error("[LmStudio] Failed to count output tokens:", err)
+				outputTokens = 0
+			}
+
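+			// Emit a final usage chunk built from the locally counted totals.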
+			yield {
+				type: "usage",
+				inputTokens,
+				outputTokens,
+			} as const
+		} catch (error) {
+			throw new Error(
+				"Please check the LM Studio developer logs to debug what went wrong. You may need to load the model with a larger context length to work with Roo Code's prompts.",
+			)
 		}
+	}
+
 
 	override getModel(): { id: string; info: ModelInfo } {
 		return {
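A minimal consumer sketch for the stream above (illustrative only, not part of
the commit; assumes a constructed `handler` plus `systemPrompt` and `messages`
in scope, and the chunk shapes shown in the diff):

	// Drain the ApiStream, printing text and reading the trailing usage chunk.
	for await (const chunk of handler.createMessage(systemPrompt, messages)) {
		if (chunk.type === "text") {
			process.stdout.write(chunk.text)
		} else if (chunk.type === "reasoning") {
			// <think> content surfaced by the XmlMatcher.
		} else if (chunk.type === "usage") {
			console.log(`\ntokens in=${chunk.inputTokens} out=${chunk.outputTokens}`)
		}
	}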