@@ -29,6 +29,47 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
2929 ...convertToOpenAiMessages ( messages ) ,
3030 ]
3131
32+ // -------------------------
33+ // Track token usage
34+ // -------------------------
35+ // Build content blocks so we can estimate prompt token count using the shared utility.
36+ const toContentBlocks = (
37+ blocks : Anthropic . Messages . MessageParam [ ] | string ,
38+ ) : Anthropic . Messages . ContentBlockParam [ ] => {
39+ if ( typeof blocks === "string" ) {
40+ return [ { type : "text" , text : blocks } ]
41+ }
42+
43+ const result : Anthropic . Messages . ContentBlockParam [ ] = [ ]
44+ for ( const msg of blocks ) {
45+ if ( typeof msg . content === "string" ) {
46+ result . push ( { type : "text" , text : msg . content } )
47+ } else if ( Array . isArray ( msg . content ) ) {
48+ // Filter out text blocks only for counting purposes
49+ for ( const part of msg . content ) {
50+ if ( part . type === "text" ) {
51+ result . push ( { type : "text" , text : part . text } )
52+ }
53+ }
54+ }
55+ }
56+ return result
57+ }
58+
59+ // Count prompt/input tokens (system prompt + user/assistant history)
60+ let inputTokens = 0
61+ try {
62+ inputTokens = await this . countTokens ( [
63+ { type : "text" , text : systemPrompt } ,
64+ ...toContentBlocks ( messages ) ,
65+ ] )
66+ } catch ( err ) {
67+ console . error ( "[LmStudio] Failed to count input tokens:" , err )
68+ inputTokens = 0
69+ }
70+
71+ let assistantText = ""
72+
3273 try {
3374 // Create params object with optional draft model
3475 const params : any = {
@@ -50,12 +91,28 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
// Relay streamed text deltas to the caller while accumulating the full
// assistant reply for post-stream token estimation.
for await (const chunk of results) {
	const delta = chunk.choices[0]?.delta
	if (delta?.content) {
		assistantText += delta.content
		yield {
			type: "text",
			text: delta.content,
		}
	}
}

// After streaming completes, estimate output tokens and yield usage
// metrics. LM Studio does not report usage, so this is an estimate;
// a counting failure is logged and degrades to 0 rather than failing
// the whole request.
let outputTokens = 0
try {
	outputTokens = await this.countTokens([{ type: "text", text: assistantText }])
} catch (err) {
	console.error("[LmStudio] Failed to count output tokens:", err)
	outputTokens = 0
}

// No `as const` assertion here: the usage chunk is yielded as a plain
// object, consistent with the `text` chunk above.
yield {
	type: "usage",
	inputTokens,
	outputTokens,
}
59116 } catch ( error ) {
60117 // LM Studio doesn't return an error code/body for now
61118 throw new Error (
0 commit comments