@@ -7,6 +7,8 @@ export class SlotsService {
77 private callbacks : Set < ( state : ApiProcessingState ) => void > = new Set ( ) ;
88 private slotsAvailable : boolean | null = null ;
99 private slotsEndpointSupported : boolean | null = null ;
10+ private lastTokenCount : number = 0 ;
11+ private lastTimestamp : number = 0 ;
1012
1113 constructor ( pollingInterval = 500 ) {
1214 this . pollingInterval = pollingInterval ;
@@ -141,7 +143,8 @@ export class SlotsService {
141143 temperature : 0.8 ,
142144 topP : 0.95 ,
143145 speculative : false ,
144- hasNextToken : false
146+ hasNextToken : false ,
147+ tokensPerSecond : 0
145148 } ;
146149 }
147150
@@ -159,6 +162,29 @@ export class SlotsService {
159162 const promptTokens = Math . floor ( activeSlot . prompt . length / 4 ) ; // Rough estimate
160163 const contextUsed = promptTokens + activeSlot . next_token . n_decoded ;
161164
165+ // Calculate tokens per second
166+ let tokensPerSecond = 0 ;
167+ const currentTime = Date . now ( ) ;
168+ const currentTokens = activeSlot . next_token . n_decoded ;
169+
170+ if ( status === 'generating' && this . lastTimestamp > 0 && currentTokens > this . lastTokenCount ) {
171+ const timeDiff = ( currentTime - this . lastTimestamp ) / 1000 ; // Convert to seconds
172+ const tokenDiff = currentTokens - this . lastTokenCount ;
173+ if ( timeDiff > 0 ) {
174+ tokensPerSecond = tokenDiff / timeDiff ;
175+ }
176+ }
177+
178+ // Update tracking for next calculation
179+ if ( status === 'generating' ) {
180+ this . lastTokenCount = currentTokens ;
181+ this . lastTimestamp = currentTime ;
182+ } else if ( status === 'idle' ) {
183+ // Reset when idle
184+ this . lastTokenCount = 0 ;
185+ this . lastTimestamp = 0 ;
186+ }
187+
162188 return {
163189 status,
164190 tokensDecoded : activeSlot . next_token . n_decoded ,
@@ -168,7 +194,8 @@ export class SlotsService {
168194 temperature : activeSlot . params . temperature ,
169195 topP : activeSlot . params . top_p ,
170196 speculative : activeSlot . speculative ,
171- hasNextToken : activeSlot . next_token . has_next_token
197+ hasNextToken : activeSlot . next_token . has_next_token ,
198+ tokensPerSecond
172199 } ;
173200 }
174201
0 commit comments