11import type { ApiSlotData , ApiProcessingState } from '$lib/types/api' ;
22import { serverStore } from '$lib/stores/server.svelte' ;
3- import { SLOTS_DEBOUNCE_TIME } from '$lib/constants/debounce' ;
3+ import { SLOTS_DEBOUNCE_INTERVAL } from '$lib/constants/debounce' ;
44
55export class SlotsService {
66 private callbacks : Set < ( state : ApiProcessingState ) => void > = new Set ( ) ;
@@ -10,9 +10,10 @@ export class SlotsService {
1010 private currentTokensPerSecond : number = 0 ;
1111 private tokenRateHistory : number [ ] = [ ] ;
1212 private lastUpdateTime : number = 0 ;
13- private pendingUpdate : boolean = false ;
1413 private streamStartTime : number = 0 ;
1514 private streamStartTokens : number = 0 ;
15+ private debounceTimer : ReturnType < typeof setTimeout > | null = null ;
16+ private lastKnownState : ApiProcessingState | null = null ;
1617
1718 constructor ( ) { }
1819
@@ -72,9 +73,13 @@ export class SlotsService {
7273 this . currentTokensPerSecond = 0 ;
7374 this . tokenRateHistory = [ ] ;
7475 this . lastUpdateTime = 0 ;
75- this . pendingUpdate = false ;
7676 this . streamStartTime = 0 ;
7777 this . streamStartTokens = 0 ;
78+
79+ if ( this . debounceTimer !== null ) {
80+ clearTimeout ( this . debounceTimer ) ;
81+ this . debounceTimer = null ;
82+ }
7883 }
7984
8085 /**
@@ -96,21 +101,32 @@ export class SlotsService {
96101 const currentTime = Date . now ( ) ;
97102 const timeSinceLastUpdate = currentTime - this . lastUpdateTime ;
98103
99- // For the first few calls, use shorter debouncing to get tokens/sec faster
100- const debounceTime = this . tokenRateHistory . length < 2 ? 50 : SLOTS_DEBOUNCE_TIME ;
101-
102- if ( timeSinceLastUpdate < debounceTime ) {
103- if ( ! this . pendingUpdate ) {
104- this . pendingUpdate = true ;
105- setTimeout ( async ( ) => {
106- this . pendingUpdate = false ;
107- await this . performUpdate ( ) ;
108- } , debounceTime - timeSinceLastUpdate ) ;
104+ if ( timeSinceLastUpdate >= SLOTS_DEBOUNCE_INTERVAL ) {
105+ if ( this . debounceTimer !== null ) {
106+ clearTimeout ( this . debounceTimer ) ;
107+ this . debounceTimer = null ;
109108 }
109+
110+ this . lastUpdateTime = currentTime ;
111+
112+ await this . performUpdate ( ) ;
113+ return ;
114+ }
115+
116+ if ( this . debounceTimer !== null ) {
110117 return ;
111118 }
112119
113- await this . performUpdate ( ) ;
120+ const waitTime = SLOTS_DEBOUNCE_INTERVAL - timeSinceLastUpdate ;
121+
122+ this . debounceTimer = setTimeout ( async ( ) => {
123+ this . debounceTimer = null ;
124+
125+ if ( this . isStreamingActive ) {
126+ this . lastUpdateTime = Date . now ( ) ;
127+ await this . performUpdate ( ) ;
128+ }
129+ } , waitTime ) ;
114130 }
115131
116132
@@ -156,6 +172,7 @@ export class SlotsService {
156172 const slots : ApiSlotData [ ] = await response . json ( ) ;
157173 const processingState = this . parseProcessingState ( slots ) ;
158174
175+ this . lastKnownState = processingState ;
159176
160177 this . callbacks . forEach ( callback => {
161178 try {
@@ -204,13 +221,11 @@ export class SlotsService {
204221 const currentTokens = activeSlot . next_token . n_decoded ;
205222
206223 if ( this . isStreamingActive ) {
207- // Initialize stream tracking on first call
208224 if ( this . streamStartTokens === 0 && currentTokens > 0 ) {
209225 this . streamStartTokens = currentTokens ;
210226 this . streamStartTime = currentTime ;
211227 }
212228
213- // Calculate tokens/sec using multiple methods for reliability
214229 let calculatedRate = 0 ;
215230
216231 // Method 1: Use recent interval (preferred for accuracy)
@@ -233,7 +248,6 @@ export class SlotsService {
233248 }
234249 }
235250
236- // Update rate if we have a valid calculation
237251 if ( calculatedRate > 0 ) {
238252 this . tokenRateHistory . push ( calculatedRate ) ;
239253 if ( this . tokenRateHistory . length > 5 ) {
@@ -242,9 +256,6 @@ export class SlotsService {
242256
243257 this . currentTokensPerSecond = this . tokenRateHistory . reduce ( ( sum , rate ) => sum + rate , 0 ) / this . tokenRateHistory . length ;
244258 }
245-
246- // Always show some rate during active streaming (even if 0 initially)
247- // This ensures the UI always displays tokens/sec field during streaming
248259 }
249260
250261 if ( this . isStreamingActive && currentTokens >= this . lastTokenCount ) {
@@ -267,6 +278,11 @@ export class SlotsService {
267278 }
268279
269280 async getCurrentState ( ) : Promise < ApiProcessingState | null > {
281+ if ( this . isStreamingActive ) {
282+ return this . lastKnownState ;
283+ }
284+
285+ // When not streaming, make a direct call instead of returning the cached state
270286 const isAvailable = await this . isSlotsEndpointAvailable ( ) ;
271287
272288 if ( ! isAvailable ) {
0 commit comments