@@ -2,58 +2,44 @@ import type { ApiSlotData, ApiProcessingState } from '$lib/types/api';
22import { serverStore } from '$lib/stores/server.svelte' ;
33
44export class SlotsService {
/** Listeners invoked with the parsed processing state after each successful slots fetch. */
private callbacks: Set<(state: ApiProcessingState) => void> = new Set();
/** Decoded-token count recorded at the previous rate sample (0 = no sample yet). */
private lastTokenCount: number = 0;
/** `Date.now()` timestamp of the previous rate sample (0 = no sample yet). */
private lastTimestamp: number = 0;
/** True between startStreamingPolling() and stopStreamingPolling(). */
private isStreamingActive: boolean = false;
/** Smoothed tokens/sec reported to subscribers: the mean of `tokenRateHistory`. */
private currentTokensPerSecond: number = 0;
/** Most recent rate samples, oldest first; trimmed to at most 5 entries. */
private tokenRateHistory: number[] = [];
/** `Date.now()` of the last slots update; drives the throttle in updateSlotsState(). */
private lastUpdateTime: number = 0;
/** True while a deferred update is scheduled via setTimeout. */
private pendingUpdate: boolean = false;
/** `Date.now()` at which tracking of the current stream began (0 = not yet tracked). */
private streamStartTime: number = 0;
/** Decoded-token count at which tracking of the current stream began (0 = not yet tracked). */
private streamStartTokens: number = 0;
1215
/**
 * Create a service instance. All state starts from the field initializers;
 * there is no polling interval to configure because updates are requested
 * on demand through updateSlotsState().
 */
constructor() {}
1617
/**
 * Check if the slots endpoint is available, based on server properties and
 * an actual probe of the endpoint.
 *
 * The result is not cached: every call that passes the server-props guard
 * issues a `GET /slots` request.
 *
 * @returns `false` when server properties are not loaded, when the server
 *   reports no slots (`total_slots <= 0`), when the endpoint answers
 *   501 Not Implemented, or when the request itself fails; `true` for any
 *   other HTTP response.
 */
private async isSlotsEndpointAvailable(): Promise<boolean> {
	const serverProps = serverStore.serverProps;

	// Without server properties, or with no slots configured, there is nothing to query.
	if (!serverProps || serverProps.total_slots <= 0) {
		return false;
	}

	try {
		const response = await fetch('/slots');

		if (response.status === 501) {
			console.info('Slots endpoint not implemented - server started without --slots flag');
			return false;
		}

		// Any status other than 501 is treated as "endpoint exists".
		return true;
	} catch (error) {
		// fetch() rejected (e.g. network failure): report the endpoint as unavailable.
		console.warn('Unable to test slots endpoint availability:', error);
		return false;
	}
}
@@ -62,33 +48,87 @@ export class SlotsService {
6248 * Reset slots availability check (call when server properties change)
6349 */
resetAvailabilityCheck(): void {
	// Intentionally a no-op: this class no longer keeps a cached availability
	// flag, so there is nothing to clear. The method is kept so existing
	// callers continue to work.
}
6852
/**
 * Start streaming session tracking.
 *
 * Marks the service as streaming and resets every rate-tracking baseline so
 * the new session never compares against token counts or timestamps left
 * over from a previous session that ended without stopStreamingPolling().
 */
startStreamingPolling(): void {
	this.isStreamingActive = true;
	this.streamStartTime = Date.now();
	this.streamStartTokens = 0;
	this.currentTokensPerSecond = 0;
	this.tokenRateHistory = [];
	// Clear the interval baseline and throttle clock as well; a stale
	// lastTokenCount larger than the new stream's counts would otherwise
	// block interval-based rate samples until the new count overtakes it.
	this.lastTokenCount = 0;
	this.lastTimestamp = 0;
	this.lastUpdateTime = 0;
}
63+
/**
 * Stop streaming session tracking.
 *
 * Clears the streaming flag and resets all rate-tracking and throttle state
 * to its initial values.
 */
stopStreamingPolling(): void {
	this.isStreamingActive = false;
	this.lastTokenCount = 0;
	this.lastTimestamp = 0;
	this.currentTokensPerSecond = 0;
	this.tokenRateHistory = [];
	this.lastUpdateTime = 0;
	// NOTE(review): a timer scheduled by updateSlotsState() is not cancelled
	// here; it still fires later, and performUpdate() returns early only if
	// streaming is still inactive at that point.
	this.pendingUpdate = false;
	this.streamStartTime = 0;
	this.streamStartTokens = 0;
}
78+
/**
 * Check if currently in a streaming session.
 *
 * @returns `true` after startStreamingPolling() and until stopStreamingPolling().
 */
isStreaming(): boolean {
	return this.isStreamingActive;
}
85+
/**
 * Fetch and update slots state on demand (called during streaming chunks).
 *
 * Rate-limited to prevent excessive requests during high-frequency
 * streaming: a call arriving sooner than the minimum interval after the last
 * update schedules at most one deferred update instead of running one
 * immediately. Does nothing when no streaming session is active.
 */
async updateSlotsState(): Promise<void> {
	if (!this.isStreamingActive) {
		return;
	}

	const currentTime = Date.now();
	const timeSinceLastUpdate = currentTime - this.lastUpdateTime;

	// For the first few calls, use a shorter interval to get tokens/sec faster
	const debounceTime = this.tokenRateHistory.length < 2 ? 50 : 100;

	if (timeSinceLastUpdate < debounceTime) {
		if (!this.pendingUpdate) {
			this.pendingUpdate = true;
			setTimeout(() => {
				this.pendingUpdate = false;
				// Nobody awaits a timer callback, so handle rejection here
				// instead of leaving a floating promise.
				this.performUpdate().catch((error: unknown) => {
					console.warn('Error during deferred slots update:', error);
				});
			}, debounceTime - timeSinceLastUpdate);
		}
		return;
	}

	// Claim this update window before awaiting: otherwise every chunk that
	// arrives while the first request is still in flight would also pass the
	// interval check above and start its own concurrent update.
	this.lastUpdateTime = currentTime;
	await this.performUpdate();
}
86114
/**
 * Perform the actual slots state update.
 *
 * Returns without fetching when no streaming session is active or when the
 * slots endpoint is reported unavailable. Otherwise records the update time
 * and fetches the slots state, notifying subscribers.
 */
private async performUpdate(): Promise<void> {
	if (!this.isStreamingActive) {
		return;
	}

	const isAvailable = await this.isSlotsEndpointAvailable();

	if (!isAvailable) {
		return;
	}

	// Stamp the update after the availability probe so the throttle interval
	// is measured from the moment the real fetch starts.
	this.lastUpdateTime = Date.now();
	await this.fetchAndNotify();
}
93133
94134 subscribe ( callback : ( state : ApiProcessingState ) => void ) : ( ) => void {
@@ -98,15 +138,12 @@ export class SlotsService {
98138 } ;
99139 }
100140
101- private async poll ( ) : Promise < void > {
141+ private async fetchAndNotify ( ) : Promise < void > {
102142 try {
103143 const response = await fetch ( `/slots` ) ;
104144
105- // Handle 501 Not Implemented - stop polling and mark as unsupported
106145 if ( response . status === 501 ) {
107- console . info ( 'Slots endpoint not implemented - stopping polling' ) ;
108- this . slotsEndpointSupported = false ;
109- this . stopPolling ( ) ;
146+ console . info ( 'Slots endpoint not implemented' ) ;
110147 return ;
111148 }
112149
@@ -118,6 +155,7 @@ export class SlotsService {
118155 const slots : ApiSlotData [ ] = await response . json ( ) ;
119156 const processingState = this . parseProcessingState ( slots ) ;
120157
158+
121159 this . callbacks . forEach ( callback => {
122160 try {
123161 callback ( processingState ) ;
@@ -126,7 +164,7 @@ export class SlotsService {
126164 }
127165 } ) ;
128166 } catch ( error ) {
129- console . warn ( 'Error polling slots:' , error ) ;
167+ console . warn ( 'Error fetching slots:' , error ) ;
130168 }
131169 }
132170
@@ -158,31 +196,59 @@ export class SlotsService {
158196 status = 'preparing' ;
159197 }
160198
161- // Calculate context usage (estimate based on prompt length and decoded tokens)
162- const promptTokens = Math . floor ( activeSlot . prompt . length / 4 ) ; // Rough estimate
199+ const promptTokens = Math . floor ( activeSlot . prompt . length / 4 ) ;
163200 const contextUsed = promptTokens + activeSlot . next_token . n_decoded ;
164201
165- // Calculate tokens per second
166- let tokensPerSecond = 0 ;
167202 const currentTime = Date . now ( ) ;
168203 const currentTokens = activeSlot . next_token . n_decoded ;
169204
170- if ( status === 'generating' && this . lastTimestamp > 0 && currentTokens > this . lastTokenCount ) {
171- const timeDiff = ( currentTime - this . lastTimestamp ) / 1000 ; // Convert to seconds
172- const tokenDiff = currentTokens - this . lastTokenCount ;
173- if ( timeDiff > 0 ) {
174- tokensPerSecond = tokenDiff / timeDiff ;
205+ if ( this . isStreamingActive ) {
206+ // Initialize stream tracking on first call
207+ if ( this . streamStartTokens === 0 && currentTokens > 0 ) {
208+ this . streamStartTokens = currentTokens ;
209+ this . streamStartTime = currentTime ;
175210 }
211+
212+ // Calculate tokens/sec using multiple methods for reliability
213+ let calculatedRate = 0 ;
214+
215+ // Method 1: Use recent interval (preferred for accuracy)
216+ if ( this . lastTimestamp > 0 && currentTokens > this . lastTokenCount ) {
217+ const timeDiff = ( currentTime - this . lastTimestamp ) / 1000 ;
218+ const tokenDiff = currentTokens - this . lastTokenCount ;
219+
220+ if ( timeDiff > 0.02 ) {
221+ calculatedRate = tokenDiff / timeDiff ;
222+ }
223+ }
224+
225+ // Method 2: Use total stream time (fallback for early display)
226+ if ( calculatedRate === 0 && this . streamStartTime > 0 && currentTokens > this . streamStartTokens ) {
227+ const totalTimeDiff = ( currentTime - this . streamStartTime ) / 1000 ;
228+ const totalTokenDiff = currentTokens - this . streamStartTokens ;
229+
230+ if ( totalTimeDiff > 0.1 ) { // At least 100ms of streaming
231+ calculatedRate = totalTokenDiff / totalTimeDiff ;
232+ }
233+ }
234+
235+ // Update rate if we have a valid calculation
236+ if ( calculatedRate > 0 ) {
237+ this . tokenRateHistory . push ( calculatedRate ) ;
238+ if ( this . tokenRateHistory . length > 5 ) {
239+ this . tokenRateHistory . shift ( ) ;
240+ }
241+
242+ this . currentTokensPerSecond = this . tokenRateHistory . reduce ( ( sum , rate ) => sum + rate , 0 ) / this . tokenRateHistory . length ;
243+ }
244+
245+ // Always show some rate during active streaming (even if 0 initially)
246+ // This ensures the UI always displays tokens/sec field during streaming
176247 }
177248
178- // Update tracking for next calculation
179- if ( status === 'generating' ) {
249+ if ( this . isStreamingActive && currentTokens >= this . lastTokenCount ) {
180250 this . lastTokenCount = currentTokens ;
181251 this . lastTimestamp = currentTime ;
182- } else if ( status === 'idle' ) {
183- // Reset when idle
184- this . lastTokenCount = 0 ;
185- this . lastTimestamp = 0 ;
186252 }
187253
188254 return {
@@ -195,24 +261,22 @@ export class SlotsService {
195261 topP : activeSlot . params . top_p ,
196262 speculative : activeSlot . speculative ,
197263 hasNextToken : activeSlot . next_token . has_next_token ,
198- tokensPerSecond
264+ tokensPerSecond : this . currentTokensPerSecond
199265 } ;
200266 }
201267
202268 async getCurrentState ( ) : Promise < ApiProcessingState | null > {
203- // Check if slots endpoint is available before making request
204269 const isAvailable = await this . isSlotsEndpointAvailable ( ) ;
270+
205271 if ( ! isAvailable ) {
206272 return null ;
207273 }
208274
209275 try {
210276 const response = await fetch ( `/slots` ) ;
211277
212- // Handle 501 Not Implemented
213278 if ( response . status === 501 ) {
214279 console . info ( 'Slots endpoint not implemented' ) ;
215- this . slotsEndpointSupported = false ;
216280 return null ;
217281 }
218282
0 commit comments