Skip to content

Commit de207be

Browse files
committed
fix: Fixes slots update debouncing
Refactors the slots service to use a more robust debouncing mechanism.
1 parent 9e7f48e commit de207be

File tree

3 files changed

+41
-27
lines changed

3 files changed

+41
-27
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
export const SLOTS_DEBOUNCE_TIME = 100;
1+
export const SLOTS_DEBOUNCE_INTERVAL = 100;

tools/server/webui/src/lib/services/slots.ts

Lines changed: 36 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import type { ApiSlotData, ApiProcessingState } from '$lib/types/api';
22
import { serverStore } from '$lib/stores/server.svelte';
3-
import { SLOTS_DEBOUNCE_TIME } from '$lib/constants/debounce';
3+
import { SLOTS_DEBOUNCE_INTERVAL } from '$lib/constants/debounce';
44

55
export class SlotsService {
66
private callbacks: Set<(state: ApiProcessingState) => void> = new Set();
@@ -10,9 +10,10 @@ export class SlotsService {
1010
private currentTokensPerSecond: number = 0;
1111
private tokenRateHistory: number[] = [];
1212
private lastUpdateTime: number = 0;
13-
private pendingUpdate: boolean = false;
1413
private streamStartTime: number = 0;
1514
private streamStartTokens: number = 0;
15+
private debounceTimer: ReturnType<typeof setTimeout> | null = null;
16+
private lastKnownState: ApiProcessingState | null = null;
1617

1718
constructor() {}
1819

@@ -72,9 +73,13 @@ export class SlotsService {
7273
this.currentTokensPerSecond = 0;
7374
this.tokenRateHistory = [];
7475
this.lastUpdateTime = 0;
75-
this.pendingUpdate = false;
7676
this.streamStartTime = 0;
7777
this.streamStartTokens = 0;
78+
79+
if (this.debounceTimer !== null) {
80+
clearTimeout(this.debounceTimer);
81+
this.debounceTimer = null;
82+
}
7883
}
7984

8085
/**
@@ -96,21 +101,32 @@ export class SlotsService {
96101
const currentTime = Date.now();
97102
const timeSinceLastUpdate = currentTime - this.lastUpdateTime;
98103

99-
// For the first few calls, use shorter debouncing to get tokens/sec faster
100-
const debounceTime = this.tokenRateHistory.length < 2 ? 50 : SLOTS_DEBOUNCE_TIME;
101-
102-
if (timeSinceLastUpdate < debounceTime) {
103-
if (!this.pendingUpdate) {
104-
this.pendingUpdate = true;
105-
setTimeout(async () => {
106-
this.pendingUpdate = false;
107-
await this.performUpdate();
108-
}, debounceTime - timeSinceLastUpdate);
104+
if (timeSinceLastUpdate >= SLOTS_DEBOUNCE_INTERVAL) {
105+
if (this.debounceTimer !== null) {
106+
clearTimeout(this.debounceTimer);
107+
this.debounceTimer = null;
109108
}
109+
110+
this.lastUpdateTime = currentTime;
111+
112+
await this.performUpdate();
113+
return;
114+
}
115+
116+
if (this.debounceTimer !== null) {
110117
return;
111118
}
112119

113-
await this.performUpdate();
120+
const waitTime = SLOTS_DEBOUNCE_INTERVAL - timeSinceLastUpdate;
121+
122+
this.debounceTimer = setTimeout(async () => {
123+
this.debounceTimer = null;
124+
125+
if (this.isStreamingActive) {
126+
this.lastUpdateTime = Date.now();
127+
await this.performUpdate();
128+
}
129+
}, waitTime);
114130
}
115131

116132

@@ -156,6 +172,7 @@ export class SlotsService {
156172
const slots: ApiSlotData[] = await response.json();
157173
const processingState = this.parseProcessingState(slots);
158174

175+
this.lastKnownState = processingState;
159176

160177
this.callbacks.forEach(callback => {
161178
try {
@@ -204,13 +221,11 @@ export class SlotsService {
204221
const currentTokens = activeSlot.next_token.n_decoded;
205222

206223
if (this.isStreamingActive) {
207-
// Initialize stream tracking on first call
208224
if (this.streamStartTokens === 0 && currentTokens > 0) {
209225
this.streamStartTokens = currentTokens;
210226
this.streamStartTime = currentTime;
211227
}
212228

213-
// Calculate tokens/sec using multiple methods for reliability
214229
let calculatedRate = 0;
215230

216231
// Method 1: Use recent interval (preferred for accuracy)
@@ -233,7 +248,6 @@ export class SlotsService {
233248
}
234249
}
235250

236-
// Update rate if we have a valid calculation
237251
if (calculatedRate > 0) {
238252
this.tokenRateHistory.push(calculatedRate);
239253
if (this.tokenRateHistory.length > 5) {
@@ -242,9 +256,6 @@ export class SlotsService {
242256

243257
this.currentTokensPerSecond = this.tokenRateHistory.reduce((sum, rate) => sum + rate, 0) / this.tokenRateHistory.length;
244258
}
245-
246-
// Always show some rate during active streaming (even if 0 initially)
247-
// This ensures the UI always displays tokens/sec field during streaming
248259
}
249260

250261
if (this.isStreamingActive && currentTokens >= this.lastTokenCount) {
@@ -267,6 +278,11 @@ export class SlotsService {
267278
}
268279

269280
async getCurrentState(): Promise<ApiProcessingState | null> {
281+
if (this.isStreamingActive) {
282+
return this.lastKnownState;
283+
}
284+
285+
// For non-streaming state, make a direct call
270286
const isAvailable = await this.isSlotsEndpointAvailable();
271287

272288
if (!isAvailable) {

tools/server/webui/src/lib/stores/chat.svelte.ts

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -206,9 +206,6 @@ class ChatStore {
206206
streamedContent += chunk;
207207
this.currentResponse = streamedContent;
208208

209-
// Update slots state on each chunk
210-
slotsService.updateSlotsState();
211-
212209
// Parse thinking content during streaming
213210
const partialThinking = extractPartialThinking(streamedContent);
214211

@@ -220,14 +217,13 @@ class ChatStore {
220217
// Update message with parsed content
221218
this.activeMessages[messageIndex].content = partialThinking.remainingContent || streamedContent;
222219
}
220+
221+
slotsService.updateSlotsState();
223222
},
224223

225224
onReasoningChunk: (reasoningChunk: string) => {
226225
streamedReasoningContent += reasoningChunk;
227226

228-
// Update slots state on reasoning chunks too
229-
slotsService.updateSlotsState();
230-
231227
const messageIndex = this.activeMessages.findIndex(
232228
(m) => m.id === assistantMessage.id
233229
);
@@ -236,6 +232,8 @@ class ChatStore {
236232
// Update message with reasoning content
237233
this.activeMessages[messageIndex].thinking = streamedReasoningContent;
238234
}
235+
236+
slotsService.updateSlotsState();
239237
},
240238

241239
onComplete: async (finalContent?: string, reasoningContent?: string) => {

0 commit comments

Comments
 (0)