Skip to content

Commit 2f68ce7

Browse files
webui: auto-refresh /props on inference start to resync model metadata (ggml-org#16784)
* webui: auto-refresh /props on inference start to resync model metadata
  - Add no-cache headers to /props and /slots
  - Throttle slot checks to 30s
  - Prevent concurrent fetches with promise guard
  - Trigger refresh from chat streaming for legacy and ModelSelector
  - Show dynamic serverWarning when using cached data
* fix: restore proper legacy behavior in webui by using unified /props refresh
  Updated assistant message bubbles to show each message's stored model when available, falling back to the current server model only when the per-message value is missing.
  When the model selector is disabled, now fetches /props and prioritizes that model name over chunk metadata, then persists it with the streamed message so legacy mode properly reflects the backend configuration.
* fix: detect first valid SSE chunk and refresh server props once
* fix: removed the slots availability throttle constant and state
* webui: purge ai-generated cruft
* chore: update webui static build
1 parent e4a7159 commit 2f68ce7

File tree

7 files changed

+182
-72
lines changed

7 files changed

+182
-72
lines changed

tools/server/public/index.html.gz

437 Bytes
Binary file not shown.

tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,8 @@
8585
let displayedModel = $derived((): string | null => {
8686
if (!currentConfig.showModelInfo) return null;
8787
88-
if (currentConfig.modelSelectorEnabled) {
89-
return message.model ?? null;
88+
if (message.model) {
89+
return message.model;
9090
}
9191
9292
return serverModel;

tools/server/webui/src/lib/services/chat.ts

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ export class ChatService {
5454
onError,
5555
onReasoningChunk,
5656
onModel,
57+
onFirstValidChunk,
5758
// Generation parameters
5859
temperature,
5960
max_tokens,
@@ -201,6 +202,7 @@ export class ChatService {
201202
onError,
202203
onReasoningChunk,
203204
onModel,
205+
onFirstValidChunk,
204206
conversationId,
205207
abortController.signal
206208
);
@@ -267,6 +269,7 @@ export class ChatService {
267269
onError?: (error: Error) => void,
268270
onReasoningChunk?: (chunk: string) => void,
269271
onModel?: (model: string) => void,
272+
onFirstValidChunk?: () => void,
270273
conversationId?: string,
271274
abortSignal?: AbortSignal
272275
): Promise<void> {
@@ -283,6 +286,7 @@ export class ChatService {
283286
let lastTimings: ChatMessageTimings | undefined;
284287
let streamFinished = false;
285288
let modelEmitted = false;
289+
let firstValidChunkEmitted = false;
286290

287291
try {
288292
let chunk = '';
@@ -311,17 +315,25 @@ export class ChatService {
311315
try {
312316
const parsed: ApiChatCompletionStreamChunk = JSON.parse(data);
313317

314-
const chunkModel = this.extractModelName(parsed);
315-
if (chunkModel && !modelEmitted) {
316-
modelEmitted = true;
317-
onModel?.(chunkModel);
318+
if (!firstValidChunkEmitted && parsed.object === 'chat.completion.chunk') {
319+
firstValidChunkEmitted = true;
320+
321+
if (!abortSignal?.aborted) {
322+
onFirstValidChunk?.();
323+
}
318324
}
319325

320326
const content = parsed.choices[0]?.delta?.content;
321327
const reasoningContent = parsed.choices[0]?.delta?.reasoning_content;
322328
const timings = parsed.timings;
323329
const promptProgress = parsed.prompt_progress;
324330

331+
const chunkModel = this.extractModelName(parsed);
332+
if (chunkModel && !modelEmitted) {
333+
modelEmitted = true;
334+
onModel?.(chunkModel);
335+
}
336+
325337
if (timings || promptProgress) {
326338
this.updateProcessingState(timings, promptProgress, conversationId);
327339
if (timings) {

tools/server/webui/src/lib/stores/chat.svelte.ts

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { DatabaseStore } from '$lib/stores/database';
22
import { chatService, slotsService } from '$lib/services';
33
import { config } from '$lib/stores/settings.svelte';
4+
import { serverStore } from '$lib/stores/server.svelte';
45
import { normalizeModelName } from '$lib/utils/model-names';
56
import { filterByLeafNodeId, findLeafNode, findDescendantMessages } from '$lib/utils/branching';
67
import { browser } from '$app/environment';
@@ -362,9 +363,41 @@ class ChatStore {
362363

363364
let resolvedModel: string | null = null;
364365
let modelPersisted = false;
366+
const currentConfig = config();
367+
const preferServerPropsModel = !currentConfig.modelSelectorEnabled;
368+
let serverPropsRefreshed = false;
369+
let updateModelFromServerProps: ((persistImmediately?: boolean) => void) | null = null;
370+
371+
const refreshServerPropsOnce = () => {
372+
if (serverPropsRefreshed) {
373+
return;
374+
}
375+
376+
serverPropsRefreshed = true;
377+
378+
const hasExistingProps = serverStore.serverProps !== null;
365379

366-
const recordModel = (modelName: string, persistImmediately = true): void => {
367-
const normalizedModel = normalizeModelName(modelName);
380+
serverStore
381+
.fetchServerProps({ silent: hasExistingProps })
382+
.then(() => {
383+
updateModelFromServerProps?.(true);
384+
})
385+
.catch((error) => {
386+
console.warn('Failed to refresh server props after streaming started:', error);
387+
});
388+
};
389+
390+
const recordModel = (modelName: string | null | undefined, persistImmediately = true): void => {
391+
const serverModelName = serverStore.modelName;
392+
const preferredModelSource = preferServerPropsModel
393+
? (serverModelName ?? modelName ?? null)
394+
: (modelName ?? serverModelName ?? null);
395+
396+
if (!preferredModelSource) {
397+
return;
398+
}
399+
400+
const normalizedModel = normalizeModelName(preferredModelSource);
368401

369402
if (!normalizedModel || normalizedModel === resolvedModel) {
370403
return;
@@ -388,6 +421,20 @@ class ChatStore {
388421
}
389422
};
390423

424+
if (preferServerPropsModel) {
425+
updateModelFromServerProps = (persistImmediately = true) => {
426+
const currentServerModel = serverStore.modelName;
427+
428+
if (!currentServerModel) {
429+
return;
430+
}
431+
432+
recordModel(currentServerModel, persistImmediately);
433+
};
434+
435+
updateModelFromServerProps(false);
436+
}
437+
391438
slotsService.startStreaming();
392439
slotsService.setActiveConversation(assistantMessage.convId);
393440

@@ -396,6 +443,9 @@ class ChatStore {
396443
{
397444
...this.getApiOptions(),
398445

446+
onFirstValidChunk: () => {
447+
refreshServerPropsOnce();
448+
},
399449
onChunk: (chunk: string) => {
400450
streamedContent += chunk;
401451
this.setConversationStreaming(

tools/server/webui/src/lib/stores/server.svelte.ts

Lines changed: 110 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ class ServerStore {
5252
private _error = $state<string | null>(null);
5353
private _serverWarning = $state<string | null>(null);
5454
private _slotsEndpointAvailable = $state<boolean | null>(null);
55+
private fetchServerPropsPromise: Promise<void> | null = null;
5556

5657
private readCachedServerProps(): ApiLlamaCppServerProps | null {
5758
if (!browser) return null;
@@ -171,88 +172,132 @@ class ServerStore {
171172
/**
172173
* Fetches server properties from the server
173174
*/
174-
async fetchServerProps(): Promise<void> {
175-
this._loading = true;
176-
this._error = null;
177-
this._serverWarning = null;
175+
async fetchServerProps(options: { silent?: boolean } = {}): Promise<void> {
176+
const { silent = false } = options;
177+
const isSilent = silent && this._serverProps !== null;
178178

179-
try {
180-
console.log('Fetching server properties...');
181-
const props = await ChatService.getServerProps();
182-
this._serverProps = props;
183-
this.persistServerProps(props);
184-
console.log('Server properties loaded:', props);
185-
186-
// Check slots endpoint availability after server props are loaded
187-
await this.checkSlotsEndpointAvailability();
188-
} catch (error) {
189-
const hadCachedProps = this._serverProps !== null;
190-
let errorMessage = 'Failed to connect to server';
191-
let isOfflineLikeError = false;
192-
let isServerSideError = false;
193-
194-
if (error instanceof Error) {
195-
// Handle specific error types with user-friendly messages
196-
if (error.name === 'TypeError' && error.message.includes('fetch')) {
197-
errorMessage = 'Server is not running or unreachable';
198-
isOfflineLikeError = true;
199-
} else if (error.message.includes('ECONNREFUSED')) {
200-
errorMessage = 'Connection refused - server may be offline';
201-
isOfflineLikeError = true;
202-
} else if (error.message.includes('ENOTFOUND')) {
203-
errorMessage = 'Server not found - check server address';
204-
isOfflineLikeError = true;
205-
} else if (error.message.includes('ETIMEDOUT')) {
206-
errorMessage = 'Request timed out - the server took too long to respond';
207-
isOfflineLikeError = true;
208-
} else if (error.message.includes('503')) {
209-
errorMessage = 'Server temporarily unavailable - try again shortly';
210-
isServerSideError = true;
211-
} else if (error.message.includes('500')) {
212-
errorMessage = 'Server error - check server logs';
213-
isServerSideError = true;
214-
} else if (error.message.includes('404')) {
215-
errorMessage = 'Server endpoint not found';
216-
} else if (error.message.includes('403') || error.message.includes('401')) {
217-
errorMessage = 'Access denied';
179+
if (this.fetchServerPropsPromise) {
180+
return this.fetchServerPropsPromise;
181+
}
182+
183+
if (!isSilent) {
184+
this._loading = true;
185+
this._error = null;
186+
this._serverWarning = null;
187+
}
188+
189+
const hadProps = this._serverProps !== null;
190+
191+
const fetchPromise = (async () => {
192+
try {
193+
const props = await ChatService.getServerProps();
194+
this._serverProps = props;
195+
this.persistServerProps(props);
196+
this._error = null;
197+
this._serverWarning = null;
198+
await this.checkSlotsEndpointAvailability();
199+
} catch (error) {
200+
if (isSilent && hadProps) {
201+
console.warn('Silent server props refresh failed, keeping cached data:', error);
202+
return;
203+
}
204+
205+
this.handleFetchServerPropsError(error, hadProps);
206+
} finally {
207+
if (!isSilent) {
208+
this._loading = false;
218209
}
210+
211+
this.fetchServerPropsPromise = null;
219212
}
213+
})();
214+
215+
this.fetchServerPropsPromise = fetchPromise;
216+
217+
await fetchPromise;
218+
}
220219

221-
let cachedProps: ApiLlamaCppServerProps | null = null;
220+
/**
221+
* Handles fetch failures by attempting to recover cached server props and
222+
* updating the user-facing error or warning state appropriately.
223+
*/
224+
private handleFetchServerPropsError(error: unknown, hadProps: boolean): void {
225+
const { errorMessage, isOfflineLikeError, isServerSideError } = this.normalizeFetchError(error);
222226

223-
if (!hadCachedProps) {
224-
cachedProps = this.readCachedServerProps();
225-
if (cachedProps) {
226-
this._serverProps = cachedProps;
227-
this._error = null;
227+
let cachedProps: ApiLlamaCppServerProps | null = null;
228228

229-
if (isOfflineLikeError || isServerSideError) {
230-
this._serverWarning = errorMessage;
231-
}
229+
if (!hadProps) {
230+
cachedProps = this.readCachedServerProps();
232231

233-
console.warn(
234-
'Failed to refresh server properties, using cached values from localStorage:',
235-
errorMessage
236-
);
237-
} else {
238-
this._error = errorMessage;
239-
}
240-
} else {
232+
if (cachedProps) {
233+
this._serverProps = cachedProps;
241234
this._error = null;
242235

243236
if (isOfflineLikeError || isServerSideError) {
244237
this._serverWarning = errorMessage;
245238
}
246239

247240
console.warn(
248-
'Failed to refresh server properties, continuing with cached values:',
241+
'Failed to refresh server properties, using cached values from localStorage:',
249242
errorMessage
250243
);
244+
} else {
245+
this._error = errorMessage;
246+
}
247+
} else {
248+
this._error = null;
249+
250+
if (isOfflineLikeError || isServerSideError) {
251+
this._serverWarning = errorMessage;
251252
}
252-
console.error('Error fetching server properties:', error);
253-
} finally {
254-
this._loading = false;
253+
254+
console.warn(
255+
'Failed to refresh server properties, continuing with cached values:',
256+
errorMessage
257+
);
255258
}
259+
260+
console.error('Error fetching server properties:', error);
261+
}
262+
263+
private normalizeFetchError(error: unknown): {
264+
errorMessage: string;
265+
isOfflineLikeError: boolean;
266+
isServerSideError: boolean;
267+
} {
268+
let errorMessage = 'Failed to connect to server';
269+
let isOfflineLikeError = false;
270+
let isServerSideError = false;
271+
272+
if (error instanceof Error) {
273+
const message = error.message || '';
274+
275+
if (error.name === 'TypeError' && message.includes('fetch')) {
276+
errorMessage = 'Server is not running or unreachable';
277+
isOfflineLikeError = true;
278+
} else if (message.includes('ECONNREFUSED')) {
279+
errorMessage = 'Connection refused - server may be offline';
280+
isOfflineLikeError = true;
281+
} else if (message.includes('ENOTFOUND')) {
282+
errorMessage = 'Server not found - check server address';
283+
isOfflineLikeError = true;
284+
} else if (message.includes('ETIMEDOUT')) {
285+
errorMessage = 'Request timed out - the server took too long to respond';
286+
isOfflineLikeError = true;
287+
} else if (message.includes('503')) {
288+
errorMessage = 'Server temporarily unavailable - try again shortly';
289+
isServerSideError = true;
290+
} else if (message.includes('500')) {
291+
errorMessage = 'Server error - check server logs';
292+
isServerSideError = true;
293+
} else if (message.includes('404')) {
294+
errorMessage = 'Server endpoint not found';
295+
} else if (message.includes('403') || message.includes('401')) {
296+
errorMessage = 'Access denied';
297+
}
298+
}
299+
300+
return { errorMessage, isOfflineLikeError, isServerSideError };
256301
}
257302

258303
/**
@@ -264,6 +309,7 @@ class ServerStore {
264309
this._serverWarning = null;
265310
this._loading = false;
266311
this._slotsEndpointAvailable = null;
312+
this.fetchServerPropsPromise = null;
267313
this.persistServerProps(null);
268314
}
269315
}

tools/server/webui/src/lib/types/api.d.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ export interface ApiChatCompletionRequest {
186186
}
187187

188188
export interface ApiChatCompletionStreamChunk {
189+
object?: string;
189190
model?: string;
190191
choices: Array<{
191192
model?: string;

tools/server/webui/src/lib/types/settings.d.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ export interface SettingsChatServiceOptions {
4242
onChunk?: (chunk: string) => void;
4343
onReasoningChunk?: (chunk: string) => void;
4444
onModel?: (model: string) => void;
45+
onFirstValidChunk?: () => void;
4546
onComplete?: (response: string, reasoningContent?: string, timings?: ChatMessageTimings) => void;
4647
onError?: (error: Error) => void;
4748
}

0 commit comments

Comments (0)