diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz
index c026f36c4844d..cd3d254b1c2b8
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ChatSidebar.svelte b/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ChatSidebar.svelte
index 5976e5dd03d7b..adf9f880ae670 100644
--- a/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ChatSidebar.svelte
+++ b/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ChatSidebar.svelte
@@ -13,6 +13,7 @@
 		updateConversationName
 	} from '$lib/stores/chat.svelte';
 	import ChatSidebarActions from './ChatSidebarActions.svelte';
+	import ModelSelector from './ModelSelector.svelte';

 	const sidebar = Sidebar.useSidebar();
@@ -110,6 +111,8 @@
 
 				llama.cpp
 
+				<ModelSelector />
+
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ModelSelector.svelte b/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ModelSelector.svelte
new file mode 100644
index 0000000000000..ca74610b4a179
--- /dev/null
+++ b/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ModelSelector.svelte
@@ -0,0 +1,99 @@
+<script lang="ts">
+	…
+</script>
+
+{#if loading && options.length === 0 && !isMounted}
+
+	Loading models…
+
+{:else if options.length === 0}
+
+	No models available.
+
+{:else}
+	{@const selectedOption = getDisplayOption()}
+
+	{selectedOption?.name || 'Select model'}
+
+	{#if updating}
+
+	{/if}
+
+	{#each options as option (option.id)}
+
+		{option.name}
+
+		{#if option.description}
+			{option.description}
+		{/if}
+
+	{/each}
+
+{/if}
+
+{#if error}
+
+	{error}
+
+{/if}
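For orientation, a minimal sketch of what the component's script block plausibly wires up, inferred from the template bindings above (loading, options, updating, error, isMounted, getDisplayOption) and the helpers exported by models.svelte.ts; every detail here is an assumption, not the file's actual contents:

<script lang="ts">
	import { onMount } from 'svelte';
	import {
		modelOptions,
		modelsLoading,
		modelsUpdating,
		modelsError,
		selectedModelId,
		fetchModels,
		selectModel
	} from '$lib/stores/models.svelte';

	let isMounted = $state(false);

	const options = $derived(modelOptions());
	const loading = $derived(modelsLoading());
	const updating = $derived(modelsUpdating());
	const error = $derived(modelsError());

	// Show the selected option when it exists, otherwise fall back to the first one.
	function getDisplayOption() {
		return options.find((option) => option.id === selectedModelId()) ?? options[0];
	}

	// Presumably bound to the select control's change event in the markup.
	async function handleSelect(modelId: string) {
		try {
			await selectModel(modelId);
		} catch (err) {
			console.error('Model selection failed:', err);
		}
	}

	onMount(async () => {
		try {
			await fetchModels();
		} finally {
			isMounted = true;
		}
	});
</script>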
diff --git a/tools/server/webui/src/lib/components/app/index.ts b/tools/server/webui/src/lib/components/app/index.ts
index 63a99f4343320..f9273162873b1 100644
--- a/tools/server/webui/src/lib/components/app/index.ts
+++ b/tools/server/webui/src/lib/components/app/index.ts
@@ -29,6 +29,7 @@ export { default as ChatSettingsFields } from './chat/ChatSettings/ChatSettingsF
 export { default as ChatSidebar } from './chat/ChatSidebar/ChatSidebar.svelte';
 export { default as ChatSidebarConversationItem } from './chat/ChatSidebar/ChatSidebarConversationItem.svelte';
 export { default as ChatSidebarSearch } from './chat/ChatSidebar/ChatSidebarSearch.svelte';
+export { default as ChatSidebarModelSelector } from './chat/ChatSidebar/ModelSelector.svelte';
 export { default as ChatErrorDialog } from './dialogs/ChatErrorDialog.svelte';
 export { default as EmptyFileAlertDialog } from './dialogs/EmptyFileAlertDialog.svelte';
diff --git a/tools/server/webui/src/lib/services/chat.ts b/tools/server/webui/src/lib/services/chat.ts
index 37e60b85b5a6a..0c4375789acef 100644
--- a/tools/server/webui/src/lib/services/chat.ts
+++ b/tools/server/webui/src/lib/services/chat.ts
@@ -1,4 +1,5 @@
 import { config } from '$lib/stores/settings.svelte';
+import { selectedModelName } from '$lib/stores/models.svelte';
 import { slotsService } from './slots';
 /**
  * ChatService - Low-level API communication layer for llama.cpp server interactions
@@ -50,6 +51,8 @@ export class ChatService {
 			onChunk,
 			onComplete,
 			onError,
+			onReasoningChunk,
+			onModel,
 			// Generation parameters
 			temperature,
 			max_tokens,
@@ -118,6 +121,11 @@ export class ChatService {
 			stream
 		};

+		const activeModel = selectedModelName();
+		if (activeModel) {
+			requestBody.model = activeModel;
+		}
+
 		requestBody.reasoning_format = currentConfig.disableReasoningFormat ? 'none' : 'auto';

 		if (temperature !== undefined) requestBody.temperature = temperature;
@@ -190,10 +198,11 @@ export class ChatService {
 					onChunk,
 					onComplete,
 					onError,
-					options.onReasoningChunk
+					onReasoningChunk,
+					onModel
 				);
 			} else {
-				return this.handleNonStreamResponse(response, onComplete, onError);
+				return this.handleNonStreamResponse(response, onComplete, onError, onModel);
 			}
 		} catch (error) {
 			if (error instanceof Error && error.name === 'AbortError') {
@@ -251,7 +260,8 @@ export class ChatService {
 			timings?: ChatMessageTimings
 		) => void,
 		onError?: (error: Error) => void,
-		onReasoningChunk?: (chunk: string) => void
+		onReasoningChunk?: (chunk: string) => void,
+		onModel?: (model: string) => void
 	): Promise<void> {
 		const reader = response.body?.getReader();
@@ -265,6 +275,7 @@ export class ChatService {
 		let hasReceivedData = false;
 		let lastTimings: ChatMessageTimings | undefined;
 		let streamFinished = false;
+		let modelEmitted = false;

 		try {
 			let chunk = '';
@@ -274,7 +285,7 @@ export class ChatService {
 				chunk += decoder.decode(value, { stream: true });

 				const lines = chunk.split('\n');
-				chunk = lines.pop() || ''; // Save incomplete line for next read
+				chunk = lines.pop() || '';

 				for (const line of lines) {
 					if (line.startsWith('data: ')) {
@@ -287,6 +298,12 @@ export class ChatService {
 						try {
 							const parsed: ApiChatCompletionStreamChunk = JSON.parse(data);

+							const chunkModel = this.extractModelName(parsed);
+							if (chunkModel && !modelEmitted) {
+								modelEmitted = true;
+								onModel?.(chunkModel);
+							}
+
 							const content = parsed.choices[0]?.delta?.content;
 							const reasoningContent = parsed.choices[0]?.delta?.reasoning_content;
 							const timings = parsed.timings;
@@ -295,7 +312,6 @@ export class ChatService {
 							if (timings || promptProgress) {
 								this.updateProcessingState(timings, promptProgress);

-								// Store the latest timing data
 								if (timings) {
 									lastTimings = timings;
 								}
@@ -355,7 +371,8 @@ export class ChatService {
 			reasoningContent?: string,
 			timings?: ChatMessageTimings
 		) => void,
-		onError?: (error: Error) => void
+		onError?: (error: Error) => void,
+		onModel?: (model: string) => void
 	): Promise<void> {
 		try {
 			const responseText = await response.text();
@@ -366,6 +383,11 @@ export class ChatService {
 			}

 			const data: ApiChatCompletionResponse = JSON.parse(responseText);
+			const responseModel = this.extractModelName(data);
+			if (responseModel) {
+				onModel?.(responseModel);
+			}
+
 			const content = data.choices[0]?.message?.content || '';
 			const reasoningContent = data.choices[0]?.message?.reasoning_content;
@@ -588,6 +610,69 @@ export class ChatService {
 		}
 	}

+	private extractModelName(data: unknown): string | undefined {
+		if (!data || typeof data !== 'object') {
+			return undefined;
+		}
+
+		const record = data as Record<string, unknown>;
+		const normalize = (value: unknown): string | undefined => {
+			if (typeof value !== 'string') {
+				return undefined;
+			}
+
+			const trimmed = value.trim();
+
+			return trimmed.length > 0 ? trimmed : undefined;
+		};
+
+		const rootModel = normalize(record['model']);
+		if (rootModel) {
+			return rootModel;
+		}
+
+		const choices = record['choices'];
+		if (!Array.isArray(choices) || choices.length === 0) {
+			return undefined;
+		}
+
+		const firstChoice = choices[0] as Record<string, unknown> | undefined;
+		if (!firstChoice) {
+			return undefined;
+		}
+
+		const choiceModel = normalize(firstChoice['model']);
+		if (choiceModel) {
+			return choiceModel;
+		}
+
+		const delta = firstChoice['delta'] as Record<string, unknown> | undefined;
+		if (delta) {
+			const deltaModel = normalize(delta['model']);
+			if (deltaModel) {
+				return deltaModel;
+			}
+		}
+
+		const message = firstChoice['message'] as Record<string, unknown> | undefined;
+		if (message) {
+			const messageModel = normalize(message['model']);
+			if (messageModel) {
+				return messageModel;
+			}
+		}
+
+		const metadata = firstChoice['metadata'] as Record<string, unknown> | undefined;
+		if (metadata) {
+			const metadataModel = normalize(metadata['model']);
+			if (metadataModel) {
+				return metadataModel;
+			}
+		}
+
+		return undefined;
+	}
+
 	private updateProcessingState(
 		timings?: ChatMessageTimings,
 		promptProgress?: ChatMessagePromptProgress
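To make the precedence in extractModelName concrete (root model first, then choices[0].model, delta.model, message.model, and finally metadata.model, with whitespace-only values rejected), here are a few hypothetical payloads and what they resolve to; the model names are illustrative only:

// Root-level field wins: resolves to 'qwen2.5-7b-instruct'.
const rootChunk = { model: 'qwen2.5-7b-instruct', choices: [{ delta: { content: 'Hi' } }] };

// No root field, so the name is recovered from the first choice's delta.
const deltaChunk = { choices: [{ delta: { content: 'Hi', model: 'qwen2.5-7b-instruct' } }] };

// normalize() trims and rejects empty strings: this resolves to undefined.
const blankChunk = { model: '   ', choices: [] };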
diff --git a/tools/server/webui/src/lib/services/models.ts b/tools/server/webui/src/lib/services/models.ts
new file mode 100644
index 0000000000000..1c7fa3b45631c
--- /dev/null
+++ b/tools/server/webui/src/lib/services/models.ts
@@ -0,0 +1,22 @@
+import { base } from '$app/paths';
+import { config } from '$lib/stores/settings.svelte';
+import type { ApiModelListResponse } from '$lib/types/api';
+
+export class ModelsService {
+	static async list(): Promise<ApiModelListResponse> {
+		const currentConfig = config();
+		const apiKey = currentConfig.apiKey?.toString().trim();
+
+		const response = await fetch(`${base}/v1/models`, {
+			headers: {
+				...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {})
+			}
+		});
+
+		if (!response.ok) {
+			throw new Error(`Failed to fetch model list (status ${response.status})`);
+		}
+
+		return response.json() as Promise<ApiModelListResponse>;
+	}
+}
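A possible call site for the new service, for instance while hydrating a store; the logging and error handling here are assumptions, not part of the diff:

import { ModelsService } from '$lib/services/models';

async function logAvailableModels(): Promise<void> {
	try {
		const response = await ModelsService.list();

		// `data` follows the OpenAI-style /v1/models shape; `models` carries optional details.
		for (const entry of response.data) {
			console.log('available model:', entry.id);
		}
	} catch (error) {
		console.error('Could not fetch /v1/models:', error);
	}
}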
diff --git a/tools/server/webui/src/lib/stores/chat.svelte.ts b/tools/server/webui/src/lib/stores/chat.svelte.ts
index 5b77abb4cb21c..ed5b9127b22e9 100644
--- a/tools/server/webui/src/lib/stores/chat.svelte.ts
+++ b/tools/server/webui/src/lib/stores/chat.svelte.ts
@@ -1,6 +1,5 @@
 import { DatabaseStore } from '$lib/stores/database';
 import { chatService, slotsService } from '$lib/services';
-import { serverStore } from '$lib/stores/server.svelte';
 import { config } from '$lib/stores/settings.svelte';
 import { filterByLeafNodeId, findLeafNode, findDescendantMessages } from '$lib/utils/branching';
 import { browser } from '$app/environment';
@@ -300,30 +299,30 @@ class ChatStore {
 	): Promise<void> {
 		let streamedContent = '';
 		let streamedReasoningContent = '';
-		let modelCaptured = false;
+		let resolvedModel: string | null = null;
+		let modelPersisted = false;

-		const captureModelIfNeeded = (updateDbImmediately = true): string | undefined => {
-			if (!modelCaptured) {
-				const currentModelName = serverStore.modelName;
+		const recordModel = (modelName: string, persistImmediately = true): void => {
+			const trimmedModel = modelName.trim();

-				if (currentModelName) {
-					if (updateDbImmediately) {
-						DatabaseStore.updateMessage(assistantMessage.id, { model: currentModelName }).catch(
-							console.error
-						);
-					}
+			if (!trimmedModel || trimmedModel === resolvedModel) {
+				return;
+			}

-					const messageIndex = this.findMessageIndex(assistantMessage.id);
+			resolvedModel = trimmedModel;

-					this.updateMessageAtIndex(messageIndex, { model: currentModelName });
-					modelCaptured = true;
+			const messageIndex = this.findMessageIndex(assistantMessage.id);

-					return currentModelName;
-				}
+			this.updateMessageAtIndex(messageIndex, { model: trimmedModel });
+
+			if (persistImmediately && !modelPersisted) {
+				modelPersisted = true;
+				DatabaseStore.updateMessage(assistantMessage.id, { model: trimmedModel }).catch((error) => {
+					console.error('Failed to persist model name:', error);
+					modelPersisted = false;
+				});
 			}
-
-			return undefined;
 		};

 		slotsService.startStreaming();

 		await chatService.sendMessage(allMessages, {
@@ -333,7 +332,6 @@ class ChatStore {
 				streamedContent += chunk;
 				this.currentResponse = streamedContent;

-				captureModelIfNeeded();
 				const messageIndex = this.findMessageIndex(assistantMessage.id);
 				this.updateMessageAtIndex(messageIndex, {
 					content: streamedContent
@@ -343,13 +341,15 @@ class ChatStore {
 			onReasoningChunk: (reasoningChunk: string) => {
 				streamedReasoningContent += reasoningChunk;

-				captureModelIfNeeded();
-
 				const messageIndex = this.findMessageIndex(assistantMessage.id);
 				this.updateMessageAtIndex(messageIndex, { thinking: streamedReasoningContent });
 			},

+			onModel: (modelName: string) => {
+				recordModel(modelName);
+			},
+
 			onComplete: async (
 				finalContent?: string,
 				reasoningContent?: string,
@@ -368,10 +368,9 @@ class ChatStore {
 					timings: timings
 				};

-				const capturedModel = captureModelIfNeeded(false);
-
-				if (capturedModel) {
-					updateData.model = capturedModel;
+				if (resolvedModel && !modelPersisted) {
+					updateData.model = resolvedModel;
+					modelPersisted = true;
 				}

 				await DatabaseStore.updateMessage(assistantMessage.id, updateData);
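Because the new callback is part of SettingsChatServiceOptions, any caller of chatService.sendMessage can observe the resolved model the same way the chat store does; a minimal sketch, where allMessages stands in for a prepared message array:

await chatService.sendMessage(allMessages, {
	onChunk: (chunk) => console.log('content:', chunk),
	onReasoningChunk: (chunk) => console.log('reasoning:', chunk),
	// Emitted at most once per response, with the first non-empty model name found.
	onModel: (model) => console.log('served by:', model),
	onError: (error) => console.error(error)
});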
diff --git a/tools/server/webui/src/lib/stores/models.svelte.ts b/tools/server/webui/src/lib/stores/models.svelte.ts
new file mode 100644
index 0000000000000..967346fcc964a
--- /dev/null
+++ b/tools/server/webui/src/lib/stores/models.svelte.ts
@@ -0,0 +1,223 @@
+import { browser } from '$app/environment';
+import { ModelsService } from '$lib/services/models';
+import type { ApiModelDataEntry, ApiModelDetails } from '$lib/types/api';
+
+export interface ModelOption {
+	id: string;
+	name: string;
+	model: string;
+	description?: string;
+	capabilities: string[];
+	details?: ApiModelDetails['details'];
+	meta?: ApiModelDataEntry['meta'];
+}
+
+type PersistedModelSelection = {
+	id: string;
+	model: string;
+};
+
+const STORAGE_KEY = 'llama.cpp:selectedModel';
+
+class ModelsStore {
+	private _models = $state<ModelOption[]>([]);
+	private _loading = $state(false);
+	private _updating = $state(false);
+	private _error = $state<string | null>(null);
+	private _selectedModelId = $state<string | null>(null);
+	private _selectedModelName = $state<string | null>(null);
+
+	constructor() {
+		const persisted = this.readPersistedSelection();
+		if (persisted) {
+			this._selectedModelId = persisted.id;
+			this._selectedModelName = persisted.model;
+		}
+	}
+
+	get models(): ModelOption[] {
+		return this._models;
+	}
+
+	get loading(): boolean {
+		return this._loading;
+	}
+
+	get updating(): boolean {
+		return this._updating;
+	}
+
+	get error(): string | null {
+		return this._error;
+	}
+
+	get selectedModelId(): string | null {
+		return this._selectedModelId;
+	}
+
+	get selectedModelName(): string | null {
+		return this._selectedModelName;
+	}
+
+	get selectedModel(): ModelOption | null {
+		if (!this._selectedModelId) {
+			return null;
+		}
+
+		return this._models.find((model) => model.id === this._selectedModelId) ?? null;
+	}
+
+	async fetch(force = false): Promise<void> {
+		if (this._loading) return;
+		if (this._models.length > 0 && !force) return;
+
+		this._loading = true;
+		this._error = null;
+
+		try {
+			const response = await ModelsService.list();
+
+			const models: ModelOption[] = response.data.map((item, index) => {
+				const details = response.models?.[index];
+				const rawCapabilities = Array.isArray(details?.capabilities)
+					? [...(details?.capabilities ?? [])]
+					: [];
+				const displayNameSource =
+					details?.name && details.name.trim().length > 0 ? details.name : item.id;
+				const displayName = this.toDisplayName(displayNameSource);
+
+				return {
+					id: item.id,
+					name: displayName,
+					model: details?.model || item.id,
+					description: details?.description,
+					capabilities: rawCapabilities.filter((value): value is string => Boolean(value)),
+					details: details?.details,
+					meta: item.meta ?? null
+				} satisfies ModelOption;
+			});
+
+			this._models = models;
+
+			const persisted = this.readPersistedSelection();
+			let nextSelectionId = this._selectedModelId ?? persisted?.id ?? null;
+			let nextSelectionName = this._selectedModelName ?? persisted?.model ?? null;
+
+			if (nextSelectionId) {
+				const match = models.find((model) => model.id === nextSelectionId);
+				if (match) {
+					nextSelectionId = match.id;
+					nextSelectionName = match.model;
+				} else if (models[0]) {
+					nextSelectionId = models[0].id;
+					nextSelectionName = models[0].model;
+				} else {
+					nextSelectionId = null;
+					nextSelectionName = null;
+				}
+			} else if (models[0]) {
+				nextSelectionId = models[0].id;
+				nextSelectionName = models[0].model;
+			}
+
+			this._selectedModelId = nextSelectionId;
+			this._selectedModelName = nextSelectionName;
+			this.persistSelection(
+				nextSelectionId && nextSelectionName
+					? { id: nextSelectionId, model: nextSelectionName }
+					: null
+			);
+		} catch (error) {
+			this._models = [];
+			this._error = error instanceof Error ? error.message : 'Failed to load models';
+			throw error;
+		} finally {
+			this._loading = false;
+		}
+	}
+
+	async select(modelId: string): Promise<void> {
+		if (!modelId || this._updating) {
+			return;
+		}
+
+		if (this._selectedModelId === modelId) {
+			return;
+		}
+
+		const option = this._models.find((model) => model.id === modelId);
+		if (!option) {
+			throw new Error('Selected model is not available');
+		}
+
+		this._updating = true;
+		this._error = null;
+
+		try {
+			this._selectedModelId = option.id;
+			this._selectedModelName = option.model;
+			this.persistSelection({ id: option.id, model: option.model });
+		} finally {
+			this._updating = false;
+		}
+	}
+
+	private toDisplayName(id: string): string {
+		const segments = id.split(/\\|\//);
+		const candidate = segments.pop();
+
+		return candidate && candidate.trim().length > 0 ? candidate : id;
+	}
+
+	private readPersistedSelection(): PersistedModelSelection | null {
+		if (!browser) {
+			return null;
+		}
+
+		try {
+			const raw = localStorage.getItem(STORAGE_KEY);
+			if (!raw) {
+				return null;
+			}
+
+			const parsed = JSON.parse(raw);
+			if (parsed && typeof parsed.id === 'string') {
+				const id = parsed.id;
+				const model =
+					typeof parsed.model === 'string' && parsed.model.length > 0 ? parsed.model : id;

+				return { id, model };
+			}
+		} catch (error) {
+			console.warn('Failed to read model selection from localStorage:', error);
+		}
+
+		return null;
+	}
+
+	private persistSelection(selection: PersistedModelSelection | null): void {
+		if (!browser) {
+			return;
+		}
+
+		try {
+			if (selection) {
+				localStorage.setItem(STORAGE_KEY, JSON.stringify(selection));
+			} else {
+				localStorage.removeItem(STORAGE_KEY);
+			}
+		} catch (error) {
+			console.warn('Failed to persist model selection to localStorage:', error);
+		}
+	}
+}
+
+export const modelsStore = new ModelsStore();
+
+export const modelOptions = () => modelsStore.models;
+export const modelsLoading = () => modelsStore.loading;
+export const modelsUpdating = () => modelsStore.updating;
+export const modelsError = () => modelsStore.error;
+export const selectedModelId = () => modelsStore.selectedModelId;
+export const selectedModelName = () => modelsStore.selectedModelName;
+export const selectedModelOption = () => modelsStore.selectedModel;
+
+export const fetchModels = modelsStore.fetch.bind(modelsStore);
+export const selectModel = modelsStore.select.bind(modelsStore);
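The function-style exports at the bottom of the store give callers a compact API; a usage sketch, assuming it runs somewhere with access to the browser's localStorage:

import {
	fetchModels,
	modelOptions,
	selectModel,
	selectedModelName
} from '$lib/stores/models.svelte';

async function pickFirstModel(): Promise<void> {
	await fetchModels(); // No-op when models are already cached and force is false.

	const first = modelOptions()[0];
	if (first) {
		await selectModel(first.id); // Persists { id, model } under 'llama.cpp:selectedModel'.
	}

	console.log('chat requests will now send model:', selectedModelName());
}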
diff --git a/tools/server/webui/src/lib/types/api.d.ts b/tools/server/webui/src/lib/types/api.d.ts
index d0e60a6c13706..6d76ab1f68e9d 100644
--- a/tools/server/webui/src/lib/types/api.d.ts
+++ b/tools/server/webui/src/lib/types/api.d.ts
@@ -36,6 +36,41 @@ export interface ApiChatMessageData {
 	timestamp?: number;
 }

+export interface ApiModelDataEntry {
+	id: string;
+	object: string;
+	created: number;
+	owned_by: string;
+	meta?: Record<string, unknown> | null;
+}
+
+export interface ApiModelDetails {
+	name: string;
+	model: string;
+	modified_at?: string;
+	size?: string | number;
+	digest?: string;
+	type?: string;
+	description?: string;
+	tags?: string[];
+	capabilities?: string[];
+	parameters?: string;
+	details?: {
+		parent_model?: string;
+		format?: string;
+		family?: string;
+		families?: string[];
+		parameter_size?: string;
+		quantization_level?: string;
+	};
+}
+
+export interface ApiModelListResponse {
+	object: string;
+	data: ApiModelDataEntry[];
+	models?: ApiModelDetails[];
+}
+
 export interface ApiLlamaCppServerProps {
 	default_generation_settings: {
 		id: number;
@@ -120,6 +155,7 @@ export interface ApiChatCompletionRequest {
 		content: string | ApiChatMessageContentPart[];
 	}>;
 	stream?: boolean;
+	model?: string;
 	// Reasoning parameters
 	reasoning_format?: string;
 	// Generation parameters
@@ -150,10 +186,14 @@ export interface ApiChatCompletionRequest {
 }

 export interface ApiChatCompletionStreamChunk {
+	model?: string;
 	choices: Array<{
+		model?: string;
+		metadata?: { model?: string };
 		delta: {
 			content?: string;
 			reasoning_content?: string;
+			model?: string;
 		};
 	}>;
 	timings?: {
@@ -167,10 +207,14 @@ export interface ApiChatCompletionStreamChunk {
 }

 export interface ApiChatCompletionResponse {
+	model?: string;
 	choices: Array<{
+		model?: string;
+		metadata?: { model?: string };
 		message: {
 			content: string;
 			reasoning_content?: string;
+			model?: string;
 		};
 	}>;
 }
diff --git a/tools/server/webui/src/lib/types/settings.d.ts b/tools/server/webui/src/lib/types/settings.d.ts
index 4311f779ad841..659fb0c7d1cf5 100644
--- a/tools/server/webui/src/lib/types/settings.d.ts
+++ b/tools/server/webui/src/lib/types/settings.d.ts
@@ -41,6 +41,7 @@ export interface SettingsChatServiceOptions {
 	// Callbacks
 	onChunk?: (chunk: string) => void;
 	onReasoningChunk?: (chunk: string) => void;
+	onModel?: (model: string) => void;
 	onComplete?: (response: string, reasoningContent?: string, timings?: ChatMessageTimings) => void;
 	onError?: (error: Error) => void;
 }
diff --git a/tools/server/webui/src/routes/+layout.svelte b/tools/server/webui/src/routes/+layout.svelte
index 0245cf3abcef4..6b64aa1e9fd6c 100644
--- a/tools/server/webui/src/routes/+layout.svelte
+++ b/tools/server/webui/src/routes/+layout.svelte
@@ -139,7 +139,7 @@
-
+
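End to end, the wire-format change is a single field: when a model is selected, ChatService injects it into the /v1/chat/completions request body. A hypothetical body after selection, with illustrative values:

const requestBody = {
	messages: [{ role: 'user', content: 'Hello' }],
	stream: true,
	model: 'qwen2.5-7b-instruct', // From selectedModelName(); omitted when no model is selected.
	reasoning_format: 'auto'
};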