diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz
index c026f36c4844d..cd3d254b1c2b8 100644
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ChatSidebar.svelte b/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ChatSidebar.svelte
index 5976e5dd03d7b..adf9f880ae670 100644
--- a/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ChatSidebar.svelte
+++ b/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ChatSidebar.svelte
@@ -13,6 +13,7 @@
updateConversationName
} from '$lib/stores/chat.svelte';
import ChatSidebarActions from './ChatSidebarActions.svelte';
+ import ModelSelector from './ModelSelector.svelte';
const sidebar = Sidebar.useSidebar();
@@ -110,6 +111,8 @@
llama.cpp
+
+					<ModelSelector />
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ModelSelector.svelte b/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ModelSelector.svelte
new file mode 100644
index 0000000000000..ca74610b4a179
--- /dev/null
+++ b/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ModelSelector.svelte
@@ -0,0 +1,99 @@
+<script lang="ts">
+	import { onMount } from 'svelte';
+	import * as Select from '$lib/components/ui/select';
+	import {
+		fetchModels,
+		modelOptions,
+		modelsError,
+		modelsLoading,
+		modelsUpdating,
+		selectModel,
+		selectedModelId
+	} from '$lib/stores/models.svelte';
+
+	let isMounted = $state(false);
+
+	const options = $derived(modelOptions());
+	const loading = $derived(modelsLoading());
+	const updating = $derived(modelsUpdating());
+	const error = $derived(modelsError());
+
+	onMount(async () => {
+		try {
+			await fetchModels();
+		} catch {
+			// Fetch errors are surfaced through modelsError()
+		}
+
+		isMounted = true;
+	});
+
+	function getDisplayOption() {
+		const id = selectedModelId();
+
+		return options.find((option) => option.id === id) ?? options[0];
+	}
+
+	async function handleSelect(modelId: string) {
+		try {
+			await selectModel(modelId);
+		} catch {
+			// Selection errors are surfaced through modelsError()
+		}
+	}
+</script>
+
+{#if loading && options.length === 0 && !isMounted}
+	<div class="text-muted-foreground px-2 py-1.5 text-xs">Loading models…</div>
+{:else if options.length === 0}
+	<div class="text-muted-foreground px-2 py-1.5 text-xs">No models available.</div>
+{:else}
+	{@const selectedOption = getDisplayOption()}
+
+	<Select.Root type="single" value={selectedOption?.id ?? ''} onValueChange={handleSelect}>
+		<Select.Trigger class="w-full">
+			<span class="truncate">{selectedOption?.name || 'Select model'}</span>
+
+			{#if updating}
+				<span class="text-muted-foreground ml-auto text-xs">…</span>
+			{/if}
+		</Select.Trigger>
+
+		<Select.Content>
+			{#each options as option (option.id)}
+				<Select.Item value={option.id} label={option.name}>
+					<span class="truncate">{option.name}</span>
+
+					{#if option.description}
+						<span class="text-muted-foreground truncate text-xs">{option.description}</span>
+					{/if}
+				</Select.Item>
+			{/each}
+		</Select.Content>
+	</Select.Root>
+{/if}
+
+{#if error}
+	<p class="text-destructive px-2 py-1 text-xs">{error}</p>
+{/if}
diff --git a/tools/server/webui/src/lib/components/app/index.ts b/tools/server/webui/src/lib/components/app/index.ts
index 63a99f4343320..f9273162873b1 100644
--- a/tools/server/webui/src/lib/components/app/index.ts
+++ b/tools/server/webui/src/lib/components/app/index.ts
@@ -29,6 +29,7 @@ export { default as ChatSettingsFields } from './chat/ChatSettings/ChatSettingsF
export { default as ChatSidebar } from './chat/ChatSidebar/ChatSidebar.svelte';
export { default as ChatSidebarConversationItem } from './chat/ChatSidebar/ChatSidebarConversationItem.svelte';
export { default as ChatSidebarSearch } from './chat/ChatSidebar/ChatSidebarSearch.svelte';
+export { default as ChatSidebarModelSelector } from './chat/ChatSidebar/ModelSelector.svelte';
export { default as ChatErrorDialog } from './dialogs/ChatErrorDialog.svelte';
export { default as EmptyFileAlertDialog } from './dialogs/EmptyFileAlertDialog.svelte';
diff --git a/tools/server/webui/src/lib/services/chat.ts b/tools/server/webui/src/lib/services/chat.ts
index 37e60b85b5a6a..0c4375789acef 100644
--- a/tools/server/webui/src/lib/services/chat.ts
+++ b/tools/server/webui/src/lib/services/chat.ts
@@ -1,4 +1,5 @@
import { config } from '$lib/stores/settings.svelte';
+import { selectedModelName } from '$lib/stores/models.svelte';
import { slotsService } from './slots';
/**
* ChatService - Low-level API communication layer for llama.cpp server interactions
@@ -50,6 +51,8 @@ export class ChatService {
onChunk,
onComplete,
onError,
+ onReasoningChunk,
+ onModel,
// Generation parameters
temperature,
max_tokens,
@@ -118,6 +121,11 @@ export class ChatService {
stream
};
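+
+ // Route the request to the user-selected model, if one is set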
+ const activeModel = selectedModelName();
+ if (activeModel) {
+ requestBody.model = activeModel;
+ }
+
requestBody.reasoning_format = currentConfig.disableReasoningFormat ? 'none' : 'auto';
if (temperature !== undefined) requestBody.temperature = temperature;
@@ -190,10 +198,11 @@ export class ChatService {
onChunk,
onComplete,
onError,
- options.onReasoningChunk
+ onReasoningChunk,
+ onModel
);
} else {
- return this.handleNonStreamResponse(response, onComplete, onError);
+ return this.handleNonStreamResponse(response, onComplete, onError, onModel);
}
} catch (error) {
if (error instanceof Error && error.name === 'AbortError') {
@@ -251,7 +260,8 @@ export class ChatService {
timings?: ChatMessageTimings
) => void,
onError?: (error: Error) => void,
- onReasoningChunk?: (chunk: string) => void
+ onReasoningChunk?: (chunk: string) => void,
+ onModel?: (model: string) => void
): Promise<void> {
const reader = response.body?.getReader();
@@ -265,6 +275,7 @@ export class ChatService {
let hasReceivedData = false;
let lastTimings: ChatMessageTimings | undefined;
let streamFinished = false;
+ let modelEmitted = false;
try {
let chunk = '';
@@ -274,7 +285,7 @@ export class ChatService {
chunk += decoder.decode(value, { stream: true });
const lines = chunk.split('\n');
- chunk = lines.pop() || ''; // Save incomplete line for next read
+ chunk = lines.pop() || '';
for (const line of lines) {
if (line.startsWith('data: ')) {
@@ -287,6 +298,12 @@ export class ChatService {
try {
const parsed: ApiChatCompletionStreamChunk = JSON.parse(data);
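+
+ // Surface the serving model once, from the first chunk that names it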
+ const chunkModel = this.extractModelName(parsed);
+ if (chunkModel && !modelEmitted) {
+ modelEmitted = true;
+ onModel?.(chunkModel);
+ }
+
const content = parsed.choices[0]?.delta?.content;
const reasoningContent = parsed.choices[0]?.delta?.reasoning_content;
const timings = parsed.timings;
@@ -295,7 +312,6 @@ export class ChatService {
if (timings || promptProgress) {
this.updateProcessingState(timings, promptProgress);
- // Store the latest timing data
if (timings) {
lastTimings = timings;
}
@@ -355,7 +371,8 @@ export class ChatService {
reasoningContent?: string,
timings?: ChatMessageTimings
) => void,
- onError?: (error: Error) => void
+ onError?: (error: Error) => void,
+ onModel?: (model: string) => void
): Promise<void> {
try {
const responseText = await response.text();
@@ -366,6 +383,11 @@ export class ChatService {
}
const data: ApiChatCompletionResponse = JSON.parse(responseText);
+ const responseModel = this.extractModelName(data);
+ if (responseModel) {
+ onModel?.(responseModel);
+ }
+
const content = data.choices[0]?.message?.content || '';
const reasoningContent = data.choices[0]?.message?.reasoning_content;
@@ -588,6 +610,69 @@ export class ChatService {
}
}
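+ /**
+ * Best-effort extraction of the model name from a completion payload.
+ * Checks the response root first, then the first choice and its
+ * delta/message/metadata, since servers differ in where they report it.
+ */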
+ private extractModelName(data: unknown): string | undefined {
+ if (!data || typeof data !== 'object') {
+ return undefined;
+ }
+
+ const record = data as Record<string, unknown>;
+ const normalize = (value: unknown): string | undefined => {
+ if (typeof value !== 'string') {
+ return undefined;
+ }
+
+ const trimmed = value.trim();
+
+ return trimmed.length > 0 ? trimmed : undefined;
+ };
+
+ const rootModel = normalize(record['model']);
+ if (rootModel) {
+ return rootModel;
+ }
+
+ const choices = record['choices'];
+ if (!Array.isArray(choices) || choices.length === 0) {
+ return undefined;
+ }
+
+ const firstChoice = choices[0] as Record<string, unknown> | undefined;
+ if (!firstChoice) {
+ return undefined;
+ }
+
+ const choiceModel = normalize(firstChoice['model']);
+ if (choiceModel) {
+ return choiceModel;
+ }
+
+ const delta = firstChoice['delta'] as Record<string, unknown> | undefined;
+ if (delta) {
+ const deltaModel = normalize(delta['model']);
+ if (deltaModel) {
+ return deltaModel;
+ }
+ }
+
+ const message = firstChoice['message'] as Record<string, unknown> | undefined;
+ if (message) {
+ const messageModel = normalize(message['model']);
+ if (messageModel) {
+ return messageModel;
+ }
+ }
+
+ const metadata = firstChoice['metadata'] as Record<string, unknown> | undefined;
+ if (metadata) {
+ const metadataModel = normalize(metadata['model']);
+ if (metadataModel) {
+ return metadataModel;
+ }
+ }
+
+ return undefined;
+ }
+
private updateProcessingState(
timings?: ChatMessageTimings,
promptProgress?: ChatMessagePromptProgress
diff --git a/tools/server/webui/src/lib/services/models.ts b/tools/server/webui/src/lib/services/models.ts
new file mode 100644
index 0000000000000..1c7fa3b45631c
--- /dev/null
+++ b/tools/server/webui/src/lib/services/models.ts
@@ -0,0 +1,22 @@
+import { base } from '$app/paths';
+import { config } from '$lib/stores/settings.svelte';
+import type { ApiModelListResponse } from '$lib/types/api';
+
+export class ModelsService {
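+ /** Fetch the model list from the server's OpenAI-compatible `/v1/models` endpoint. */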
+ static async list(): Promise<ApiModelListResponse> {
+ const currentConfig = config();
+ const apiKey = currentConfig.apiKey?.toString().trim();
+
+ const response = await fetch(`${base}/v1/models`, {
+ headers: {
+ ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {})
+ }
+ });
+
+ if (!response.ok) {
+ throw new Error(`Failed to fetch model list (status ${response.status})`);
+ }
+
+ return response.json() as Promise<ApiModelListResponse>;
+ }
+}
diff --git a/tools/server/webui/src/lib/stores/chat.svelte.ts b/tools/server/webui/src/lib/stores/chat.svelte.ts
index 5b77abb4cb21c..ed5b9127b22e9 100644
--- a/tools/server/webui/src/lib/stores/chat.svelte.ts
+++ b/tools/server/webui/src/lib/stores/chat.svelte.ts
@@ -1,6 +1,5 @@
import { DatabaseStore } from '$lib/stores/database';
import { chatService, slotsService } from '$lib/services';
-import { serverStore } from '$lib/stores/server.svelte';
import { config } from '$lib/stores/settings.svelte';
import { filterByLeafNodeId, findLeafNode, findDescendantMessages } from '$lib/utils/branching';
import { browser } from '$app/environment';
@@ -300,30 +299,30 @@ class ChatStore {
): Promise<void> {
let streamedContent = '';
let streamedReasoningContent = '';
- let modelCaptured = false;
+ let resolvedModel: string | null = null;
+ let modelPersisted = false;
- const captureModelIfNeeded = (updateDbImmediately = true): string | undefined => {
- if (!modelCaptured) {
- const currentModelName = serverStore.modelName;
+ const recordModel = (modelName: string, persistImmediately = true): void => {
+ const trimmedModel = modelName.trim();
- if (currentModelName) {
- if (updateDbImmediately) {
- DatabaseStore.updateMessage(assistantMessage.id, { model: currentModelName }).catch(
- console.error
- );
- }
+ if (!trimmedModel || trimmedModel === resolvedModel) {
+ return;
+ }
- const messageIndex = this.findMessageIndex(assistantMessage.id);
+ resolvedModel = trimmedModel;
- this.updateMessageAtIndex(messageIndex, { model: currentModelName });
- modelCaptured = true;
+ const messageIndex = this.findMessageIndex(assistantMessage.id);
- return currentModelName;
- }
+ this.updateMessageAtIndex(messageIndex, { model: trimmedModel });
+
+ if (persistImmediately && !modelPersisted) {
+ modelPersisted = true;
+ DatabaseStore.updateMessage(assistantMessage.id, { model: trimmedModel }).catch((error) => {
+ console.error('Failed to persist model name:', error);
+ modelPersisted = false;
+ });
}
- return undefined;
};
-
slotsService.startStreaming();
await chatService.sendMessage(allMessages, {
@@ -333,7 +332,6 @@ class ChatStore {
streamedContent += chunk;
this.currentResponse = streamedContent;
- captureModelIfNeeded();
const messageIndex = this.findMessageIndex(assistantMessage.id);
this.updateMessageAtIndex(messageIndex, {
content: streamedContent
@@ -343,13 +341,15 @@ class ChatStore {
onReasoningChunk: (reasoningChunk: string) => {
streamedReasoningContent += reasoningChunk;
- captureModelIfNeeded();
-
const messageIndex = this.findMessageIndex(assistantMessage.id);
this.updateMessageAtIndex(messageIndex, { thinking: streamedReasoningContent });
},
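+
+ // The model name now comes from the response itself rather than from server props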
+ onModel: (modelName: string) => {
+ recordModel(modelName);
+ },
+
onComplete: async (
finalContent?: string,
reasoningContent?: string,
@@ -368,10 +368,9 @@ class ChatStore {
timings: timings
};
- const capturedModel = captureModelIfNeeded(false);
-
- if (capturedModel) {
- updateData.model = capturedModel;
+ if (resolvedModel && !modelPersisted) {
+ updateData.model = resolvedModel;
+ modelPersisted = true;
}
await DatabaseStore.updateMessage(assistantMessage.id, updateData);
diff --git a/tools/server/webui/src/lib/stores/models.svelte.ts b/tools/server/webui/src/lib/stores/models.svelte.ts
new file mode 100644
index 0000000000000..967346fcc964a
--- /dev/null
+++ b/tools/server/webui/src/lib/stores/models.svelte.ts
@@ -0,0 +1,223 @@
+import { browser } from '$app/environment';
+import { ModelsService } from '$lib/services/models';
+import type { ApiModelDataEntry, ApiModelDetails } from '$lib/types/api';
+
+export interface ModelOption {
+ id: string;
+ name: string;
+ model: string;
+ description?: string;
+ capabilities: string[];
+ details?: ApiModelDetails['details'];
+ meta?: ApiModelDataEntry['meta'];
+}
+
+type PersistedModelSelection = {
+ id: string;
+ model: string;
+};
+
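+// The last selection is persisted to localStorage so it survives page reloads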
+const STORAGE_KEY = 'llama.cpp:selectedModel';
+
+class ModelsStore {
+ private _models = $state<ModelOption[]>([]);
+ private _loading = $state(false);
+ private _updating = $state(false);
+ private _error = $state<string | null>(null);
+ private _selectedModelId = $state<string | null>(null);
+ private _selectedModelName = $state<string | null>(null);
+
+ constructor() {
+ const persisted = this.readPersistedSelection();
+ if (persisted) {
+ this._selectedModelId = persisted.id;
+ this._selectedModelName = persisted.model;
+ }
+ }
+
+ get models(): ModelOption[] {
+ return this._models;
+ }
+
+ get loading(): boolean {
+ return this._loading;
+ }
+
+ get updating(): boolean {
+ return this._updating;
+ }
+
+ get error(): string | null {
+ return this._error;
+ }
+
+ get selectedModelId(): string | null {
+ return this._selectedModelId;
+ }
+
+ get selectedModelName(): string | null {
+ return this._selectedModelName;
+ }
+
+ get selectedModel(): ModelOption | null {
+ if (!this._selectedModelId) {
+ return null;
+ }
+
+ return this._models.find((model) => model.id === this._selectedModelId) ?? null;
+ }
+
+ async fetch(force = false): Promise<void> {
+ if (this._loading) return;
+ if (this._models.length > 0 && !force) return;
+
+ this._loading = true;
+ this._error = null;
+
+ try {
+ const response = await ModelsService.list();
+
+ const models: ModelOption[] = response.data.map((item, index) => {
+ const details = response.models?.[index];
+ const rawCapabilities = Array.isArray(details?.capabilities)
+ ? [...(details?.capabilities ?? [])]
+ : [];
+ const displayNameSource =
+ details?.name && details.name.trim().length > 0 ? details.name : item.id;
+ const displayName = this.toDisplayName(displayNameSource);
+
+ return {
+ id: item.id,
+ name: displayName,
+ model: details?.model || item.id,
+ description: details?.description,
+ capabilities: rawCapabilities.filter((value): value is string => Boolean(value)),
+ details: details?.details,
+ meta: item.meta ?? null
+ } satisfies ModelOption;
+ });
+
+ this._models = models;
+
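+ // Reconcile any persisted selection with the fresh list: keep it when
+ // the id still exists, otherwise fall back to the first available model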
+ const persisted = this.readPersistedSelection();
+ let nextSelectionId = this._selectedModelId ?? persisted?.id ?? null;
+ let nextSelectionName = this._selectedModelName ?? persisted?.model ?? null;
+ if (nextSelectionId) {
+ const match = models.find((model) => model.id === nextSelectionId);
+ if (match) {
+ nextSelectionId = match.id;
+ nextSelectionName = match.model;
+ } else if (models[0]) {
+ nextSelectionId = models[0].id;
+ nextSelectionName = models[0].model;
+ } else {
+ nextSelectionId = null;
+ nextSelectionName = null;
+ }
+ } else if (models[0]) {
+ nextSelectionId = models[0].id;
+ nextSelectionName = models[0].model;
+ }
+
+ this._selectedModelId = nextSelectionId;
+ this._selectedModelName = nextSelectionName;
+ this.persistSelection(
+ nextSelectionId && nextSelectionName
+ ? { id: nextSelectionId, model: nextSelectionName }
+ : null
+ );
+ } catch (error) {
+ this._models = [];
+ this._error = error instanceof Error ? error.message : 'Failed to load models';
+ throw error;
+ } finally {
+ this._loading = false;
+ }
+ }
+
+ async select(modelId: string): Promise<void> {
+ if (!modelId || this._updating) {
+ return;
+ }
+
+ if (this._selectedModelId === modelId) {
+ return;
+ }
+
+ const option = this._models.find((model) => model.id === modelId);
+ if (!option) {
+ throw new Error('Selected model is not available');
+ }
+
+ this._updating = true;
+ this._error = null;
+
+ try {
+ this._selectedModelId = option.id;
+ this._selectedModelName = option.model;
+ this.persistSelection({ id: option.id, model: option.model });
+ } finally {
+ this._updating = false;
+ }
+ }
+
+ private toDisplayName(id: string): string {
+ const segments = id.split(/\\|\//);
+ const candidate = segments.pop();
+ return candidate && candidate.trim().length > 0 ? candidate : id;
+ }
+
+ private readPersistedSelection(): PersistedModelSelection | null {
+ if (!browser) {
+ return null;
+ }
+
+ try {
+ const raw = localStorage.getItem(STORAGE_KEY);
+ if (!raw) {
+ return null;
+ }
+
+ const parsed = JSON.parse(raw);
+ if (parsed && typeof parsed.id === 'string') {
+ const id = parsed.id;
+ const model =
+ typeof parsed.model === 'string' && parsed.model.length > 0 ? parsed.model : id;
+ return { id, model };
+ }
+ } catch (error) {
+ console.warn('Failed to read model selection from localStorage:', error);
+ }
+
+ return null;
+ }
+
+ private persistSelection(selection: PersistedModelSelection | null): void {
+ if (!browser) {
+ return;
+ }
+
+ try {
+ if (selection) {
+ localStorage.setItem(STORAGE_KEY, JSON.stringify(selection));
+ } else {
+ localStorage.removeItem(STORAGE_KEY);
+ }
+ } catch (error) {
+ console.warn('Failed to persist model selection to localStorage:', error);
+ }
+ }
+}
+
+export const modelsStore = new ModelsStore();
+
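+// Function wrappers let components read reactive state without holding the store instance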
+export const modelOptions = () => modelsStore.models;
+export const modelsLoading = () => modelsStore.loading;
+export const modelsUpdating = () => modelsStore.updating;
+export const modelsError = () => modelsStore.error;
+export const selectedModelId = () => modelsStore.selectedModelId;
+export const selectedModelName = () => modelsStore.selectedModelName;
+export const selectedModelOption = () => modelsStore.selectedModel;
+
+export const fetchModels = modelsStore.fetch.bind(modelsStore);
+export const selectModel = modelsStore.select.bind(modelsStore);
diff --git a/tools/server/webui/src/lib/types/api.d.ts b/tools/server/webui/src/lib/types/api.d.ts
index d0e60a6c13706..6d76ab1f68e9d 100644
--- a/tools/server/webui/src/lib/types/api.d.ts
+++ b/tools/server/webui/src/lib/types/api.d.ts
@@ -36,6 +36,41 @@ export interface ApiChatMessageData {
timestamp?: number;
}
+export interface ApiModelDataEntry {
+ id: string;
+ object: string;
+ created: number;
+ owned_by: string;
+ meta?: Record<string, unknown> | null;
+}
+
+export interface ApiModelDetails {
+ name: string;
+ model: string;
+ modified_at?: string;
+ size?: string | number;
+ digest?: string;
+ type?: string;
+ description?: string;
+ tags?: string[];
+ capabilities?: string[];
+ parameters?: string;
+ details?: {
+ parent_model?: string;
+ format?: string;
+ family?: string;
+ families?: string[];
+ parameter_size?: string;
+ quantization_level?: string;
+ };
+}
+
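+/**
+ * Response of GET /v1/models. The optional `models` array carries
+ * extended per-model details, aligned by index with `data`.
+ */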
+export interface ApiModelListResponse {
+ object: string;
+ data: ApiModelDataEntry[];
+ models?: ApiModelDetails[];
+}
+
export interface ApiLlamaCppServerProps {
default_generation_settings: {
id: number;
@@ -120,6 +155,7 @@ export interface ApiChatCompletionRequest {
content: string | ApiChatMessageContentPart[];
}>;
stream?: boolean;
+ model?: string;
// Reasoning parameters
reasoning_format?: string;
// Generation parameters
@@ -150,10 +186,14 @@ export interface ApiChatCompletionRequest {
}
export interface ApiChatCompletionStreamChunk {
+ model?: string;
choices: Array<{
+ model?: string;
+ metadata?: { model?: string };
delta: {
content?: string;
reasoning_content?: string;
+ model?: string;
};
}>;
timings?: {
@@ -167,10 +207,14 @@ export interface ApiChatCompletionStreamChunk {
}
export interface ApiChatCompletionResponse {
+ model?: string;
choices: Array<{
+ model?: string;
+ metadata?: { model?: string };
message: {
content: string;
reasoning_content?: string;
+ model?: string;
};
}>;
}
diff --git a/tools/server/webui/src/lib/types/settings.d.ts b/tools/server/webui/src/lib/types/settings.d.ts
index 4311f779ad841..659fb0c7d1cf5 100644
--- a/tools/server/webui/src/lib/types/settings.d.ts
+++ b/tools/server/webui/src/lib/types/settings.d.ts
@@ -41,6 +41,7 @@ export interface SettingsChatServiceOptions {
// Callbacks
onChunk?: (chunk: string) => void;
onReasoningChunk?: (chunk: string) => void;
+ onModel?: (model: string) => void;
onComplete?: (response: string, reasoningContent?: string, timings?: ChatMessageTimings) => void;
onError?: (error: Error) => void;
}
diff --git a/tools/server/webui/src/routes/+layout.svelte b/tools/server/webui/src/routes/+layout.svelte
index 0245cf3abcef4..6b64aa1e9fd6c 100644
--- a/tools/server/webui/src/routes/+layout.svelte
+++ b/tools/server/webui/src/routes/+layout.svelte
@@ -139,7 +139,7 @@
-
+