Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified tools/server/public/index.html.gz
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@
let displayedModel = $derived((): string | null => {
if (!currentConfig.showModelInfo) return null;

if (currentConfig.modelSelectorEnabled) {
return message.model ?? null;
if (message.model) {
return message.model;
}

return serverModel;
Expand Down
20 changes: 16 additions & 4 deletions tools/server/webui/src/lib/services/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ export class ChatService {
onError,
onReasoningChunk,
onModel,
onFirstValidChunk,
// Generation parameters
temperature,
max_tokens,
Expand Down Expand Up @@ -201,6 +202,7 @@ export class ChatService {
onError,
onReasoningChunk,
onModel,
onFirstValidChunk,
conversationId,
abortController.signal
);
Expand Down Expand Up @@ -267,6 +269,7 @@ export class ChatService {
onError?: (error: Error) => void,
onReasoningChunk?: (chunk: string) => void,
onModel?: (model: string) => void,
onFirstValidChunk?: () => void,
conversationId?: string,
abortSignal?: AbortSignal
): Promise<void> {
Expand All @@ -283,6 +286,7 @@ export class ChatService {
let lastTimings: ChatMessageTimings | undefined;
let streamFinished = false;
let modelEmitted = false;
let firstValidChunkEmitted = false;

try {
let chunk = '';
Expand Down Expand Up @@ -311,17 +315,25 @@ export class ChatService {
try {
const parsed: ApiChatCompletionStreamChunk = JSON.parse(data);

const chunkModel = this.extractModelName(parsed);
if (chunkModel && !modelEmitted) {
modelEmitted = true;
onModel?.(chunkModel);
if (!firstValidChunkEmitted && parsed.object === 'chat.completion.chunk') {
firstValidChunkEmitted = true;

if (!abortSignal?.aborted) {
onFirstValidChunk?.();
}
}

const content = parsed.choices[0]?.delta?.content;
const reasoningContent = parsed.choices[0]?.delta?.reasoning_content;
const timings = parsed.timings;
const promptProgress = parsed.prompt_progress;

const chunkModel = this.extractModelName(parsed);
if (chunkModel && !modelEmitted) {
modelEmitted = true;
onModel?.(chunkModel);
}

if (timings || promptProgress) {
this.updateProcessingState(timings, promptProgress, conversationId);
if (timings) {
Expand Down
54 changes: 52 additions & 2 deletions tools/server/webui/src/lib/stores/chat.svelte.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { DatabaseStore } from '$lib/stores/database';
import { chatService, slotsService } from '$lib/services';
import { config } from '$lib/stores/settings.svelte';
import { serverStore } from '$lib/stores/server.svelte';
import { normalizeModelName } from '$lib/utils/model-names';
import { filterByLeafNodeId, findLeafNode, findDescendantMessages } from '$lib/utils/branching';
import { browser } from '$app/environment';
Expand Down Expand Up @@ -362,9 +363,41 @@ class ChatStore {

let resolvedModel: string | null = null;
let modelPersisted = false;
const currentConfig = config();
const preferServerPropsModel = !currentConfig.modelSelectorEnabled;
let serverPropsRefreshed = false;
let updateModelFromServerProps: ((persistImmediately?: boolean) => void) | null = null;

const refreshServerPropsOnce = () => {
if (serverPropsRefreshed) {
return;
}

serverPropsRefreshed = true;

const hasExistingProps = serverStore.serverProps !== null;

const recordModel = (modelName: string, persistImmediately = true): void => {
const normalizedModel = normalizeModelName(modelName);
serverStore
.fetchServerProps({ silent: hasExistingProps })
.then(() => {
updateModelFromServerProps?.(true);
})
.catch((error) => {
console.warn('Failed to refresh server props after streaming started:', error);
});
};

const recordModel = (modelName: string | null | undefined, persistImmediately = true): void => {
const serverModelName = serverStore.modelName;
const preferredModelSource = preferServerPropsModel
? (serverModelName ?? modelName ?? null)
: (modelName ?? serverModelName ?? null);

if (!preferredModelSource) {
return;
}

const normalizedModel = normalizeModelName(preferredModelSource);

if (!normalizedModel || normalizedModel === resolvedModel) {
return;
Expand All @@ -388,6 +421,20 @@ class ChatStore {
}
};

if (preferServerPropsModel) {
updateModelFromServerProps = (persistImmediately = true) => {
const currentServerModel = serverStore.modelName;

if (!currentServerModel) {
return;
}

recordModel(currentServerModel, persistImmediately);
};

updateModelFromServerProps(false);
}

slotsService.startStreaming();
slotsService.setActiveConversation(assistantMessage.convId);

Expand All @@ -396,6 +443,9 @@ class ChatStore {
{
...this.getApiOptions(),

onFirstValidChunk: () => {
refreshServerPropsOnce();
},
onChunk: (chunk: string) => {
streamedContent += chunk;
this.setConversationStreaming(
Expand Down
174 changes: 110 additions & 64 deletions tools/server/webui/src/lib/stores/server.svelte.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class ServerStore {
private _error = $state<string | null>(null);
private _serverWarning = $state<string | null>(null);
private _slotsEndpointAvailable = $state<boolean | null>(null);
private fetchServerPropsPromise: Promise<void> | null = null;

private readCachedServerProps(): ApiLlamaCppServerProps | null {
if (!browser) return null;
Expand Down Expand Up @@ -171,88 +172,132 @@ class ServerStore {
/**
* Fetches server properties from the server
*/
async fetchServerProps(): Promise<void> {
this._loading = true;
this._error = null;
this._serverWarning = null;
async fetchServerProps(options: { silent?: boolean } = {}): Promise<void> {
const { silent = false } = options;
const isSilent = silent && this._serverProps !== null;

try {
console.log('Fetching server properties...');
const props = await ChatService.getServerProps();
this._serverProps = props;
this.persistServerProps(props);
console.log('Server properties loaded:', props);

// Check slots endpoint availability after server props are loaded
await this.checkSlotsEndpointAvailability();
} catch (error) {
const hadCachedProps = this._serverProps !== null;
let errorMessage = 'Failed to connect to server';
let isOfflineLikeError = false;
let isServerSideError = false;

if (error instanceof Error) {
// Handle specific error types with user-friendly messages
if (error.name === 'TypeError' && error.message.includes('fetch')) {
errorMessage = 'Server is not running or unreachable';
isOfflineLikeError = true;
} else if (error.message.includes('ECONNREFUSED')) {
errorMessage = 'Connection refused - server may be offline';
isOfflineLikeError = true;
} else if (error.message.includes('ENOTFOUND')) {
errorMessage = 'Server not found - check server address';
isOfflineLikeError = true;
} else if (error.message.includes('ETIMEDOUT')) {
errorMessage = 'Request timed out - the server took too long to respond';
isOfflineLikeError = true;
} else if (error.message.includes('503')) {
errorMessage = 'Server temporarily unavailable - try again shortly';
isServerSideError = true;
} else if (error.message.includes('500')) {
errorMessage = 'Server error - check server logs';
isServerSideError = true;
} else if (error.message.includes('404')) {
errorMessage = 'Server endpoint not found';
} else if (error.message.includes('403') || error.message.includes('401')) {
errorMessage = 'Access denied';
if (this.fetchServerPropsPromise) {
return this.fetchServerPropsPromise;
}

if (!isSilent) {
this._loading = true;
this._error = null;
this._serverWarning = null;
}

const hadProps = this._serverProps !== null;

const fetchPromise = (async () => {
try {
const props = await ChatService.getServerProps();
this._serverProps = props;
this.persistServerProps(props);
this._error = null;
this._serverWarning = null;
await this.checkSlotsEndpointAvailability();
} catch (error) {
if (isSilent && hadProps) {
console.warn('Silent server props refresh failed, keeping cached data:', error);
return;
}

this.handleFetchServerPropsError(error, hadProps);
} finally {
if (!isSilent) {
this._loading = false;
}

this.fetchServerPropsPromise = null;
}
})();

this.fetchServerPropsPromise = fetchPromise;

await fetchPromise;
}

let cachedProps: ApiLlamaCppServerProps | null = null;
/**
* Handles fetch failures by attempting to recover cached server props and
* updating the user-facing error or warning state appropriately.
*/
private handleFetchServerPropsError(error: unknown, hadProps: boolean): void {
const { errorMessage, isOfflineLikeError, isServerSideError } = this.normalizeFetchError(error);

if (!hadCachedProps) {
cachedProps = this.readCachedServerProps();
if (cachedProps) {
this._serverProps = cachedProps;
this._error = null;
let cachedProps: ApiLlamaCppServerProps | null = null;

if (isOfflineLikeError || isServerSideError) {
this._serverWarning = errorMessage;
}
if (!hadProps) {
cachedProps = this.readCachedServerProps();

console.warn(
'Failed to refresh server properties, using cached values from localStorage:',
errorMessage
);
} else {
this._error = errorMessage;
}
} else {
if (cachedProps) {
this._serverProps = cachedProps;
this._error = null;

if (isOfflineLikeError || isServerSideError) {
this._serverWarning = errorMessage;
}

console.warn(
'Failed to refresh server properties, continuing with cached values:',
'Failed to refresh server properties, using cached values from localStorage:',
errorMessage
);
} else {
this._error = errorMessage;
}
} else {
this._error = null;

if (isOfflineLikeError || isServerSideError) {
this._serverWarning = errorMessage;
}
console.error('Error fetching server properties:', error);
} finally {
this._loading = false;

console.warn(
'Failed to refresh server properties, continuing with cached values:',
errorMessage
);
}

console.error('Error fetching server properties:', error);
}

/**
 * Translates a failure thrown while fetching server props into a user-facing
 * message plus two classification flags.
 *
 * Values that are not `Error` instances, and errors matching none of the
 * known patterns, yield the generic 'Failed to connect to server' message
 * with both flags false. Matching is by substring on `error.message`; the
 * first matching rule wins, so '503' is tested before '500'.
 *
 * @param error - The value caught from the failed fetch.
 * @returns The message to display, whether the failure looks like the server
 * being offline/unreachable, and whether it looks like a server-side fault.
 */
private normalizeFetchError(error: unknown): {
	errorMessage: string;
	isOfflineLikeError: boolean;
	isServerSideError: boolean;
} {
	type FailureKind = 'offline' | 'server' | 'other';

	const classify = (errorMessage: string, kind: FailureKind) => ({
		errorMessage,
		isOfflineLikeError: kind === 'offline',
		isServerSideError: kind === 'server'
	});

	const genericMessage = 'Failed to connect to server';

	if (!(error instanceof Error)) {
		return classify(genericMessage, 'other');
	}

	const details = error.message || '';

	// Only this rule also depends on the error name, so it is checked before the table.
	if (error.name === 'TypeError' && details.includes('fetch')) {
		return classify('Server is not running or unreachable', 'offline');
	}

	// Ordered by precedence: the first entry with any needle found in the message wins.
	const substringRules: ReadonlyArray<{
		needles: readonly string[];
		text: string;
		kind: FailureKind;
	}> = [
		{
			needles: ['ECONNREFUSED'],
			text: 'Connection refused - server may be offline',
			kind: 'offline'
		},
		{
			needles: ['ENOTFOUND'],
			text: 'Server not found - check server address',
			kind: 'offline'
		},
		{
			needles: ['ETIMEDOUT'],
			text: 'Request timed out - the server took too long to respond',
			kind: 'offline'
		},
		{
			needles: ['503'],
			text: 'Server temporarily unavailable - try again shortly',
			kind: 'server'
		},
		{ needles: ['500'], text: 'Server error - check server logs', kind: 'server' },
		{ needles: ['404'], text: 'Server endpoint not found', kind: 'other' },
		{ needles: ['403', '401'], text: 'Access denied', kind: 'other' }
	];

	const matched = substringRules.find((rule) =>
		rule.needles.some((needle) => details.includes(needle))
	);

	return matched ? classify(matched.text, matched.kind) : classify(genericMessage, 'other');
}

/**
Expand All @@ -264,6 +309,7 @@ class ServerStore {
this._serverWarning = null;
this._loading = false;
this._slotsEndpointAvailable = null;
this.fetchServerPropsPromise = null;
this.persistServerProps(null);
}
}
Expand Down
1 change: 1 addition & 0 deletions tools/server/webui/src/lib/types/api.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ export interface ApiChatCompletionRequest {
}

export interface ApiChatCompletionStreamChunk {
object?: string;
model?: string;
choices: Array<{
model?: string;
Expand Down
1 change: 1 addition & 0 deletions tools/server/webui/src/lib/types/settings.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ export interface SettingsChatServiceOptions {
onChunk?: (chunk: string) => void;
onReasoningChunk?: (chunk: string) => void;
onModel?: (model: string) => void;
onFirstValidChunk?: () => void;
onComplete?: (response: string, reasoningContent?: string, timings?: ChatMessageTimings) => void;
onError?: (error: Error) => void;
}
Expand Down