diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz
index 4f18a634ce545..36511b300c8e8 100644
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatProcessingInfo.svelte b/tools/server/webui/src/lib/components/app/chat/ChatProcessingInfo.svelte
index c10d7dbf1d781..cee5352c960d0 100644
--- a/tools/server/webui/src/lib/components/app/chat/ChatProcessingInfo.svelte
+++ b/tools/server/webui/src/lib/components/app/chat/ChatProcessingInfo.svelte
@@ -2,23 +2,31 @@
import { PROCESSING_INFO_TIMEOUT } from '$lib/constants/processing-info';
import { useProcessingState } from '$lib/hooks/use-processing-state.svelte';
import { slotsService } from '$lib/services/slots';
- import { isLoading, activeMessages, activeConversation } from '$lib/stores/chat.svelte';
+ import {
+ isConversationLoading,
+ activeMessages,
+ activeConversation
+ } from '$lib/stores/chat.svelte';
import { config } from '$lib/stores/settings.svelte';
const processingState = useProcessingState();
let processingDetails = $derived(processingState.getProcessingDetails());
- let showSlotsInfo = $derived(isLoading() || config().keepStatsVisible);
+ let isCurrentConversationLoading = $derived(
+ activeConversation() ? isConversationLoading(activeConversation()!.id) : false
+ );
+
+ let showSlotsInfo = $derived(isCurrentConversationLoading || config().keepStatsVisible);
$effect(() => {
const keepStatsVisible = config().keepStatsVisible;
- if (keepStatsVisible || isLoading()) {
+ if (keepStatsVisible || isCurrentConversationLoading) {
processingState.startMonitoring();
}
- if (!isLoading() && !keepStatsVisible) {
+ if (!isCurrentConversationLoading && !keepStatsVisible) {
setTimeout(() => {
if (!config().keepStatsVisible) {
processingState.stopMonitoring();
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte b/tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte
index 666febf0d28d6..5fe92ee9a81c5 100644
--- a/tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte
+++ b/tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte
@@ -23,6 +23,7 @@
activeConversation,
deleteConversation,
isLoading,
+ isConversationLoading,
sendMessage,
stopGeneration,
setMaxContextError
@@ -81,6 +82,10 @@
let isServerLoading = $derived(serverLoading());
+ let isCurrentConversationLoading = $derived(
+ activeConversation() ? isConversationLoading(activeConversation()!.id) : false
+ );
+
async function handleDeleteConfirm() {
const conversation = activeConversation();
if (conversation) {
@@ -261,7 +266,7 @@
});
$effect(() => {
- if (isLoading() && autoScrollEnabled) {
+ if (isCurrentConversationLoading && autoScrollEnabled) {
scrollInterval = setInterval(scrollChatToBottom, AUTO_SCROLL_INTERVAL);
} else if (scrollInterval) {
clearInterval(scrollInterval);
@@ -312,7 +317,7 @@
= new Map();
/**
* Sends a chat completion request to the llama.cpp server.
@@ -44,7 +44,8 @@ export class ChatService {
*/
async sendMessage(
messages: ApiChatMessageData[] | (DatabaseMessage & { extra?: DatabaseMessageExtra[] })[],
- options: SettingsChatServiceOptions = {}
+ options: SettingsChatServiceOptions = {},
+ conversationId?: string
): Promise {
const {
stream,
@@ -78,9 +79,17 @@ export class ChatService {
timings_per_token
} = options;
- // Cancel any ongoing request and create a new abort controller
- this.abort();
- this.abortController = new AbortController();
+ // Create or get abort controller for this conversation
+ const requestId = conversationId || 'default';
+
+ // Cancel any existing request for this conversation
+ if (this.abortControllers.has(requestId)) {
+ this.abortControllers.get(requestId)?.abort();
+ }
+
+ // Create new abort controller for this conversation
+ const abortController = new AbortController();
+ this.abortControllers.set(requestId, abortController);
// Convert database messages with attachments to API format if needed
const normalizedMessages: ApiChatMessageData[] = messages
@@ -171,7 +180,7 @@ export class ChatService {
...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {})
},
body: JSON.stringify(requestBody),
- signal: this.abortController.signal
+ signal: abortController.signal
});
if (!response.ok) {
@@ -223,6 +232,8 @@ export class ChatService {
onError(userFriendlyError);
}
throw userFriendlyError;
+ } finally {
+ this.abortControllers.delete(requestId);
}
}
@@ -603,10 +614,20 @@ export class ChatService {
*
* @public
*/
- public abort(): void {
- if (this.abortController) {
- this.abortController.abort();
- this.abortController = null;
+ public abort(conversationId?: string): void {
+ if (conversationId) { // targeted: abort only the controller stored under this id
+ const abortController = this.abortControllers.get(conversationId);
+
+ if (abortController) {
+ abortController.abort();
+ this.abortControllers.delete(conversationId);
+ }
+ } else { // id omitted (or empty string): abort every tracked controller, then empty the map
+ for (const controller of this.abortControllers.values()) {
+ controller.abort();
+ }
+
+ this.abortControllers.clear();
}
}
diff --git a/tools/server/webui/src/lib/services/slots.ts b/tools/server/webui/src/lib/services/slots.ts
index 06c0a77de9138..73fd34d5b04fb 100644
--- a/tools/server/webui/src/lib/services/slots.ts
+++ b/tools/server/webui/src/lib/services/slots.ts
@@ -37,6 +37,9 @@ export class SlotsService {
private callbacks: Set<(state: ApiProcessingState | null) => void> = new Set();
private isStreamingActive: boolean = false;
private lastKnownState: ApiProcessingState | null = null;
+ // Track per-conversation streaming states and timing data
+ private conversationStates: Map = new Map();
+ private activeConversationId: string | null = null;
/**
* Start streaming session tracking
@@ -52,6 +55,65 @@ export class SlotsService {
this.isStreamingActive = false;
}
+ /**
+ * Record which conversation (or null for none) is active, then re-notify all callbacks
+ */
+ setActiveConversation(conversationId: string | null): void {
+ this.activeConversationId = conversationId;
+ // Callbacks receive the state notifyCallbacks resolves for the new id; lastKnownState is not modified here
+ this.notifyCallbacks();
+ }
+
+ /**
+ * Store the processing state for one conversation; a null state is stored as-is, not deleted
+ */
+ updateConversationState(conversationId: string, state: ApiProcessingState | null): void {
+ this.conversationStates.set(conversationId, state);
+
+ // Only the active conversation also refreshes lastKnownState and notifies callbacks
+ if (conversationId === this.activeConversationId) {
+ this.lastKnownState = state;
+ this.notifyCallbacks();
+ }
+ }
+
+ /**
+ * Return the state stored for a conversation, or null when nothing (or null) is stored for it
+ */
+ getConversationState(conversationId: string): ApiProcessingState | null {
+ return this.conversationStates.get(conversationId) || null;
+ }
+
+ /**
+ * Remove the stored state entry for a specific conversation
+ */
+ clearConversationState(conversationId: string): void {
+ this.conversationStates.delete(conversationId);
+
+ // If this was the active conversation, also reset lastKnownState and notify callbacks
+ if (conversationId === this.activeConversationId) {
+ this.lastKnownState = null;
+ this.notifyCallbacks();
+ }
+ }
+
+ /**
+ * Call every registered callback with the active conversation's stored state (null if none), or with lastKnownState when no conversation is active
+ */
+ private notifyCallbacks(): void {
+ const currentState = this.activeConversationId
+ ? this.conversationStates.get(this.activeConversationId) || null
+ : this.lastKnownState;
+
+ for (const callback of this.callbacks) {
+ try {
+ callback(currentState);
+ } catch (error) {
+ console.error('Error in slots service callback:', error); // log and continue with the remaining callbacks
+ }
+ }
+ }
+
/**
* Clear the current processing state
* Used when switching to a conversation without timing data
@@ -100,13 +162,16 @@ export class SlotsService {
/**
* Updates processing state with timing data from ChatService streaming response
*/
- async updateFromTimingData(timingData: {
- prompt_n: number;
- predicted_n: number;
- predicted_per_second: number;
- cache_n: number;
- prompt_progress?: ChatMessagePromptProgress;
- }): Promise {
+ async updateFromTimingData(
+ timingData: {
+ prompt_n: number;
+ predicted_n: number;
+ predicted_per_second: number;
+ cache_n: number;
+ prompt_progress?: ChatMessagePromptProgress;
+ },
+ conversationId?: string
+ ): Promise {
const processingState = await this.parseCompletionTimingData(timingData);
// Only update if we successfully parsed the state
@@ -115,14 +180,13 @@ export class SlotsService {
return;
}
- this.lastKnownState = processingState;
-
- for (const callback of this.callbacks) {
- try {
- callback(processingState);
- } catch (error) {
- console.error('Error in timing callback:', error);
- }
+ if (conversationId) {
+ // Update per-conversation state
+ this.updateConversationState(conversationId, processingState);
+ } else {
+ // Fallback to global state for backward compatibility
+ this.lastKnownState = processingState;
+ this.notifyCallbacks();
}
}
diff --git a/tools/server/webui/src/lib/stores/chat.svelte.ts b/tools/server/webui/src/lib/stores/chat.svelte.ts
index 4a6d3a8c61f60..0d6d4dcfc0dc9 100644
--- a/tools/server/webui/src/lib/stores/chat.svelte.ts
+++ b/tools/server/webui/src/lib/stores/chat.svelte.ts
@@ -49,6 +49,11 @@ class ChatStore {
currentResponse = $state('');
isInitialized = $state(false);
isLoading = $state(false);
+ // Track loading and streaming state per conversation
+ conversationLoadingStates = $state