diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz index 4f18a634ce545..36511b300c8e8 100644 Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ diff --git a/tools/server/webui/src/lib/components/app/chat/ChatProcessingInfo.svelte b/tools/server/webui/src/lib/components/app/chat/ChatProcessingInfo.svelte index c10d7dbf1d781..cee5352c960d0 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatProcessingInfo.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatProcessingInfo.svelte @@ -2,23 +2,31 @@ import { PROCESSING_INFO_TIMEOUT } from '$lib/constants/processing-info'; import { useProcessingState } from '$lib/hooks/use-processing-state.svelte'; import { slotsService } from '$lib/services/slots'; - import { isLoading, activeMessages, activeConversation } from '$lib/stores/chat.svelte'; + import { + isConversationLoading, + activeMessages, + activeConversation + } from '$lib/stores/chat.svelte'; import { config } from '$lib/stores/settings.svelte'; const processingState = useProcessingState(); let processingDetails = $derived(processingState.getProcessingDetails()); - let showSlotsInfo = $derived(isLoading() || config().keepStatsVisible); + let isCurrentConversationLoading = $derived( + activeConversation() ? isConversationLoading(activeConversation()!.id) : false + ); + + let showSlotsInfo = $derived(isCurrentConversationLoading || config().keepStatsVisible); $effect(() => { const keepStatsVisible = config().keepStatsVisible; - if (keepStatsVisible || isLoading()) { + if (keepStatsVisible || isCurrentConversationLoading) { processingState.startMonitoring(); } - if (!isLoading() && !keepStatsVisible) { + if (!isCurrentConversationLoading && !keepStatsVisible) { setTimeout(() => { if (!config().keepStatsVisible) { processingState.stopMonitoring(); diff --git a/tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte b/tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte index 666febf0d28d6..5fe92ee9a81c5 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte @@ -23,6 +23,7 @@ activeConversation, deleteConversation, isLoading, + isConversationLoading, sendMessage, stopGeneration, setMaxContextError @@ -81,6 +82,10 @@ let isServerLoading = $derived(serverLoading()); + let isCurrentConversationLoading = $derived( + activeConversation() ? isConversationLoading(activeConversation()!.id) : false + ); + async function handleDeleteConfirm() { const conversation = activeConversation(); if (conversation) { @@ -261,7 +266,7 @@ }); $effect(() => { - if (isLoading() && autoScrollEnabled) { + if (isCurrentConversationLoading && autoScrollEnabled) { scrollInterval = setInterval(scrollChatToBottom, AUTO_SCROLL_INTERVAL); } else if (scrollInterval) { clearInterval(scrollInterval); @@ -312,7 +317,7 @@
= new Map(); /** * Sends a chat completion request to the llama.cpp server. @@ -44,7 +44,8 @@ export class ChatService { */ async sendMessage( messages: ApiChatMessageData[] | (DatabaseMessage & { extra?: DatabaseMessageExtra[] })[], - options: SettingsChatServiceOptions = {} + options: SettingsChatServiceOptions = {}, + conversationId?: string ): Promise { const { stream, @@ -78,9 +79,17 @@ export class ChatService { timings_per_token } = options; - // Cancel any ongoing request and create a new abort controller - this.abort(); - this.abortController = new AbortController(); + // Create or get abort controller for this conversation + const requestId = conversationId || 'default'; + + // Cancel any existing request for this conversation + if (this.abortControllers.has(requestId)) { + this.abortControllers.get(requestId)?.abort(); + } + + // Create new abort controller for this conversation + const abortController = new AbortController(); + this.abortControllers.set(requestId, abortController); // Convert database messages with attachments to API format if needed const normalizedMessages: ApiChatMessageData[] = messages @@ -171,7 +180,7 @@ export class ChatService { ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}) }, body: JSON.stringify(requestBody), - signal: this.abortController.signal + signal: abortController.signal }); if (!response.ok) { @@ -223,6 +232,8 @@ export class ChatService { onError(userFriendlyError); } throw userFriendlyError; + } finally { + this.abortControllers.delete(requestId); } } @@ -603,10 +614,20 @@ export class ChatService { * * @public */ - public abort(): void { - if (this.abortController) { - this.abortController.abort(); - this.abortController = null; + public abort(conversationId?: string): void { + if (conversationId) { + const abortController = this.abortControllers.get(conversationId); + + if (abortController) { + abortController.abort(); + this.abortControllers.delete(conversationId); + } + } else { + for (const controller of this.abortControllers.values()) { + controller.abort(); + } + + this.abortControllers.clear(); } } diff --git a/tools/server/webui/src/lib/services/slots.ts b/tools/server/webui/src/lib/services/slots.ts index 06c0a77de9138..73fd34d5b04fb 100644 --- a/tools/server/webui/src/lib/services/slots.ts +++ b/tools/server/webui/src/lib/services/slots.ts @@ -37,6 +37,9 @@ export class SlotsService { private callbacks: Set<(state: ApiProcessingState | null) => void> = new Set(); private isStreamingActive: boolean = false; private lastKnownState: ApiProcessingState | null = null; + // Track per-conversation streaming states and timing data + private conversationStates: Map = new Map(); + private activeConversationId: string | null = null; /** * Start streaming session tracking @@ -52,6 +55,65 @@ export class SlotsService { this.isStreamingActive = false; } + /** + * Set the active conversation for statistics display + */ + setActiveConversation(conversationId: string | null): void { + this.activeConversationId = conversationId; + // Update display to show stats for the active conversation + this.notifyCallbacks(); + } + + /** + * Update processing state for a specific conversation + */ + updateConversationState(conversationId: string, state: ApiProcessingState | null): void { + this.conversationStates.set(conversationId, state); + + // If this is the active conversation, update the display + if (conversationId === this.activeConversationId) { + this.lastKnownState = state; + this.notifyCallbacks(); + } + } + + /** + * Get processing state for a specific conversation + */ + getConversationState(conversationId: string): ApiProcessingState | null { + return this.conversationStates.get(conversationId) || null; + } + + /** + * Clear state for a specific conversation + */ + clearConversationState(conversationId: string): void { + this.conversationStates.delete(conversationId); + + // If this was the active conversation, clear display + if (conversationId === this.activeConversationId) { + this.lastKnownState = null; + this.notifyCallbacks(); + } + } + + /** + * Notify all callbacks with current state + */ + private notifyCallbacks(): void { + const currentState = this.activeConversationId + ? this.conversationStates.get(this.activeConversationId) || null + : this.lastKnownState; + + for (const callback of this.callbacks) { + try { + callback(currentState); + } catch (error) { + console.error('Error in slots service callback:', error); + } + } + } + /** * Clear the current processing state * Used when switching to a conversation without timing data @@ -100,13 +162,16 @@ export class SlotsService { /** * Updates processing state with timing data from ChatService streaming response */ - async updateFromTimingData(timingData: { - prompt_n: number; - predicted_n: number; - predicted_per_second: number; - cache_n: number; - prompt_progress?: ChatMessagePromptProgress; - }): Promise { + async updateFromTimingData( + timingData: { + prompt_n: number; + predicted_n: number; + predicted_per_second: number; + cache_n: number; + prompt_progress?: ChatMessagePromptProgress; + }, + conversationId?: string + ): Promise { const processingState = await this.parseCompletionTimingData(timingData); // Only update if we successfully parsed the state @@ -115,14 +180,13 @@ export class SlotsService { return; } - this.lastKnownState = processingState; - - for (const callback of this.callbacks) { - try { - callback(processingState); - } catch (error) { - console.error('Error in timing callback:', error); - } + if (conversationId) { + // Update per-conversation state + this.updateConversationState(conversationId, processingState); + } else { + // Fallback to global state for backward compatibility + this.lastKnownState = processingState; + this.notifyCallbacks(); } } diff --git a/tools/server/webui/src/lib/stores/chat.svelte.ts b/tools/server/webui/src/lib/stores/chat.svelte.ts index 4a6d3a8c61f60..0d6d4dcfc0dc9 100644 --- a/tools/server/webui/src/lib/stores/chat.svelte.ts +++ b/tools/server/webui/src/lib/stores/chat.svelte.ts @@ -49,6 +49,11 @@ class ChatStore { currentResponse = $state(''); isInitialized = $state(false); isLoading = $state(false); + // Track loading and streaming state per conversation + conversationLoadingStates = $state>(new Map()); + conversationStreamingStates = $state>( + new Map() + ); maxContextError = $state<{ message: string; estimatedTokens: number; maxContext: number } | null>( null ); @@ -60,6 +65,47 @@ class ChatStore { } } + /** + * Helper methods for per-conversation loading state management + */ + private setConversationLoading(convId: string, loading: boolean): void { + if (loading) { + this.conversationLoadingStates.set(convId, true); + } else { + this.conversationLoadingStates.delete(convId); + } + // Update global isLoading for backward compatibility (active conversation only) + if (this.activeConversation?.id === convId) { + this.isLoading = loading; + } + } + + private isConversationLoading(convId: string): boolean { + return this.conversationLoadingStates.get(convId) || false; + } + + private setConversationStreaming(convId: string, response: string, messageId: string): void { + this.conversationStreamingStates.set(convId, { response, messageId }); + // Update global currentResponse for backward compatibility (active conversation only) + if (this.activeConversation?.id === convId) { + this.currentResponse = response; + } + } + + private clearConversationStreaming(convId: string): void { + this.conversationStreamingStates.delete(convId); + // Clear global currentResponse for backward compatibility (active conversation only) + if (this.activeConversation?.id === convId) { + this.currentResponse = ''; + } + } + + private getConversationStreaming( + convId: string + ): { response: string; messageId: string } | undefined { + return this.conversationStreamingStates.get(convId); + } + /** * Initializes the chat store by loading conversations from the database * Sets up the initial state and loads existing conversations @@ -120,6 +166,9 @@ class ChatStore { this.activeConversation = conversation; + // Set this conversation as active for statistics display + slotsService.setActiveConversation(convId); + if (conversation.currNode) { const allMessages = await DatabaseStore.getConversationMessages(convId); this.activeMessages = filterByLeafNodeId( @@ -311,134 +360,150 @@ class ChatStore { let streamedReasoningContent = ''; slotsService.startStreaming(); + // Set this conversation as active for statistics display + slotsService.setActiveConversation(assistantMessage.convId); - await chatService.sendMessage(allMessages, { - ...this.getApiOptions(), + await chatService.sendMessage( + allMessages, + { + ...this.getApiOptions(), + + onChunk: (chunk: string) => { + streamedContent += chunk; + // Update per-conversation streaming state + this.setConversationStreaming( + assistantMessage.convId, + streamedContent, + assistantMessage.id + ); - onChunk: (chunk: string) => { - streamedContent += chunk; - this.currentResponse = streamedContent; + const partialThinking = extractPartialThinking(streamedContent); + const messageIndex = this.findMessageIndex(assistantMessage.id); + this.updateMessageAtIndex(messageIndex, { + content: partialThinking.remainingContent || streamedContent + }); + }, - const partialThinking = extractPartialThinking(streamedContent); - const messageIndex = this.findMessageIndex(assistantMessage.id); - this.updateMessageAtIndex(messageIndex, { - content: partialThinking.remainingContent || streamedContent - }); - }, + onReasoningChunk: (reasoningChunk: string) => { + streamedReasoningContent += reasoningChunk; + const messageIndex = this.findMessageIndex(assistantMessage.id); + this.updateMessageAtIndex(messageIndex, { thinking: streamedReasoningContent }); + }, - onReasoningChunk: (reasoningChunk: string) => { - streamedReasoningContent += reasoningChunk; - const messageIndex = this.findMessageIndex(assistantMessage.id); - this.updateMessageAtIndex(messageIndex, { thinking: streamedReasoningContent }); - }, + onComplete: async ( + finalContent?: string, + reasoningContent?: string, + timings?: ChatMessageTimings + ) => { + slotsService.stopStreaming(); - onComplete: async ( - finalContent?: string, - reasoningContent?: string, - timings?: ChatMessageTimings - ) => { - slotsService.stopStreaming(); - - await DatabaseStore.updateMessage(assistantMessage.id, { - content: finalContent || streamedContent, - thinking: reasoningContent || streamedReasoningContent, - timings: timings - }); + await DatabaseStore.updateMessage(assistantMessage.id, { + content: finalContent || streamedContent, + thinking: reasoningContent || streamedReasoningContent, + timings: timings + }); - const messageIndex = this.findMessageIndex(assistantMessage.id); + const messageIndex = this.findMessageIndex(assistantMessage.id); - this.updateMessageAtIndex(messageIndex, { - timings: timings - }); + this.updateMessageAtIndex(messageIndex, { + timings: timings + }); - await DatabaseStore.updateCurrentNode(this.activeConversation!.id, assistantMessage.id); - this.activeConversation!.currNode = assistantMessage.id; - await this.refreshActiveMessages(); + await DatabaseStore.updateCurrentNode(this.activeConversation!.id, assistantMessage.id); + this.activeConversation!.currNode = assistantMessage.id; - if (onComplete) { - await onComplete(streamedContent); - } + await this.refreshActiveMessages(); - this.isLoading = false; - this.currentResponse = ''; - }, + if (onComplete) { + await onComplete(streamedContent); + } - onError: (error: Error) => { - slotsService.stopStreaming(); + // Clear per-conversation loading and streaming states + this.setConversationLoading(assistantMessage.convId, false); + this.clearConversationStreaming(assistantMessage.convId); + slotsService.clearConversationState(assistantMessage.convId); + }, - if (error.name === 'AbortError' || error instanceof DOMException) { - this.isLoading = false; - this.currentResponse = ''; - return; - } + onError: (error: Error) => { + slotsService.stopStreaming(); - if (error.name === 'ContextError') { - console.warn('Context error detected:', error.message); - this.isLoading = false; - this.currentResponse = ''; + if (this.isAbortError(error)) { + this.setConversationLoading(assistantMessage.convId, false); + this.clearConversationStreaming(assistantMessage.convId); + slotsService.clearConversationState(assistantMessage.convId); + return; + } - const messageIndex = this.activeMessages.findIndex( - (m: DatabaseMessage) => m.id === assistantMessage.id - ); + if (error.name === 'ContextError') { + console.warn('Context error detected:', error.message); + this.setConversationLoading(assistantMessage.convId, false); + this.clearConversationStreaming(assistantMessage.convId); - if (messageIndex !== -1) { - this.activeMessages.splice(messageIndex, 1); - DatabaseStore.deleteMessage(assistantMessage.id).catch(console.error); - } + const messageIndex = this.activeMessages.findIndex( + (m: DatabaseMessage) => m.id === assistantMessage.id + ); - // Use structured context info from new exceed_context_size_error format if available - const contextInfo = ( - error as Error & { - contextInfo?: { promptTokens: number; maxContext: number; estimatedTokens: number }; + if (messageIndex !== -1) { + this.activeMessages.splice(messageIndex, 1); + DatabaseStore.deleteMessage(assistantMessage.id).catch(console.error); } - ).contextInfo; - let estimatedTokens = 0; - let maxContext = serverStore.serverProps?.default_generation_settings.n_ctx || 8192; - - if (contextInfo) { - // Use precise token counts from server response - estimatedTokens = contextInfo.promptTokens; - maxContext = contextInfo.maxContext; - } else { - // Fallback to estimation for older error format - try { - // Rough estimation: ~4 characters per token - const messageContent = JSON.stringify(messages); - estimatedTokens = Math.ceil(messageContent.length / 4); - } catch { - estimatedTokens = 0; + + // Use structured context info from new exceed_context_size_error format if available + const contextInfo = ( + error as Error & { + contextInfo?: { promptTokens: number; maxContext: number; estimatedTokens: number }; + } + ).contextInfo; + let estimatedTokens = 0; + let maxContext = serverStore.serverProps?.default_generation_settings.n_ctx || 8192; + + if (contextInfo) { + // Use precise token counts from server response + estimatedTokens = contextInfo.promptTokens; + maxContext = contextInfo.maxContext; + } else { + // Fallback to estimation for older error format + try { + // Rough estimation: ~4 characters per token + const messageContent = JSON.stringify(messages); + estimatedTokens = Math.ceil(messageContent.length / 4); + } catch { + estimatedTokens = 0; + } } - } - this.maxContextError = { - message: error.message, - estimatedTokens, - maxContext - }; + this.maxContextError = { + message: error.message, + estimatedTokens, + maxContext + }; - if (onError) { - onError(error); + if (onError) { + onError(error); + } + return; } - return; - } - console.error('Streaming error:', error); - this.isLoading = false; - this.currentResponse = ''; + console.error('Streaming error:', error); + this.setConversationLoading(assistantMessage.convId, false); + this.clearConversationStreaming(assistantMessage.convId); + slotsService.clearConversationState(assistantMessage.convId); - const messageIndex = this.activeMessages.findIndex( - (m: DatabaseMessage) => m.id === assistantMessage.id - ); + const messageIndex = this.activeMessages.findIndex( + (m: DatabaseMessage) => m.id === assistantMessage.id + ); - if (messageIndex !== -1) { - this.activeMessages[messageIndex].content = `Error: ${error.message}`; - } + if (messageIndex !== -1) { + this.activeMessages[messageIndex].content = `Error: ${error.message}`; + } - if (onError) { - onError(error); + if (onError) { + onError(error); + } } - } - }); + }, + assistantMessage.convId + ); } /** @@ -518,7 +583,12 @@ class ChatStore { * @param extras - Optional extra data (files, attachments, etc.) */ async sendMessage(content: string, extras?: DatabaseMessageExtra[]): Promise { - if ((!content.trim() && (!extras || extras.length === 0)) || this.isLoading) return; + if (!content.trim() && (!extras || extras.length === 0)) return; + + if (this.activeConversation && this.isConversationLoading(this.activeConversation.id)) { + console.log('Cannot send message: current conversation is already processing a message'); + return; + } let isNewConversation = false; @@ -532,8 +602,9 @@ class ChatStore { return; } - this.isLoading = true; - this.currentResponse = ''; + // Set loading state for this specific conversation + this.setConversationLoading(this.activeConversation.id, true); + this.clearConversationStreaming(this.activeConversation.id); let userMessage: DatabaseMessage | null = null; @@ -571,7 +642,7 @@ class ChatStore { }); } catch (error) { if (this.isAbortError(error)) { - this.isLoading = false; + this.setConversationLoading(this.activeConversation!.id, false); return; } @@ -584,7 +655,7 @@ class ChatStore { } console.error('Failed to send message:', error); - this.isLoading = false; + this.setConversationLoading(this.activeConversation!.id, false); } } @@ -596,6 +667,9 @@ class ChatStore { slotsService.stopStreaming(); chatService.abort(); this.savePartialResponseIfNeeded(); + + this.conversationLoadingStates.clear(); + this.conversationStreamingStates.clear(); this.isLoading = false; this.currentResponse = ''; } @@ -609,6 +683,9 @@ class ChatStore { slotsService.stopStreaming(); chatService.abort(); await this.savePartialResponseIfNeeded(); + + this.conversationLoadingStates.clear(); + this.conversationStreamingStates.clear(); this.isLoading = false; this.currentResponse = ''; } @@ -1030,12 +1107,13 @@ class ChatStore { /** * Clears the active conversation and resets state * Used when navigating away from chat or starting fresh + * Note: Does not stop ongoing streaming to allow background completion */ clearActiveConversation(): void { this.activeConversation = null; this.activeMessages = []; - this.currentResponse = ''; - this.isLoading = false; + // Don't clear currentResponse or isLoading to allow streaming to continue in background + // The streaming will complete and save to database automatically this.maxContextError = null; } @@ -1366,6 +1444,27 @@ class ChatStore { this.isLoading = false; } } + + /** + * Public methods for accessing per-conversation states + */ + public isConversationLoadingPublic(convId: string): boolean { + return this.isConversationLoading(convId); + } + + public getConversationStreamingPublic( + convId: string + ): { response: string; messageId: string } | undefined { + return this.getConversationStreaming(convId); + } + + public getAllLoadingConversations(): string[] { + return Array.from(this.conversationLoadingStates.keys()); + } + + public getAllStreamingConversations(): string[] { + return Array.from(this.conversationStreamingStates.keys()); + } } export const chatStore = new ChatStore(); @@ -1402,3 +1501,11 @@ export function stopGeneration() { chatStore.stopGeneration(); } export const messages = () => chatStore.activeMessages; + +// Per-conversation state access +export const isConversationLoading = (convId: string) => + chatStore.isConversationLoadingPublic(convId); +export const getConversationStreaming = (convId: string) => + chatStore.getConversationStreamingPublic(convId); +export const getAllLoadingConversations = () => chatStore.getAllLoadingConversations(); +export const getAllStreamingConversations = () => chatStore.getAllStreamingConversations(); diff --git a/tools/server/webui/src/routes/chat/[id]/+page.svelte b/tools/server/webui/src/routes/chat/[id]/+page.svelte index 5b6c73d6d4796..d71cb8ac33324 100644 --- a/tools/server/webui/src/routes/chat/[id]/+page.svelte +++ b/tools/server/webui/src/routes/chat/[id]/+page.svelte @@ -1,43 +1,19 @@