Skip to content

Commit 930f9ab

Browse files
refactor: unify reasoning handling via backend reasoning_content, drop frontend tag parsing
- Updated the chat message component to surface backend-supplied reasoning via message.thinking while showing the raw assistant content without inline tag scrubbing
- Simplified chat streaming to append content chunks directly, stream reasoning into the message model, and persist any partial reasoning when generation stops
- Refactored the chat service SSE handler to rely on server-provided reasoning_content, removing legacy <think> parsing logic
- Refreshed Storybook data and streaming flows to populate the thinking field explicitly for static and streaming assistant messages
1 parent 3fe7e66 commit 930f9ab

File tree

6 files changed

+51
-274
lines changed

6 files changed

+51
-274
lines changed

tools/server/public/index.html.gz

1.9 KB
Binary file not shown.

tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
<script lang="ts">
22
import { getDeletionInfo } from '$lib/stores/chat.svelte';
33
import { copyToClipboard } from '$lib/utils/copy';
4-
import { parseThinkingContent } from '$lib/utils/thinking';
54
import ChatMessageAssistant from './ChatMessageAssistant.svelte';
65
import ChatMessageUser from './ChatMessageUser.svelte';
76
@@ -47,25 +46,13 @@
4746
4847
let thinkingContent = $derived.by(() => {
4948
if (message.role === 'assistant') {
50-
if (message.thinking) {
51-
return message.thinking;
52-
}
53-
54-
const parsed = parseThinkingContent(message.content);
55-
56-
return parsed.thinking;
49+
const trimmedThinking = message.thinking?.trim();
50+
return trimmedThinking ? trimmedThinking : null;
5751
}
5852
return null;
5953
});
6054
61-
let messageContent = $derived.by(() => {
62-
if (message.role === 'assistant') {
63-
const parsed = parseThinkingContent(message.content);
64-
return parsed.cleanContent?.replace('<|channel|>analysis', '');
65-
}
66-
67-
return message.content?.replace('<|channel|>analysis', '');
68-
});
55+
let messageContent = $derived.by(() => message.content);
6956
7057
function handleCancelEdit() {
7158
isEditing = false;

tools/server/webui/src/lib/services/chat.ts

Lines changed: 15 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -262,10 +262,8 @@ export class ChatService {
262262
}
263263

264264
const decoder = new TextDecoder();
265-
let fullResponse = '';
265+
let aggregatedContent = '';
266266
let fullReasoningContent = '';
267-
let regularContent = '';
268-
let insideThinkTag = false;
269267
let hasReceivedData = false;
270268
let lastTimings: ChatMessageTimings | undefined;
271269

@@ -283,7 +281,7 @@ export class ChatService {
283281
if (line.startsWith('data: ')) {
284282
const data = line.slice(6);
285283
if (data === '[DONE]') {
286-
if (!hasReceivedData && fullResponse.length === 0) {
284+
if (!hasReceivedData && aggregatedContent.length === 0) {
287285
const contextError = new Error(
288286
'The request exceeds the available context size. Try increasing the context size or enable context shift.'
289287
);
@@ -292,7 +290,7 @@ export class ChatService {
292290
return;
293291
}
294292

295-
onComplete?.(regularContent, fullReasoningContent || undefined, lastTimings);
293+
onComplete?.(aggregatedContent, fullReasoningContent || undefined, lastTimings);
296294

297295
return;
298296
}
@@ -314,44 +312,25 @@ export class ChatService {
314312
}
315313
}
316314

317-
if (content) {
318-
hasReceivedData = true;
319-
fullResponse += content;
320-
321-
// Track the regular content before processing this chunk
322-
const regularContentBefore = regularContent;
323-
324-
// Process content character by character to handle think tags
325-
insideThinkTag = this.processContentForThinkTags(
326-
content,
327-
insideThinkTag,
328-
() => {
329-
// Think content is ignored - we don't include it in API requests
330-
},
331-
(regularChunk) => {
332-
regularContent += regularChunk;
333-
}
334-
);
335-
336-
const newRegularContent = regularContent.slice(regularContentBefore.length);
337-
if (newRegularContent) {
338-
onChunk?.(newRegularContent);
339-
}
340-
}
341-
342-
if (reasoningContent) {
343-
hasReceivedData = true;
344-
fullReasoningContent += reasoningContent;
345-
onReasoningChunk?.(reasoningContent);
346-
}
315+
if (content) {
316+
hasReceivedData = true;
317+
aggregatedContent += content;
318+
onChunk?.(content);
319+
}
320+
321+
if (reasoningContent) {
322+
hasReceivedData = true;
323+
fullReasoningContent += reasoningContent;
324+
onReasoningChunk?.(reasoningContent);
325+
}
347326
} catch (e) {
348327
console.error('Error parsing JSON chunk:', e);
349328
}
350329
}
351330
}
352331
}
353332

354-
if (!hasReceivedData && fullResponse.length === 0) {
333+
if (!hasReceivedData && aggregatedContent.length === 0) {
355334
const contextError = new Error(
356335
'The request exceeds the available context size. Try increasing the context size or enable context shift.'
357336
);
@@ -558,51 +537,6 @@ export class ChatService {
558537
}
559538
}
560539

561-
/**
562-
* Processes content to separate thinking tags from regular content.
563-
* Parses <think> and </think> tags to route content to appropriate handlers.
564-
*
565-
* @param content - The content string to process
566-
* @param currentInsideThinkTag - Current state of whether we're inside a think tag
567-
* @param addThinkContent - Callback to handle content inside think tags
568-
* @param addRegularContent - Callback to handle regular content outside think tags
569-
* @returns Boolean indicating if we're still inside a think tag after processing
570-
* @private
571-
*/
572-
private processContentForThinkTags(
573-
content: string,
574-
currentInsideThinkTag: boolean,
575-
addThinkContent: (chunk: string) => void,
576-
addRegularContent: (chunk: string) => void
577-
): boolean {
578-
let i = 0;
579-
let insideThinkTag = currentInsideThinkTag;
580-
581-
while (i < content.length) {
582-
if (!insideThinkTag && content.substring(i, i + 7) === '<think>') {
583-
insideThinkTag = true;
584-
i += 7; // Skip the <think> tag
585-
continue;
586-
}
587-
588-
if (insideThinkTag && content.substring(i, i + 8) === '</think>') {
589-
insideThinkTag = false;
590-
i += 8; // Skip the </think> tag
591-
continue;
592-
}
593-
594-
if (insideThinkTag) {
595-
addThinkContent(content[i]);
596-
} else {
597-
addRegularContent(content[i]);
598-
}
599-
600-
i++;
601-
}
602-
603-
return insideThinkTag;
604-
}
605-
606540
/**
607541
* Aborts any ongoing chat completion request.
608542
* Cancels the current request and cleans up the abort controller.

tools/server/webui/src/lib/stores/chat.svelte.ts

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import { config } from '$lib/stores/settings.svelte';
55
import { filterByLeafNodeId, findLeafNode, findDescendantMessages } from '$lib/utils/branching';
66
import { browser } from '$app/environment';
77
import { goto } from '$app/navigation';
8-
import { extractPartialThinking } from '$lib/utils/thinking';
98

109
/**
1110
* ChatStore - Central state management for chat conversations and AI interactions
@@ -328,10 +327,9 @@ class ChatStore {
328327
streamedContent += chunk;
329328
this.currentResponse = streamedContent;
330329

331-
const partialThinking = extractPartialThinking(streamedContent);
332330
const messageIndex = this.findMessageIndex(assistantMessage.id);
333331
this.updateMessageAtIndex(messageIndex, {
334-
content: partialThinking.remainingContent || streamedContent
332+
content: streamedContent
335333
});
336334
},
337335

@@ -653,18 +651,18 @@ class ChatStore {
653651

654652
if (lastMessage && lastMessage.role === 'assistant') {
655653
try {
656-
const partialThinking = extractPartialThinking(this.currentResponse);
654+
const contentToSave = this.currentResponse;
657655

658656
const updateData: {
659657
content: string;
660658
thinking?: string;
661659
timings?: ChatMessageTimings;
662660
} = {
663-
content: partialThinking.remainingContent || this.currentResponse
661+
content: contentToSave
664662
};
665663

666-
if (partialThinking.thinking) {
667-
updateData.thinking = partialThinking.thinking;
664+
if (lastMessage.thinking?.trim()) {
665+
updateData.thinking = lastMessage.thinking;
668666
}
669667

670668
const lastKnownState = await slotsService.getCurrentState();
@@ -684,7 +682,10 @@ class ChatStore {
684682

685683
await DatabaseStore.updateMessage(lastMessage.id, updateData);
686684

687-
lastMessage.content = partialThinking.remainingContent || this.currentResponse;
685+
lastMessage.content = contentToSave;
686+
if (updateData.thinking !== undefined) {
687+
lastMessage.thinking = updateData.thinking;
688+
}
688689
if (updateData.timings) {
689690
lastMessage.timings = updateData.timings;
690691
}

tools/server/webui/src/lib/utils/thinking.ts

Lines changed: 0 additions & 143 deletions
This file was deleted.

0 commit comments

Comments
 (0)