Skip to content

Commit 5c1e475

Browse files
refactor: unify reasoning handling via backend reasoning_content, drop frontend tag parsing
- Updated the chat message component to surface backend-supplied reasoning via message.thinking while showing the raw assistant content without inline tag scrubbing
- Simplified chat streaming to append content chunks directly, stream reasoning into the message model, and persist any partial reasoning when generation stops
- Refactored the chat service SSE handler to rely on server-provided reasoning_content, removing legacy <think> parsing logic
- Refreshed Storybook data and streaming flows to populate the thinking field explicitly for static and streaming assistant messages
1 parent 91a2a56 commit 5c1e475

File tree

6 files changed

+51
-274
lines changed

6 files changed

+51
-274
lines changed

tools/server/public/index.html.gz

1.9 KB
Binary file not shown.

tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
<script lang="ts">
22
import { getDeletionInfo } from '$lib/stores/chat.svelte';
33
import { copyToClipboard } from '$lib/utils/copy';
4-
import { parseThinkingContent } from '$lib/utils/thinking';
54
import ChatMessageAssistant from './ChatMessageAssistant.svelte';
65
import ChatMessageUser from './ChatMessageUser.svelte';
76
@@ -47,25 +46,13 @@
4746
4847
let thinkingContent = $derived.by(() => {
4948
if (message.role === 'assistant') {
50-
if (message.thinking) {
51-
return message.thinking;
52-
}
53-
54-
const parsed = parseThinkingContent(message.content);
55-
56-
return parsed.thinking;
49+
const trimmedThinking = message.thinking?.trim();
50+
return trimmedThinking ? trimmedThinking : null;
5751
}
5852
return null;
5953
});
6054
61-
let messageContent = $derived.by(() => {
62-
if (message.role === 'assistant') {
63-
const parsed = parseThinkingContent(message.content);
64-
return parsed.cleanContent?.replace('<|channel|>analysis', '');
65-
}
66-
67-
return message.content?.replace('<|channel|>analysis', '');
68-
});
55+
let messageContent = $derived.by(() => message.content);
6956
7057
function handleCancelEdit() {
7158
isEditing = false;

tools/server/webui/src/lib/services/chat.ts

Lines changed: 15 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -256,10 +256,8 @@ export class ChatService {
256256
}
257257

258258
const decoder = new TextDecoder();
259-
let fullResponse = '';
259+
let aggregatedContent = '';
260260
let fullReasoningContent = '';
261-
let regularContent = '';
262-
let insideThinkTag = false;
263261
let hasReceivedData = false;
264262
let lastTimings: ChatMessageTimings | undefined;
265263

@@ -277,7 +275,7 @@ export class ChatService {
277275
if (line.startsWith('data: ')) {
278276
const data = line.slice(6);
279277
if (data === '[DONE]') {
280-
if (!hasReceivedData && fullResponse.length === 0) {
278+
if (!hasReceivedData && aggregatedContent.length === 0) {
281279
const contextError = new Error(
282280
'The request exceeds the available context size. Try increasing the context size or enable context shift.'
283281
);
@@ -286,7 +284,7 @@ export class ChatService {
286284
return;
287285
}
288286

289-
onComplete?.(regularContent, fullReasoningContent || undefined, lastTimings);
287+
onComplete?.(aggregatedContent, fullReasoningContent || undefined, lastTimings);
290288

291289
return;
292290
}
@@ -308,44 +306,25 @@ export class ChatService {
308306
}
309307
}
310308

311-
if (content) {
312-
hasReceivedData = true;
313-
fullResponse += content;
314-
315-
// Track the regular content before processing this chunk
316-
const regularContentBefore = regularContent;
317-
318-
// Process content character by character to handle think tags
319-
insideThinkTag = this.processContentForThinkTags(
320-
content,
321-
insideThinkTag,
322-
() => {
323-
// Think content is ignored - we don't include it in API requests
324-
},
325-
(regularChunk) => {
326-
regularContent += regularChunk;
327-
}
328-
);
329-
330-
const newRegularContent = regularContent.slice(regularContentBefore.length);
331-
if (newRegularContent) {
332-
onChunk?.(newRegularContent);
333-
}
334-
}
335-
336-
if (reasoningContent) {
337-
hasReceivedData = true;
338-
fullReasoningContent += reasoningContent;
339-
onReasoningChunk?.(reasoningContent);
340-
}
309+
if (content) {
310+
hasReceivedData = true;
311+
aggregatedContent += content;
312+
onChunk?.(content);
313+
}
314+
315+
if (reasoningContent) {
316+
hasReceivedData = true;
317+
fullReasoningContent += reasoningContent;
318+
onReasoningChunk?.(reasoningContent);
319+
}
341320
} catch (e) {
342321
console.error('Error parsing JSON chunk:', e);
343322
}
344323
}
345324
}
346325
}
347326

348-
if (!hasReceivedData && fullResponse.length === 0) {
327+
if (!hasReceivedData && aggregatedContent.length === 0) {
349328
const contextError = new Error(
350329
'The request exceeds the available context size. Try increasing the context size or enable context shift.'
351330
);
@@ -552,51 +531,6 @@ export class ChatService {
552531
}
553532
}
554533

555-
/**
556-
* Processes content to separate thinking tags from regular content.
557-
* Parses <think> and </think> tags to route content to appropriate handlers.
558-
*
559-
* @param content - The content string to process
560-
* @param currentInsideThinkTag - Current state of whether we're inside a think tag
561-
* @param addThinkContent - Callback to handle content inside think tags
562-
* @param addRegularContent - Callback to handle regular content outside think tags
563-
* @returns Boolean indicating if we're still inside a think tag after processing
564-
* @private
565-
*/
566-
private processContentForThinkTags(
567-
content: string,
568-
currentInsideThinkTag: boolean,
569-
addThinkContent: (chunk: string) => void,
570-
addRegularContent: (chunk: string) => void
571-
): boolean {
572-
let i = 0;
573-
let insideThinkTag = currentInsideThinkTag;
574-
575-
while (i < content.length) {
576-
if (!insideThinkTag && content.substring(i, i + 7) === '<think>') {
577-
insideThinkTag = true;
578-
i += 7; // Skip the <think> tag
579-
continue;
580-
}
581-
582-
if (insideThinkTag && content.substring(i, i + 8) === '</think>') {
583-
insideThinkTag = false;
584-
i += 8; // Skip the </think> tag
585-
continue;
586-
}
587-
588-
if (insideThinkTag) {
589-
addThinkContent(content[i]);
590-
} else {
591-
addRegularContent(content[i]);
592-
}
593-
594-
i++;
595-
}
596-
597-
return insideThinkTag;
598-
}
599-
600534
/**
601535
* Aborts any ongoing chat completion request.
602536
* Cancels the current request and cleans up the abort controller.

tools/server/webui/src/lib/stores/chat.svelte.ts

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import { config } from '$lib/stores/settings.svelte';
55
import { filterByLeafNodeId, findLeafNode, findDescendantMessages } from '$lib/utils/branching';
66
import { browser } from '$app/environment';
77
import { goto } from '$app/navigation';
8-
import { extractPartialThinking } from '$lib/utils/thinking';
98

109
/**
1110
* ChatStore - Central state management for chat conversations and AI interactions
@@ -319,10 +318,9 @@ class ChatStore {
319318
streamedContent += chunk;
320319
this.currentResponse = streamedContent;
321320

322-
const partialThinking = extractPartialThinking(streamedContent);
323321
const messageIndex = this.findMessageIndex(assistantMessage.id);
324322
this.updateMessageAtIndex(messageIndex, {
325-
content: partialThinking.remainingContent || streamedContent
323+
content: streamedContent
326324
});
327325
},
328326

@@ -644,18 +642,18 @@ class ChatStore {
644642

645643
if (lastMessage && lastMessage.role === 'assistant') {
646644
try {
647-
const partialThinking = extractPartialThinking(this.currentResponse);
645+
const contentToSave = this.currentResponse;
648646

649647
const updateData: {
650648
content: string;
651649
thinking?: string;
652650
timings?: ChatMessageTimings;
653651
} = {
654-
content: partialThinking.remainingContent || this.currentResponse
652+
content: contentToSave
655653
};
656654

657-
if (partialThinking.thinking) {
658-
updateData.thinking = partialThinking.thinking;
655+
if (lastMessage.thinking?.trim()) {
656+
updateData.thinking = lastMessage.thinking;
659657
}
660658

661659
const lastKnownState = await slotsService.getCurrentState();
@@ -675,7 +673,10 @@ class ChatStore {
675673

676674
await DatabaseStore.updateMessage(lastMessage.id, updateData);
677675

678-
lastMessage.content = partialThinking.remainingContent || this.currentResponse;
676+
lastMessage.content = contentToSave;
677+
if (updateData.thinking !== undefined) {
678+
lastMessage.thinking = updateData.thinking;
679+
}
679680
if (updateData.timings) {
680681
lastMessage.timings = updateData.timings;
681682
}

tools/server/webui/src/lib/utils/thinking.ts

Lines changed: 0 additions & 143 deletions
This file was deleted.

0 commit comments

Comments
 (0)