Skip to content

Commit 30a3c62

Browse files
author
jiangpeiling
committed
✨ Add adaptation for deep-thinking models.
1 parent b61e705 commit 30a3c62

File tree

21 files changed

+1016
-66
lines changed

21 files changed

+1016
-66
lines changed

backend/agents/create_agent_info.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,17 +20,21 @@
2020
logger = logging.getLogger("create_agent_info")
2121

2222
async def create_model_config_list(tenant_id):
    """Build the model configuration list (main + secondary LLM) for a tenant.

    Args:
        tenant_id: Tenant identifier used to look up the tenant's model settings.

    Returns:
        list[ModelConfig]: Two entries, cite-named "main_model" and "sub_model".
    """
    config_list = []
    # Both models are built the same way; iterating avoids the duplicated
    # construction that previously led to a copy-paste bug (the sub model
    # read `is_deep_thinking` from the MAIN model's config).
    for cite_name, config_key in (("main_model", "LLM_ID"),
                                  ("sub_model", "LLM_SECONDARY_ID")):
        model_config = tenant_config_manager.get_model_config(key=config_key, tenant_id=tenant_id)
        config_list.append(ModelConfig(
            cite_name=cite_name,
            api_key=model_config.get("api_key", ""),
            model_name=get_model_name_from_config(model_config) if model_config.get(
                "model_name") else "",
            url=model_config.get("base_url", ""),
            # Fix: each model must read its OWN deep-thinking flag.
            is_deep_thinking=model_config.get("is_deep_thinking", False)))
    return config_list
3438

3539

3640
async def create_agent_config(agent_id, tenant_id, user_id, language: str = 'zh'):
@@ -240,10 +244,11 @@ async def create_agent_run_info(agent_id, minio_files, query, history, authoriza
240244

241245
agent_run_info = AgentRunInfo(
242246
query=final_query,
243-
model_config_list= model_list,
247+
model_config_list=model_list,
244248
observer=MessageObserver(lang=language),
245-
agent_config=await create_agent_config(agent_id=agent_id, tenant_id=tenant_id, user_id=user_id, language=language),
246-
mcp_host= mcp_host,
249+
agent_config=await create_agent_config(agent_id=agent_id, tenant_id=tenant_id, user_id=user_id,
250+
language=language),
251+
mcp_host=mcp_host,
247252
history=history,
248253
stop_event=threading.Event()
249254
)

backend/database/db_models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ class ModelRecord(TableBase):
132132
used_token = Column(Integer, doc="Number of tokens already used by the model in Q&A")
133133
display_name = Column(String(100), doc="Model name directly displayed on the frontend, customized by the user")
134134
connect_status = Column(String(100), doc="Model connectivity status of the latest detection. Optional values: Detecting, Available, Unavailable")
135+
is_deep_thinking = Column(Boolean, doc="Whether the model opens up deep thinking")
135136
tenant_id = Column(String(100), doc="Tenant ID for filtering")
136137
create_time = Column(TIMESTAMP(timezone=False), server_default=func.now(), doc="Creation time, audit field")
137138
delete_flag = Column(String(1), default="N", doc="After the user deletes it on the frontend, the deletion flag will be set to \"Y\" for soft deletion. Optional values: Y/N")

backend/services/conversation_management_service.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
delete_conversation, get_conversation, create_conversation, update_message_opinion
1616

1717
from utils.config_utils import tenant_config_manager,get_model_name_from_config
18+
from utils.str_utils import remove_think_tags, add_no_think_token
1819

1920
logger = logging.getLogger("conversation_management_service")
2021

@@ -250,11 +251,12 @@ def call_llm_for_title(content: str, tenant_id: str) -> str:
250251
"content": prompt_template["SYSTEM_PROMPT"]},
251252
{"role": "user",
252253
"content": user_prompt}]
254+
add_no_think_token(messages)
253255

254256
# Call the model
255257
response = llm(messages, max_tokens=10)
256258

257-
return response.content.strip()
259+
return remove_think_tags(response.content.strip())
258260

259261

260262
def update_conversation_title(conversation_id: int, title: str, user_id: str = None) -> bool:

backend/services/prompt_service.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
from utils.auth_utils import get_current_user_info
1616
from fastapi import Header, Request
1717

18+
from utils.str_utils import remove_think_tags, add_no_think_token
19+
1820
# Configure logging
1921
logger = logging.getLogger("prompt_service")
2022

@@ -41,6 +43,7 @@ def call_llm_for_system_prompt(user_prompt: str, system_prompt: str, callback=No
4143
)
4244
messages = [{"role": "system", "content": system_prompt},
4345
{"role": "user", "content": user_prompt}]
46+
add_no_think_token(messages)
4447
try:
4548
completion_kwargs = llm._prepare_completion_kwargs(
4649
messages=messages,
@@ -53,6 +56,7 @@ def call_llm_for_system_prompt(user_prompt: str, system_prompt: str, callback=No
5356
for chunk in current_request:
5457
new_token = chunk.choices[0].delta.content
5558
if new_token is not None:
59+
new_token = remove_think_tags(new_token)
5660
token_join.append(new_token)
5761
current_text = "".join(token_join)
5862
if callback is not None:

backend/utils/str_utils.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from typing import List
2+
3+
4+
def remove_think_tags(text: str) -> str:
    """Strip literal ``<think>`` / ``</think>`` marker tags from *text*.

    Only the tags themselves are removed; any content between them is kept,
    since callers may be processing a stream one token at a time.

    Args:
        text: Input text that may contain thinking tags.

    Returns:
        str: The text with all thinking tags removed.
    """
    cleaned = text
    for tag in ("<think>", "</think>"):
        cleaned = cleaned.replace(tag, "")
    return cleaned
15+
16+
17+
def add_no_think_token(messages: List[dict]):
    """Append the ``/no_think`` directive to the last user message, in place.

    Deep-thinking models that honor the soft switch skip their reasoning
    phase when the prompt ends with ``/no_think``. No-op when the last
    message is not from the user.

    Args:
        messages: Chat messages as ``{"role": ..., "content": ...}`` dicts;
            mutated in place.
    """
    # Guard against an empty history: the original indexed messages[-1]
    # unconditionally and raised IndexError for [].
    if messages and messages[-1]["role"] == "user":
        messages[-1]["content"] += " /no_think"

docker/init.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ CREATE TABLE IF NOT EXISTS "model_record_t" (
165165
"used_token" int4,
166166
"display_name" varchar(100) COLLATE "pg_catalog"."default",
167167
"connect_status" varchar(100) COLLATE "pg_catalog"."default",
168+
"is_deep_thinking" BOOLEAN DEFAULT FALSE,
168169
"create_time" timestamp(0) DEFAULT CURRENT_TIMESTAMP,
169170
"delete_flag" varchar(1) COLLATE "pg_catalog"."default" DEFAULT 'N'::character varying,
170171
"update_time" timestamp(0) DEFAULT CURRENT_TIMESTAMP,
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
-- Migration: add the deep-thinking flag to model_record_t.
-- IF NOT EXISTS makes the migration idempotent (safe to re-run), matching
-- the CREATE TABLE IF NOT EXISTS convention used in init.sql.
ALTER TABLE nexent.model_record_t
    ADD COLUMN IF NOT EXISTS is_deep_thinking BOOLEAN DEFAULT FALSE;
COMMENT ON COLUMN nexent.model_record_t.is_deep_thinking IS 'deep thinking switch, true=open, false=close';

frontend/app/[locale]/chat/streaming/chatStreamHandler.tsx

Lines changed: 53 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -164,8 +164,7 @@ export const handleStreamResponse = async (
164164
break;
165165

166166
case "model_output_thinking":
167-
// Process thinking content
168-
// If there's no currentStep, create one
167+
// Merge consecutive thinking chunks; create new group only when previous subType is not "thinking"
169168
if (!currentStep) {
170169
currentStep = {
171170
id: `step-thinking-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`,
@@ -180,34 +179,69 @@ export const handleStreamResponse = async (
180179
};
181180
}
182181

183-
// Ensure contents exists
184-
currentContentText = messageContent;
182+
const shouldAppendThinking =
183+
lastContentType === "model_output" &&
184+
lastModelOutputIndex >= 0 &&
185+
currentStep.contents[lastModelOutputIndex] &&
186+
currentStep.contents[lastModelOutputIndex].subType === "thinking";
185187

186-
// If the last streaming output is thinking content, append
187-
if (lastContentType === "model_output" && lastModelOutputIndex >= 0) {
188-
const modelOutput = currentStep.contents[lastModelOutputIndex];
189-
// Update content directly without prefix check
190-
let newContent = modelOutput.content + messageContent;
191-
// Remove "思考:" prefix if present
192-
const thinkingPrefix = t('chatStreamHandler.thinkingPrefix');
193-
if (newContent.startsWith(thinkingPrefix)) {
194-
newContent = newContent.substring(thinkingPrefix.length);
195-
}
196-
modelOutput.content = newContent;
188+
if (shouldAppendThinking) {
189+
// Append to existing thinking content
190+
currentStep.contents[lastModelOutputIndex].content += messageContent;
197191
} else {
198-
// Otherwise, create new thinking content
192+
// Create a new thinking content group
199193
currentStep.contents.push({
200-
id: `model-${Date.now()}-${Math.random().toString(36).substring(2, 7)}`,
194+
id: `thinking-${Date.now()}-${Math.random().toString(36).substring(2, 7)}`,
201195
type: "model_output",
202196
subType: "thinking",
203-
content: currentContentText,
197+
content: messageContent,
198+
expanded: true,
199+
timestamp: Date.now()
200+
});
201+
lastModelOutputIndex = currentStep.contents.length - 1;
202+
}
203+
204+
lastContentType = "model_output";
205+
break;
206+
207+
case "model_output_deep_thinking":
208+
// Consecutive deep_thinking chunks should be combined until a thinking chunk arrives
209+
if (!currentStep) {
210+
currentStep = {
211+
id: `step-thinking-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`,
212+
title: "AI Thinking",
213+
content: "",
214+
expanded: true,
215+
contents: [],
216+
metrics: "",
217+
thinking: { content: "", expanded: true },
218+
code: { content: "", expanded: true },
219+
output: { content: "", expanded: true }
220+
};
221+
}
222+
223+
const shouldAppendDeep =
224+
lastContentType === "model_output" &&
225+
lastModelOutputIndex >= 0 &&
226+
currentStep.contents[lastModelOutputIndex] &&
227+
currentStep.contents[lastModelOutputIndex].subType === "deep_thinking";
228+
229+
if (shouldAppendDeep) {
230+
// Append to existing deep_thinking content
231+
currentStep.contents[lastModelOutputIndex].content += messageContent;
232+
} else {
233+
// Create a new deep_thinking content group
234+
currentStep.contents.push({
235+
id: `deep-thinking-${Date.now()}-${Math.random().toString(36).substring(2, 7)}`,
236+
type: "model_output",
237+
subType: "deep_thinking",
238+
content: messageContent,
204239
expanded: true,
205240
timestamp: Date.now()
206241
});
207242
lastModelOutputIndex = currentStep.contents.length - 1;
208243
}
209244

210-
// Update the last processed content type
211245
lastContentType = "model_output";
212246
break;
213247

frontend/app/[locale]/chat/streaming/chatStreamMain.tsx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ export function ChatStreamMain({
143143
step.contents.forEach((content: any) => {
144144
const taskMsg = {
145145
type: content.type,
146+
subType: content.subType, // Preserve subType for styling (e.g., deep_thinking)
146147
content: content.content,
147148
id: content.id,
148149
assistantId: message.id,

frontend/app/[locale]/chat/streaming/taskWindow.tsx

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,9 @@ const messageHandlers: MessageHandler[] = [
3939
canHandle: (message) =>
4040
message.type === "agent_new_run" ||
4141
message.type === "generating_code" ||
42-
message.type === "executing",
42+
message.type === "executing" ||
43+
message.type === "model_output_thinking" ||
44+
message.type === "model_output_deep_thinking",
4345
render: (message, _t) => (
4446
<div style={{
4547
fontFamily: "-apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif",
@@ -571,7 +573,7 @@ const messageHandlers: MessageHandler[] = [
571573
fontFamily: "-apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif",
572574
fontSize: "0.875rem",
573575
lineHeight: 1.5,
574-
color: "#1f2937",
576+
color: message.subType === "deep_thinking" ? "#6b7280" : "#1f2937",
575577
fontWeight: 400
576578
}}>
577579
<MarkdownRenderer content={message.content} className="task-message-content" />

0 commit comments

Comments
 (0)