Skip to content

Commit 30a3c62

Browse files
author
jiangpeiling
committed
✨ Add adaptation for deep-thinking models.
1 parent b61e705 commit 30a3c62

File tree

21 files changed

+1016
-66
lines changed

21 files changed

+1016
-66
lines changed

backend/agents/create_agent_info.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,17 +20,21 @@
2020
logger = logging.getLogger("create_agent_info")
2121

2222
async def create_model_config_list(tenant_id):
    """Build the model configuration list (main + secondary LLM) for a tenant.

    Args:
        tenant_id: Tenant identifier used to look up the tenant's model settings.

    Returns:
        list[ModelConfig]: Two entries, cite-named "main_model" and "sub_model".
    """
    config_list = []
    # Both models are built the same way; iterating avoids the duplicated
    # construction that previously led to a copy-paste bug (the sub model
    # read `is_deep_thinking` from the MAIN model's config).
    for cite_name, config_key in (("main_model", "LLM_ID"),
                                  ("sub_model", "LLM_SECONDARY_ID")):
        model_config = tenant_config_manager.get_model_config(key=config_key, tenant_id=tenant_id)
        config_list.append(ModelConfig(
            cite_name=cite_name,
            api_key=model_config.get("api_key", ""),
            model_name=get_model_name_from_config(model_config) if model_config.get(
                "model_name") else "",
            url=model_config.get("base_url", ""),
            # Fix: each model must read its OWN deep-thinking flag.
            is_deep_thinking=model_config.get("is_deep_thinking", False)))
    return config_list
3438

3539

3640
async def create_agent_config(agent_id, tenant_id, user_id, language: str = 'zh'):
@@ -240,10 +244,11 @@ async def create_agent_run_info(agent_id, minio_files, query, history, authoriza
240244

241245
agent_run_info = AgentRunInfo(
242246
query=final_query,
243-
model_config_list= model_list,
247+
model_config_list=model_list,
244248
observer=MessageObserver(lang=language),
245-
agent_config=await create_agent_config(agent_id=agent_id, tenant_id=tenant_id, user_id=user_id, language=language),
246-
mcp_host= mcp_host,
249+
agent_config=await create_agent_config(agent_id=agent_id, tenant_id=tenant_id, user_id=user_id,
250+
language=language),
251+
mcp_host=mcp_host,
247252
history=history,
248253
stop_event=threading.Event()
249254
)

backend/database/db_models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ class ModelRecord(TableBase):
132132
used_token = Column(Integer, doc="Number of tokens already used by the model in Q&A")
133133
display_name = Column(String(100), doc="Model name directly displayed on the frontend, customized by the user")
134134
connect_status = Column(String(100), doc="Model connectivity status of the latest detection. Optional values: Detecting, Available, Unavailable")
135+
is_deep_thinking = Column(Boolean, doc="Whether the model opens up deep thinking")
135136
tenant_id = Column(String(100), doc="Tenant ID for filtering")
136137
create_time = Column(TIMESTAMP(timezone=False), server_default=func.now(), doc="Creation time, audit field")
137138
delete_flag = Column(String(1), default="N", doc="After the user deletes it on the frontend, the deletion flag will be set to \"Y\" for soft deletion. Optional values: Y/N")

backend/services/conversation_management_service.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
delete_conversation, get_conversation, create_conversation, update_message_opinion
1616

1717
from utils.config_utils import tenant_config_manager,get_model_name_from_config
18+
from utils.str_utils import remove_think_tags, add_no_think_token
1819

1920
logger = logging.getLogger("conversation_management_service")
2021

@@ -250,11 +251,12 @@ def call_llm_for_title(content: str, tenant_id: str) -> str:
250251
"content": prompt_template["SYSTEM_PROMPT"]},
251252
{"role": "user",
252253
"content": user_prompt}]
254+
add_no_think_token(messages)
253255

254256
# Call the model
255257
response = llm(messages, max_tokens=10)
256258

257-
return response.content.strip()
259+
return remove_think_tags(response.content.strip())
258260

259261

260262
def update_conversation_title(conversation_id: int, title: str, user_id: str = None) -> bool:

backend/services/prompt_service.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
from utils.auth_utils import get_current_user_info
1616
from fastapi import Header, Request
1717

18+
from utils.str_utils import remove_think_tags, add_no_think_token
19+
1820
# Configure logging
1921
logger = logging.getLogger("prompt_service")
2022

@@ -41,6 +43,7 @@ def call_llm_for_system_prompt(user_prompt: str, system_prompt: str, callback=No
4143
)
4244
messages = [{"role": "system", "content": system_prompt},
4345
{"role": "user", "content": user_prompt}]
46+
add_no_think_token(messages)
4447
try:
4548
completion_kwargs = llm._prepare_completion_kwargs(
4649
messages=messages,
@@ -53,6 +56,7 @@ def call_llm_for_system_prompt(user_prompt: str, system_prompt: str, callback=No
5356
for chunk in current_request:
5457
new_token = chunk.choices[0].delta.content
5558
if new_token is not None:
59+
new_token = remove_think_tags(new_token)
5660
token_join.append(new_token)
5761
current_text = "".join(token_join)
5862
if callback is not None:

backend/utils/str_utils.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from typing import List
2+
3+
4+
def remove_think_tags(text: str) -> str:
    """Strip literal ``<think>`` / ``</think>`` marker tags from *text*.

    Only the tags themselves are removed; any content between them is kept,
    since callers may be processing a stream one token at a time.

    Args:
        text: Input text that may contain thinking tags.

    Returns:
        str: The text with all thinking tags removed.
    """
    cleaned = text
    for tag in ("<think>", "</think>"):
        cleaned = cleaned.replace(tag, "")
    return cleaned
15+
16+
17+
def add_no_think_token(messages: List[dict]):
    """Append the ``/no_think`` directive to the last user message, in place.

    Deep-thinking models that honor the soft switch skip their reasoning
    phase when the prompt ends with ``/no_think``. No-op when the last
    message is not from the user.

    Args:
        messages: Chat messages as ``{"role": ..., "content": ...}`` dicts;
            mutated in place.
    """
    # Guard against an empty history: the original indexed messages[-1]
    # unconditionally and raised IndexError for [].
    if messages and messages[-1]["role"] == "user":
        messages[-1]["content"] += " /no_think"

docker/init.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ CREATE TABLE IF NOT EXISTS "model_record_t" (
165165
"used_token" int4,
166166
"display_name" varchar(100) COLLATE "pg_catalog"."default",
167167
"connect_status" varchar(100) COLLATE "pg_catalog"."default",
168+
"is_deep_thinking" BOOLEAN DEFAULT FALSE,
168169
"create_time" timestamp(0) DEFAULT CURRENT_TIMESTAMP,
169170
"delete_flag" varchar(1) COLLATE "pg_catalog"."default" DEFAULT 'N'::character varying,
170171
"update_time" timestamp(0) DEFAULT CURRENT_TIMESTAMP,
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
-- Migration: add the deep-thinking flag to model_record_t.
-- IF NOT EXISTS makes the migration idempotent (safe to re-run), matching
-- the CREATE TABLE IF NOT EXISTS convention used in init.sql.
ALTER TABLE nexent.model_record_t
    ADD COLUMN IF NOT EXISTS is_deep_thinking BOOLEAN DEFAULT FALSE;
COMMENT ON COLUMN nexent.model_record_t.is_deep_thinking IS 'deep thinking switch, true=open, false=close';

frontend/app/[locale]/chat/streaming/chatStreamHandler.tsx

Lines changed: 53 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -164,8 +164,7 @@ export const handleStreamResponse = async (
164164
break;
165165

166166
case "model_output_thinking":
167-
// Process thinking content
168-
// If there's no currentStep, create one
167+
// Merge consecutive thinking chunks; create new group only when previous subType is not "thinking"
169168
if (!currentStep) {
170169
currentStep = {
171170
id: `step-thinking-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`,
@@ -180,34 +179,69 @@ export const handleStreamResponse = async (
180179
};
181180
}
182181

183-
// Ensure contents exists
184-
currentContentText = messageContent;
182+
const shouldAppendThinking =
183+
lastContentType === "model_output" &&
184+
lastModelOutputIndex >= 0 &&
185+
currentStep.contents[lastModelOutputIndex] &&
186+
currentStep.contents[lastModelOutputIndex].subType === "thinking";
185187

186-
// If the last streaming output is thinking content, append
187-
if (lastContentType === "model_output" && lastModelOutputIndex >= 0) {
188-
const modelOutput = currentStep.contents[lastModelOutputIndex];
189-
// Update content directly without prefix check
190-
let newContent = modelOutput.content + messageContent;
191-
// Remove "思考:" prefix if present
192-
const thinkingPrefix = t('chatStreamHandler.thinkingPrefix');
193-
if (newContent.startsWith(thinkingPrefix)) {
194-
newContent = newContent.substring(thinkingPrefix.length);
195-
}
196-
modelOutput.content = newContent;
188+
if (shouldAppendThinking) {
189+
// Append to existing thinking content
190+
currentStep.contents[lastModelOutputIndex].content += messageContent;
197191
} else {
198-
// Otherwise, create new thinking content
192+
// Create a new thinking content group
199193
currentStep.contents.push({
200-
id: `model-${Date.now()}-${Math.random().toString(36).substring(2, 7)}`,
194+
id: `thinking-${Date.now()}-${Math.random().toString(36).substring(2, 7)}`,
201195
type: "model_output",
202196
subType: "thinking",
203-
content: currentContentText,
197+
content: messageContent,
198+
expanded: true,
199+
timestamp: Date.now()
200+
});
201+
lastModelOutputIndex = currentStep.contents.length - 1;
202+
}
203+
204+
lastContentType = "model_output";
205+
break;
206+
207+
case "model_output_deep_thinking":
208+
// Consecutive deep_thinking chunks should be combined until a thinking chunk arrives
209+
if (!currentStep) {
210+
currentStep = {
211+
id: `step-thinking-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`,
212+
title: "AI Thinking",
213+
content: "",
214+
expanded: true,
215+
contents: [],
216+
metrics: "",
217+
thinking: { content: "", expanded: true },
218+
code: { content: "", expanded: true },
219+
output: { content: "", expanded: true }
220+
};
221+
}
222+
223+
const shouldAppendDeep =
224+
lastContentType === "model_output" &&
225+
lastModelOutputIndex >= 0 &&
226+
currentStep.contents[lastModelOutputIndex] &&
227+
currentStep.contents[lastModelOutputIndex].subType === "deep_thinking";
228+
229+
if (shouldAppendDeep) {
230+
// Append to existing deep_thinking content
231+
currentStep.contents[lastModelOutputIndex].content += messageContent;
232+
} else {
233+
// Create a new deep_thinking content group
234+
currentStep.contents.push({
235+
id: `deep-thinking-${Date.now()}-${Math.random().toString(36).substring(2, 7)}`,
236+
type: "model_output",
237+
subType: "deep_thinking",
238+
content: messageContent,
204239
expanded: true,
205240
timestamp: Date.now()
206241
});
207242
lastModelOutputIndex = currentStep.contents.length - 1;
208243
}
209244

210-
// Update the last processed content type
211245
lastContentType = "model_output";
212246
break;
213247

frontend/app/[locale]/chat/streaming/chatStreamMain.tsx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ export function ChatStreamMain({
143143
step.contents.forEach((content: any) => {
144144
const taskMsg = {
145145
type: content.type,
146+
subType: content.subType, // Preserve subType for styling (e.g., deep_thinking)
146147
content: content.content,
147148
id: content.id,
148149
assistantId: message.id,

frontend/app/[locale]/chat/streaming/taskWindow.tsx

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,9 @@ const messageHandlers: MessageHandler[] = [
3939
canHandle: (message) =>
4040
message.type === "agent_new_run" ||
4141
message.type === "generating_code" ||
42-
message.type === "executing",
42+
message.type === "executing" ||
43+
message.type === "model_output_thinking" ||
44+
message.type === "model_output_deep_thinking",
4345
render: (message, _t) => (
4446
<div style={{
4547
fontFamily: "-apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif",
@@ -571,7 +573,7 @@ const messageHandlers: MessageHandler[] = [
571573
fontFamily: "-apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif",
572574
fontSize: "0.875rem",
573575
lineHeight: 1.5,
574-
color: "#1f2937",
576+
color: message.subType === "deep_thinking" ? "#6b7280" : "#1f2937",
575577
fontWeight: 400
576578
}}>
577579
<MarkdownRenderer content={message.content} className="task-message-content" />

0 commit comments

Comments
 (0)