Commit 78a771e

✨ Adapt to deep thinking models.

2 parents d4ba05f + 99cf9b2 · commit 78a771e

File tree: 22 files changed, +1248 −84 lines

backend/agents/create_agent_info.py

Lines changed: 17 additions & 12 deletions
```diff
@@ -20,17 +20,21 @@
 logger = logging.getLogger("create_agent_info")


 async def create_model_config_list(tenant_id):
-    main_model_config = tenant_config_manager.get_model_config(key="LLM_ID", tenant_id=tenant_id)
-    sub_model_config = tenant_config_manager.get_model_config(key="LLM_SECONDARY_ID", tenant_id=tenant_id)
-
-    return [ModelConfig(cite_name="main_model",
-                        api_key=main_model_config.get("api_key", ""),
-                        model_name=get_model_name_from_config(main_model_config) if main_model_config.get("model_name") else "",
-                        url=main_model_config.get("base_url", "")),
+    main_model_config = tenant_config_manager.get_model_config(key="LLM_ID", tenant_id=tenant_id)
+    sub_model_config = tenant_config_manager.get_model_config(key="LLM_SECONDARY_ID", tenant_id=tenant_id)
+
+    return [ModelConfig(cite_name="main_model",
+                        api_key=main_model_config.get("api_key", ""),
+                        model_name=get_model_name_from_config(main_model_config) if main_model_config.get(
+                            "model_name") else "",
+                        url=main_model_config.get("base_url", ""),
+                        is_deep_thinking=main_model_config.get("is_deep_thinking", False)),
             ModelConfig(cite_name="sub_model",
                         api_key=sub_model_config.get("api_key", ""),
-                        model_name=get_model_name_from_config(sub_model_config) if sub_model_config.get("model_name") else "",
-                        url=sub_model_config.get("base_url", ""))]
+                        model_name=get_model_name_from_config(sub_model_config) if sub_model_config.get(
+                            "model_name") else "",
+                        url=sub_model_config.get("base_url", ""),
+                        is_deep_thinking=sub_model_config.get("is_deep_thinking", False))]


 async def create_agent_config(agent_id, tenant_id, user_id, language: str = 'zh'):
@@ -288,10 +292,11 @@ async def create_agent_run_info(agent_id, minio_files, query, history, authoriza

     agent_run_info = AgentRunInfo(
         query=final_query,
-        model_config_list= model_list,
+        model_config_list=model_list,
         observer=MessageObserver(lang=language),
-        agent_config=await create_agent_config(agent_id=agent_id, tenant_id=tenant_id, user_id=user_id, language=language),
-        mcp_host= mcp_host,
+        agent_config=await create_agent_config(agent_id=agent_id, tenant_id=tenant_id, user_id=user_id,
+                                               language=language),
+        mcp_host=mcp_host,
         history=history,
         stop_event=threading.Event()
     )
```
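Because both `ModelConfig` entries read the flag with `dict.get("is_deep_thinking", False)`, tenant configs that predate the new column silently fall back to non-thinking behavior. A minimal sketch of that default path; the config dicts here are hypothetical stand-ins for what `tenant_config_manager` returns:

```python
# Hypothetical tenant config payloads; real ones come from tenant_config_manager
legacy_config = {"api_key": "sk-xxx", "base_url": "https://llm.example/v1"}
migrated_config = {**legacy_config, "is_deep_thinking": True}

# Mirrors the lookup used in create_model_config_list above
print(legacy_config.get("is_deep_thinking", False))    # False (safe default)
print(migrated_config.get("is_deep_thinking", False))  # True
```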

backend/database/db_models.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -132,6 +132,7 @@ class ModelRecord(TableBase):
     used_token = Column(Integer, doc="Number of tokens already used by the model in Q&A")
     display_name = Column(String(100), doc="Model name directly displayed on the frontend, customized by the user")
     connect_status = Column(String(100), doc="Model connectivity status of the latest detection. Optional values: Detecting, Available, Unavailable")
+    is_deep_thinking = Column(Boolean, doc="Whether the model opens up deep thinking")
     tenant_id = Column(String(100), doc="Tenant ID for filtering")
     create_time = Column(TIMESTAMP(timezone=False), server_default=func.now(), doc="Creation time, audit field")
     delete_flag = Column(String(1), default="N", doc="After the user deletes it on the frontend, the deletion flag will be set to \"Y\" for soft deletion. Optional values: Y/N")
```

backend/services/conversation_management_service.py

Lines changed: 3 additions & 1 deletion
```diff
@@ -15,6 +15,7 @@
     delete_conversation, get_conversation, create_conversation, update_message_opinion

 from utils.config_utils import tenant_config_manager,get_model_name_from_config
+from utils.str_utils import remove_think_tags, add_no_think_token

 logger = logging.getLogger("conversation_management_service")

@@ -250,11 +251,12 @@ def call_llm_for_title(content: str, tenant_id: str) -> str:
                 "content": prompt_template["SYSTEM_PROMPT"]},
                {"role": "user",
                 "content": user_prompt}]
+    add_no_think_token(messages)

     # Call the model
     response = llm(messages, max_tokens=10)

-    return response.content.strip()
+    return remove_think_tags(response.content.strip())


 def update_conversation_title(conversation_id: int, title: str, user_id: str = None) -> bool:
```

backend/services/prompt_service.py

Lines changed: 4 additions & 0 deletions
```diff
@@ -15,6 +15,8 @@
 from utils.auth_utils import get_current_user_info
 from fastapi import Header, Request

+from utils.str_utils import remove_think_tags, add_no_think_token
+
 # Configure logging
 logger = logging.getLogger("prompt_service")

@@ -41,6 +43,7 @@ def call_llm_for_system_prompt(user_prompt: str, system_prompt: str, callback=No
     )
     messages = [{"role": "system", "content": system_prompt},
                 {"role": "user", "content": user_prompt}]
+    add_no_think_token(messages)
     try:
         completion_kwargs = llm._prepare_completion_kwargs(
             messages=messages,
@@ -53,6 +56,7 @@ def call_llm_for_system_prompt(user_prompt: str, system_prompt: str, callback=No
     for chunk in current_request:
         new_token = chunk.choices[0].delta.content
         if new_token is not None:
+            new_token = remove_think_tags(new_token)
             token_join.append(new_token)
             current_text = "".join(token_join)
             if callback is not None:
```

backend/utils/str_utils.py

Lines changed: 21 additions & 0 deletions
```diff
@@ -0,0 +1,21 @@
+from typing import List
+
+
+def remove_think_tags(text: str) -> str:
+    """
+    Remove thinking tags from text
+
+    Args:
+        text: Input text that may contain thinking tags
+
+    Returns:
+        str: Text with thinking tags removed
+    """
+    return text.replace("<think>", "").replace("</think>", "")
+
+
+def add_no_think_token(messages: List[dict]):
+    if not messages:
+        return
+    if messages[-1]["role"] == "user" and "content" in messages[-1]:
+        messages[-1]["content"] += " /no_think"
```
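Together these two helpers bracket an LLM call: `add_no_think_token` asks a hybrid reasoning model (e.g., one honoring a Qwen-style `/no_think` soft switch) to skip its reasoning phase, and `remove_think_tags` scrubs any `<think>` markers that leak through anyway. A minimal round-trip sketch, assuming it runs with `backend/` on the import path and using a canned reply in place of a real model call:

```python
from utils.str_utils import add_no_think_token, remove_think_tags

messages = [
    {"role": "system", "content": "Generate a short conversation title."},
    {"role": "user", "content": "Summarize our chat about Docker deploys."},
]

# Appends " /no_think" to the last user message in place
add_no_think_token(messages)
assert messages[-1]["content"].endswith(" /no_think")

# Canned reply standing in for a real llm(messages, max_tokens=10) call
raw_reply = "<think></think>Docker deployment Q&A"
print(remove_think_tags(raw_reply.strip()))  # -> "Docker deployment Q&A"
```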

docker/deploy.sh

Lines changed: 19 additions & 19 deletions
All changes in this file are whitespace cleanup: each bare `-`/`+` pair below replaces a blank line that carried trailing spaces with a truly empty line.

```diff
@@ -196,7 +196,7 @@ install() {
         export COMPOSE_PROFILES
         echo "📋 Using profiles: $COMPOSE_PROFILES"
     fi
-
+
     # Start infrastructure services
     if ! docker-compose -p nexent -f "${COMPOSE_FILE}" up -d $INFRA_SERVICES; then
         echo "❌ ERROR Failed to start infrastructure services"
@@ -221,13 +221,13 @@ install() {
         export MINIO_ACCESS_KEY
         export MINIO_SECRET_KEY
     fi
-
+
     wait_for_elasticsearch_healthy || {
         echo "❌ ERROR Elasticsearch health check failed"
         ERROR_OCCURRED=1
         return 1
     }
-
+
     # Generate Elasticsearch API key and export to environment
     generate_elasticsearch_api_key_for_env || {
         echo "❌ ERROR Failed to generate Elasticsearch API key"
@@ -364,7 +364,7 @@ wait_for_elasticsearch_healthy() {
         sleep 10
         retries=$((retries + 1))
     done
-
+
     if [ $retries -eq $max_retries ]; then
         echo "⚠️ Warning: Elasticsearch did not become healthy within expected time"
         echo "   You may need to check the container logs and try again"
@@ -378,13 +378,13 @@
 # Function to generate Elasticsearch API key for environment variables (not file)
 generate_elasticsearch_api_key_for_env() {
     echo "🔑 Generating ELASTICSEARCH_API_KEY for environment..."
-
+
     # Generate API key
     API_KEY_JSON=$(docker-compose -p nexent -f "${COMPOSE_FILE}" exec -T nexent-elasticsearch curl -s -u "elastic:$ELASTIC_PASSWORD" "http://localhost:9200/_security/api_key" -H "Content-Type: application/json" -d '{"name":"nexent_deploy_key","role_descriptors":{"nexent_role":{"cluster":["all"],"index":[{"names":["*"],"privileges":["all"]}]}}}')
-
+
     # Extract API key
     ELASTICSEARCH_API_KEY=$(echo "$API_KEY_JSON" | grep -o '"encoded":"[^"]*"' | awk -F'"' '{print $4}')
-
+
     if [ -n "$ELASTICSEARCH_API_KEY" ]; then
         # Export to environment for docker-compose
         export ELASTICSEARCH_API_KEY
@@ -413,14 +413,14 @@ generate_env_for_infrastructure() {
         echo "❌ ERROR generate_env.sh not found in docker directory"
         return 1
     fi
-
+
     # Make sure the script is executable and run it
     chmod +x generate_env.sh
     if ./generate_env.sh; then
         echo "--------------------------------"
         echo ""
         echo "✅ Environment file generated successfully for infrastructure mode!"
-
+
         # Source the generated .env file to make variables available
         if [ -f "../.env" ]; then
             echo "📁 Sourcing generated .env file..."
@@ -436,7 +436,7 @@ generate_env_for_infrastructure() {
         echo "❌ ERROR Failed to generate environment file"
         return 1
     fi
-
+
     echo ""
     echo "--------------------------------"
     echo ""
@@ -614,45 +614,45 @@
 main_deploy() {
     # Start deployment
     select_deployment_mode || { echo "❌ Deployment mode selection failed"; exit 1; }
-
+
     # Special handling for infrastructure mode
     if [ "$DEPLOYMENT_MODE" = "infrastructure" ]; then
         echo "🏗️ Infrastructure mode detected - preparing infrastructure services..."
-
+
         # Set up basic environment and permissions first
         add_permission || { echo "❌ Permission setup failed"; exit 1; }
-
+
         # Choose image environment (required for Docker images)
         echo "🌐 Selecting image environment for infrastructure services..."
         choose_image_env || { echo "❌ Image environment setup failed"; exit 1; }
-
+
         # Generate MinIO keys first to avoid docker-compose warnings
        echo "🔑 Pre-generating MinIO keys to avoid docker-compose warnings..."
        generate_minio_ak_sk || { echo "❌ MinIO key generation failed"; exit 1; }
-
+
        # Export MinIO keys to current environment for docker-compose
        export MINIO_ACCESS_KEY
        export MINIO_SECRET_KEY
-
+
        # Start infrastructure services (basic services only)
        echo "🔧 Starting infrastructure services..."
        INFRA_SERVICES="nexent-elasticsearch nexent-postgresql nexent-minio redis"
        if ! docker-compose -p nexent -f "${COMPOSE_FILE}" up -d $INFRA_SERVICES; then
            echo "❌ ERROR Failed to start infrastructure services"
            exit 1
        fi
-
+
        # Wait for services to be healthy, then generate complete environment
        echo "🔑 Generating complete environment file with all keys..."
        generate_env_for_infrastructure || { echo "❌ Environment generation failed"; exit 1; }
-
+
        echo "🎉 Infrastructure deployment completed successfully!"
        echo "📦 You can now start the core services manually using dev containers"
        echo "📁 Environment file available at: $(cd .. && pwd)/.env"
        echo "💡 Use 'source .env' to load environment variables in your development shell"
        return 0
    fi
-
+
    # Normal deployment flow for other modes
    select_terminal_tool || { echo "❌ Terminal tool configuration failed"; exit 1; }
    add_permission || { echo "❌ Permission setup failed"; exit 1; }
```

docker/init.sql

Lines changed: 1 addition & 0 deletions
```diff
@@ -165,6 +165,7 @@ CREATE TABLE IF NOT EXISTS "model_record_t" (
     "used_token" int4,
     "display_name" varchar(100) COLLATE "pg_catalog"."default",
     "connect_status" varchar(100) COLLATE "pg_catalog"."default",
+    "is_deep_thinking" BOOLEAN DEFAULT FALSE,
     "create_time" timestamp(0) DEFAULT CURRENT_TIMESTAMP,
     "delete_flag" varchar(1) COLLATE "pg_catalog"."default" DEFAULT 'N'::character varying,
     "update_time" timestamp(0) DEFAULT CURRENT_TIMESTAMP,
```
Lines changed: 3 additions & 0 deletions
```diff
@@ -0,0 +1,3 @@
+ALTER TABLE nexent.model_record_t
+    ADD COLUMN is_deep_thinking BOOLEAN DEFAULT FALSE;
+COMMENT ON COLUMN nexent.model_record_t.is_deep_thinking IS 'deep thinking switch, true=open, false=close';
```
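After applying the migration, pre-existing model records pick up the column default rather than NULL, since PostgreSQL backfills `ADD COLUMN ... DEFAULT FALSE`. A quick verification sketch through the ORM model shown earlier, assuming `backend/` is on the import path and using a placeholder connection string:

```python
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from database.db_models import ModelRecord  # ORM class extended in this commit

# Placeholder DSN; substitute the deployment's real PostgreSQL credentials
engine = create_engine("postgresql://nexent:nexent@localhost:5432/nexent")
Session = sessionmaker(bind=engine)

with Session() as session:
    # Rows created before the migration read as FALSE, never NULL
    enabled = session.query(ModelRecord).filter(
        ModelRecord.is_deep_thinking.is_(True)).count()
    print(f"models with deep thinking enabled: {enabled}")
```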

frontend/app/[locale]/chat/streaming/chatStreamHandler.tsx

Lines changed: 53 additions & 19 deletions
```diff
@@ -164,8 +164,7 @@ export const handleStreamResponse = async (
       break;

     case "model_output_thinking":
-      // Process thinking content
-      // If there's no currentStep, create one
+      // Merge consecutive thinking chunks; create new group only when previous subType is not "thinking"
       if (!currentStep) {
         currentStep = {
           id: `step-thinking-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`,
@@ -180,34 +179,69 @@
         };
       }

-      // Ensure contents exists
-      currentContentText = messageContent;
+      const shouldAppendThinking =
+        lastContentType === "model_output" &&
+        lastModelOutputIndex >= 0 &&
+        currentStep.contents[lastModelOutputIndex] &&
+        currentStep.contents[lastModelOutputIndex].subType === "thinking";

-      // If the last streaming output is thinking content, append
-      if (lastContentType === "model_output" && lastModelOutputIndex >= 0) {
-        const modelOutput = currentStep.contents[lastModelOutputIndex];
-        // Update content directly without prefix check
-        let newContent = modelOutput.content + messageContent;
-        // Remove "思考:" prefix if present
-        const thinkingPrefix = t('chatStreamHandler.thinkingPrefix');
-        if (newContent.startsWith(thinkingPrefix)) {
-          newContent = newContent.substring(thinkingPrefix.length);
-        }
-        modelOutput.content = newContent;
+      if (shouldAppendThinking) {
+        // Append to existing thinking content
+        currentStep.contents[lastModelOutputIndex].content += messageContent;
       } else {
-        // Otherwise, create new thinking content
+        // Create a new thinking content group
         currentStep.contents.push({
-          id: `model-${Date.now()}-${Math.random().toString(36).substring(2, 7)}`,
+          id: `thinking-${Date.now()}-${Math.random().toString(36).substring(2, 7)}`,
           type: "model_output",
           subType: "thinking",
-          content: currentContentText,
+          content: messageContent,
+          expanded: true,
+          timestamp: Date.now()
+        });
+        lastModelOutputIndex = currentStep.contents.length - 1;
+      }
+
+      lastContentType = "model_output";
+      break;
+
+    case "model_output_deep_thinking":
+      // Consecutive deep_thinking chunks should be combined until a thinking chunk arrives
+      if (!currentStep) {
+        currentStep = {
+          id: `step-thinking-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`,
+          title: "AI Thinking",
+          content: "",
+          expanded: true,
+          contents: [],
+          metrics: "",
+          thinking: { content: "", expanded: true },
+          code: { content: "", expanded: true },
+          output: { content: "", expanded: true }
+        };
+      }
+
+      const shouldAppendDeep =
+        lastContentType === "model_output" &&
+        lastModelOutputIndex >= 0 &&
+        currentStep.contents[lastModelOutputIndex] &&
+        currentStep.contents[lastModelOutputIndex].subType === "deep_thinking";
+
+      if (shouldAppendDeep) {
+        // Append to existing deep_thinking content
+        currentStep.contents[lastModelOutputIndex].content += messageContent;
+      } else {
+        // Create a new deep_thinking content group
+        currentStep.contents.push({
+          id: `deep-thinking-${Date.now()}-${Math.random().toString(36).substring(2, 7)}`,
+          type: "model_output",
+          subType: "deep_thinking",
+          content: messageContent,
           expanded: true,
           timestamp: Date.now()
         });
         lastModelOutputIndex = currentStep.contents.length - 1;
       }

-      // Update the last processed content type
       lastContentType = "model_output";
       break;
```
frontend/app/[locale]/chat/streaming/chatStreamMain.tsx

Lines changed: 1 addition & 0 deletions
```diff
@@ -143,6 +143,7 @@ export function ChatStreamMain({
       step.contents.forEach((content: any) => {
         const taskMsg = {
           type: content.type,
+          subType: content.subType, // Preserve subType for styling (e.g., deep_thinking)
           content: content.content,
           id: content.id,
           assistantId: message.id,
```