Skip to content

Commit 99cf9b2

Browse files
author
jiangpeiling
committed
✨ Add adaptation for deep thinking models.
1 parent 30a3c62 commit 99cf9b2

File tree

6 files changed

+234
-20
lines changed

6 files changed

+234
-20
lines changed

backend/agents/create_agent_info.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ async def create_model_config_list(tenant_id):
3434
model_name=get_model_name_from_config(sub_model_config) if sub_model_config.get(
3535
"model_name") else "",
3636
url=sub_model_config.get("base_url", ""),
37-
is_deep_thinking=main_model_config.get("is_deep_thinking", False))]
37+
is_deep_thinking=sub_model_config.get("is_deep_thinking", False))]
3838

3939

4040
async def create_agent_config(agent_id, tenant_id, user_id, language: str = 'zh'):

backend/utils/str_utils.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,7 @@ def remove_think_tags(text: str) -> str:
1515

1616

1717
def add_no_think_token(messages: List[dict]) -> None:
    """Append the " /no_think" suffix to the last user message, in place.

    Does nothing when ``messages`` is empty, when the final entry is not a
    user turn, or when it carries no "content" field.
    """
    if not messages:
        return
    last_message = messages[-1]
    if last_message["role"] == "user" and "content" in last_message:
        last_message["content"] += " /no_think"

sdk/nexent/core/agents/nexent_agent.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -68,18 +68,15 @@ def __init__(self, observer: MessageObserver,
6868

6969
def create_model(self, model_cite_name: str):
    """Create a model instance for the config whose cite_name matches.

    None placeholders in ``self.model_config_list`` are skipped.

    Raises:
        ValueError: if no matching (non-None) model config is found.
    """
    matched_config = None
    for candidate in self.model_config_list:
        if candidate is not None and candidate.cite_name == model_cite_name:
            matched_config = candidate
            break
    if matched_config is None:
        raise ValueError(f"Model {model_cite_name} not found")
    return ModelFactory.create_model(matched_config, self.observer, self.stop_event)
8380

8481
def create_local_tool(self, tool_config: ToolConfig):
8582
class_name = tool_config.class_name

sdk/nexent/core/models/openai_deep_thinking_llm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def __call__(self, messages: List[Dict[str, Any]], stop_sequences: Optional[List
4949
new_token = self.process_token(new_token)
5050

5151
# If in think block, process as deep thinking content
52-
if self.observer.in_think_block and new_token:
52+
if self.observer.in_think_block:
5353
self.observer.message_query.append(
5454
Message(ProcessType.MODEL_OUTPUT_DEEP_THINKING, new_token).to_json()
5555
)

sdk/nexent/core/models/openai_llm.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,7 @@ def __init__(self, observer: MessageObserver, temperature=0.2, top_p=0.95, *args
2323
def __call__(self, messages: List[Dict[str, Any]], stop_sequences: Optional[List[str]] = None,
2424
grammar: Optional[str] = None, tools_to_call_from: Optional[List[Tool]] = None, **kwargs, ) -> ChatMessage:
2525
try:
26-
# 如果启用no_think,添加/no_think后缀到用户最后一条消息
27-
if messages[-1]["role"] == "user":
26+
if messages and isinstance(messages[-1], dict) and messages[-1].get("role") == "user":
2827
messages[-1]["content"][-1]['text'] += " /no_think"
2928

3029
completion_kwargs = self._prepare_completion_kwargs(messages=messages, stop_sequences=stop_sequences,

test/sdk/core/models/test_openai_deep_thinking_llm.py

Lines changed: 220 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ def _prepare_completion_kwargs(self, *args, **kwargs):
2323
# In tests we will patch this method on the instance directly, so default impl is fine
2424
return {}
2525

26+
def postprocess_message(self, message, tools_to_call_from=None):
    """Identity stub: the tests need the streamed message back unchanged."""
    return message
29+
2630

2731
mock_models_module.OpenAIServerModel = DummyOpenAIServerModel
2832
mock_models_module.ChatMessage = MagicMock()
@@ -59,6 +63,11 @@ def deep_thinking_model_instance():
5963
model.top_p = 1.0
6064
model.custom_role_conversions = {}
6165

66+
# Create a proper mock for stop_event that returns False by default
67+
mock_stop_event = MagicMock()
68+
mock_stop_event.is_set.return_value = False
69+
model.stop_event = mock_stop_event
70+
6271
# Client hierarchy: client.chat.completions.create
6372
mock_client = MagicMock()
6473
mock_chat = MagicMock()
@@ -95,7 +104,7 @@ def test_process_token_with_think_tags(deep_thinking_model_instance):
95104
@pytest.mark.asyncio
96105
async def test_call_with_token_limit_error(deep_thinking_model_instance):
97106
"""Test __call__ method handles token limit errors correctly."""
98-
messages = [{"role": "user", "content": "test message"}]
107+
messages = [{"role": "user", "content": [{"text": "test message"}]}]
99108

100109
# Mock an error response
101110
deep_thinking_model_instance.client.chat.completions.create.side_effect = Exception("context_length_exceeded")
@@ -110,19 +119,226 @@ async def test_call_with_token_limit_error(deep_thinking_model_instance):
110119
@pytest.mark.asyncio
async def test_call_with_stop_event(deep_thinking_model_instance):
    """__call__ must abort with RuntimeError when the stop event is set.

    The fixture's stop_event is a MagicMock, so interruption is simulated by
    forcing is_set() to return True instead of calling set() on a real Event.
    """
    messages = [{"role": "user", "content": [{"text": "test message"}]}]

    # Set up mock chunks that will be interrupted
    mock_chunks = [
        MagicMock(choices=[MagicMock(delta=MagicMock(content="Start ", role="assistant"))])
    ]
    deep_thinking_model_instance.client.chat.completions.create.return_value = mock_chunks

    # Configure the stop event to return True when is_set() is called
    deep_thinking_model_instance.stop_event.is_set.return_value = True

    # _prepare_completion_kwargs is patched out so no real request setup runs.
    with patch.object(deep_thinking_model_instance, "_prepare_completion_kwargs"), pytest.raises(
            RuntimeError) as exc_info:
        deep_thinking_model_instance(messages)

    assert "Model is interrupted by stop event" in str(exc_info.value)
138+
139+
140+
# ---------------------------------------------------------------------------
141+
# Tests for token processing and output generation
142+
# ---------------------------------------------------------------------------
143+
144+
def test_call_normal_operation_with_usage_tracking(deep_thinking_model_instance):
    """__call__ forwards plain tokens to the observer and records usage counts.

    Two streamed chunks are simulated; only the last carries a usage object,
    mirroring streaming APIs that report usage on the final chunk.
    """
    messages = [{"role": "user", "content": [{"text": "Hello"}]}]

    # Mock the stream response with usage info
    mock_chunk1 = MagicMock()
    mock_chunk1.choices = [MagicMock()]
    mock_chunk1.choices[0].delta.content = "Hello"
    mock_chunk1.choices[0].delta.role = "assistant"

    mock_chunk2 = MagicMock()
    mock_chunk2.choices = [MagicMock()]
    mock_chunk2.choices[0].delta.content = " world"
    mock_chunk2.choices[0].delta.role = None
    mock_chunk2.usage = MagicMock()
    mock_chunk2.usage.prompt_tokens = 10
    mock_chunk2.usage.total_tokens = 15

    mock_stream = [mock_chunk1, mock_chunk2]

    with patch.object(deep_thinking_model_instance, "_prepare_completion_kwargs", return_value={}):
        deep_thinking_model_instance.client.chat.completions.create.return_value = mock_stream

        # Call the method
        result = deep_thinking_model_instance.__call__(messages)

        # Verify observer calls: both tokens streamed, buffer flushed once
        deep_thinking_model_instance.observer.add_model_new_token.assert_any_call("Hello")
        deep_thinking_model_instance.observer.add_model_new_token.assert_any_call(" world")
        deep_thinking_model_instance.observer.flush_remaining_tokens.assert_called_once()

        # Verify token counts were set from the final chunk's usage object
        assert deep_thinking_model_instance.last_input_token_count == 10
        assert deep_thinking_model_instance.last_output_token_count == 15

        # Verify result is a ChatMessage
        assert isinstance(result, MagicMock)  # Since we're mocking the parent class method
181+
182+
183+
def test_call_with_no_usage_info(deep_thinking_model_instance):
    """__call__ must default both token counters to 0 when chunk.usage is None."""
    messages = [{"role": "user", "content": [{"text": "Hello"}]}]

    # Mock the stream response with no usage info
    mock_chunk = MagicMock()
    mock_chunk.choices = [MagicMock()]
    mock_chunk.choices[0].delta.content = "Response"
    mock_chunk.choices[0].delta.role = "assistant"
    mock_chunk.usage = None  # explicit None, not a MagicMock attribute

    with patch.object(deep_thinking_model_instance, "_prepare_completion_kwargs", return_value={}):
        deep_thinking_model_instance.client.chat.completions.create.return_value = [mock_chunk]

        # Call the method
        deep_thinking_model_instance.__call__(messages)

        # Verify token counts are set to 0 when usage is None
        assert deep_thinking_model_instance.last_input_token_count == 0
        assert deep_thinking_model_instance.last_output_token_count == 0
203+
204+
205+
def test_call_with_deep_thinking_tokens(deep_thinking_model_instance):
    """__call__ routes <think>...</think> content through the deep-thinking path.

    Streams an opening tag, thinking content, a closing tag, then normal
    output; the observer should leave the think block by the end and receive
    the post-think token as a normal model token.
    """
    messages = [{"role": "user", "content": [{"text": "Think about this"}]}]

    # Mock the stream response with think tags
    mock_chunk1 = MagicMock()
    mock_chunk1.choices = [MagicMock()]
    mock_chunk1.choices[0].delta.content = "<think>"
    mock_chunk1.choices[0].delta.role = "assistant"

    mock_chunk2 = MagicMock()
    mock_chunk2.choices = [MagicMock()]
    mock_chunk2.choices[0].delta.content = "deep thinking"
    mock_chunk2.choices[0].delta.role = None

    mock_chunk3 = MagicMock()
    mock_chunk3.choices = [MagicMock()]
    mock_chunk3.choices[0].delta.content = "</think>"
    mock_chunk3.choices[0].delta.role = None

    mock_chunk4 = MagicMock()
    mock_chunk4.choices = [MagicMock()]
    mock_chunk4.choices[0].delta.content = "final answer"
    mock_chunk4.choices[0].delta.role = None
    mock_chunk4.usage = MagicMock()
    mock_chunk4.usage.prompt_tokens = 5
    mock_chunk4.usage.total_tokens = 8

    mock_stream = [mock_chunk1, mock_chunk2, mock_chunk3, mock_chunk4]

    with patch.object(deep_thinking_model_instance, "_prepare_completion_kwargs", return_value={}):
        deep_thinking_model_instance.client.chat.completions.create.return_value = mock_stream

        # Call the method
        deep_thinking_model_instance.__call__(messages)

        # Verify that deep thinking tokens were processed correctly
        # The think tags should be removed and content should be added to message_query
        assert deep_thinking_model_instance.observer.in_think_block is False  # Should end as False
        deep_thinking_model_instance.observer.add_model_new_token.assert_any_call("final answer")
245+
246+
247+
def test_call_with_mixed_thinking_and_normal_tokens(deep_thinking_model_instance):
    """__call__ interleaves normal output around a <think> block correctly.

    Normal tokens before and after the think span must both reach the
    observer via add_model_new_token; usage from the last chunk is recorded.
    """
    messages = [{"role": "user", "content": [{"text": "Mixed content"}]}]

    # Mock the stream response with mixed content
    mock_chunk1 = MagicMock()
    mock_chunk1.choices = [MagicMock()]
    mock_chunk1.choices[0].delta.content = "Normal "
    mock_chunk1.choices[0].delta.role = "assistant"

    mock_chunk2 = MagicMock()
    mock_chunk2.choices = [MagicMock()]
    mock_chunk2.choices[0].delta.content = "<think>thinking"
    mock_chunk2.choices[0].delta.role = None

    mock_chunk3 = MagicMock()
    mock_chunk3.choices = [MagicMock()]
    mock_chunk3.choices[0].delta.content = "</think>"
    mock_chunk3.choices[0].delta.role = None

    mock_chunk4 = MagicMock()
    mock_chunk4.choices = [MagicMock()]
    mock_chunk4.choices[0].delta.content = " more normal"
    mock_chunk4.choices[0].delta.role = None
    mock_chunk4.usage = MagicMock()
    mock_chunk4.usage.prompt_tokens = 8
    mock_chunk4.usage.total_tokens = 12

    mock_stream = [mock_chunk1, mock_chunk2, mock_chunk3, mock_chunk4]

    with patch.object(deep_thinking_model_instance, "_prepare_completion_kwargs", return_value={}):
        deep_thinking_model_instance.client.chat.completions.create.return_value = mock_stream

        # Call the method
        deep_thinking_model_instance.__call__(messages)

        # Verify that normal tokens were added to observer
        deep_thinking_model_instance.observer.add_model_new_token.assert_any_call("Normal ")
        deep_thinking_model_instance.observer.add_model_new_token.assert_any_call(" more normal")

        # Verify token counts
        assert deep_thinking_model_instance.last_input_token_count == 8
        assert deep_thinking_model_instance.last_output_token_count == 12
290+
291+
292+
def test_call_with_null_tokens(deep_thinking_model_instance):
    """__call__ must skip chunks whose delta.content is None.

    Only the non-None token should be forwarded to the observer.
    """
    messages = [{"role": "user", "content": [{"text": "Hello"}]}]

    # Mock the stream response with null tokens
    mock_chunk1 = MagicMock()
    mock_chunk1.choices = [MagicMock()]
    mock_chunk1.choices[0].delta.content = None
    mock_chunk1.choices[0].delta.role = "assistant"

    mock_chunk2 = MagicMock()
    mock_chunk2.choices = [MagicMock()]
    mock_chunk2.choices[0].delta.content = "Response"
    mock_chunk2.choices[0].delta.role = None
    mock_chunk2.usage = MagicMock()
    mock_chunk2.usage.prompt_tokens = 5
    mock_chunk2.usage.total_tokens = 8

    with patch.object(deep_thinking_model_instance, "_prepare_completion_kwargs", return_value={}):
        deep_thinking_model_instance.client.chat.completions.create.return_value = [mock_chunk1, mock_chunk2]

        # Call the method
        deep_thinking_model_instance.__call__(messages)

        # Verify that null tokens are handled correctly (not added to observer)
        deep_thinking_model_instance.observer.add_model_new_token.assert_called_once_with("Response")
318+
319+
320+
def test_call_with_general_exception(deep_thinking_model_instance):
    """__call__ must re-raise exceptions that are not token-limit errors."""
    messages = [{"role": "user", "content": [{"text": "Hello"}]}]

    with patch.object(deep_thinking_model_instance, "_prepare_completion_kwargs", return_value={}):
        # Mock the client to raise a general exception
        deep_thinking_model_instance.client.chat.completions.create.side_effect = Exception("General error")

        # Call the method and expect the same exception to propagate unchanged
        with pytest.raises(Exception, match="General error"):
            deep_thinking_model_instance.__call__(messages)
331+
332+
333+
def test_call_with_context_length_exceeded_error(deep_thinking_model_instance):
    """__call__ must translate a context_length_exceeded error into ValueError."""
    messages = [{"role": "user", "content": [{"text": "Hello"}]}]

    with patch.object(deep_thinking_model_instance, "_prepare_completion_kwargs", return_value={}):
        # Mock the client to raise context length exceeded error
        deep_thinking_model_instance.client.chat.completions.create.side_effect = Exception(
            "context_length_exceeded: token limit exceeded")

        # Call the method and expect ValueError with the normalized message
        with pytest.raises(ValueError, match="Token limit exceeded"):
            deep_thinking_model_instance.__call__(messages)

0 commit comments

Comments
 (0)