import unittest

from types import SimpleNamespace
from unittest.mock import MagicMock

from memos.configs.llm import QwenLLMConfig
from memos.llms.qwen import QwenLLM


class TestQwenLLM(unittest.TestCase):
    def test_qwen_llm_generate_with_and_without_think_prefix(self):
        """Test QwenLLM non-streaming response generation with and without <think> prefix removal."""

        # Simulated full response content with a <think> tag
        full_content = "<think>Analyzing your request...</think>Hello, world!"

        # Prepare the mock response object with the expected structure;
        # MagicMock returns the same child mock for repeated choices[0]
        # accesses, so this assignment is visible inside generate().
        mock_response = MagicMock()
        mock_response.model_dump_json.return_value = '{"mocked": "true"}'
        mock_response.choices[0].message.content = full_content

        # Create a config with remove_think_prefix = False
        config_with_think = QwenLLMConfig.model_validate(
            {
                "model_name_or_path": "qwen-test",
                "temperature": 0.7,
                "max_tokens": 100,
                "top_p": 0.9,
                "api_key": "sk-test",
                "api_base": "https://dashscope.aliyuncs.com/api/v1",
                "remove_think_prefix": False,
            }
        )

        # Instance that keeps the <think> prefix
        llm_with_think = QwenLLM(config_with_think)
        llm_with_think.client.chat.completions.create = MagicMock(return_value=mock_response)

        response_with_think = llm_with_think.generate([{"role": "user", "content": "Hi"}])
        self.assertEqual(response_with_think, full_content)

        # Derive a config with remove_think_prefix = True; model_copy() is the
        # Pydantic v2 replacement for the deprecated copy()
        config_without_think = config_with_think.model_copy(update={"remove_think_prefix": True})

        # Instance that strips the <think> prefix
        llm_without_think = QwenLLM(config_without_think)
        llm_without_think.client.chat.completions.create = MagicMock(return_value=mock_response)

        response_without_think = llm_without_think.generate([{"role": "user", "content": "Hi"}])
        self.assertEqual(response_without_think, "Hello, world!")
        self.assertNotIn("<think>", response_without_think)

    def test_qwen_llm_generate_stream(self):
        """Test QwenLLM stream generation with both reasoning_content and content."""

        def make_chunk(delta_dict):
            # Construct a mock chunk whose delta carries only the given fields
            delta = SimpleNamespace(**delta_dict)
            choice = SimpleNamespace(delta=delta)
            return SimpleNamespace(choices=[choice])

        # Simulate a sequence of streamed chunks: reasoning first, then content
        mock_stream_chunks = [
            make_chunk({"reasoning_content": "Analyzing input..."}),
            make_chunk({"content": "Hello"}),
            make_chunk({"content": ", "}),
            make_chunk({"content": "world!"}),
        ]

        # Mock the client's streaming response
        mock_chat_completions_create = MagicMock(return_value=iter(mock_stream_chunks))

        # Build a QwenLLM config that keeps the <think> prefix
        config = QwenLLMConfig.model_validate(
            {
                "model_name_or_path": "qwen-test",
                "temperature": 0.7,
                "max_tokens": 100,
                "top_p": 0.9,
                "api_key": "sk-test",
                "api_base": "https://dashscope.aliyuncs.com/api/v1",
                "remove_think_prefix": False,
            }
        )

        # Create a QwenLLM instance and inject the mock client
        llm = QwenLLM(config)
        llm.client.chat.completions.create = mock_chat_completions_create

        messages = [{"role": "user", "content": "Say hello"}]

        # Collect the streamed output; the reasoning text is expected to
        # stream first, prefixed with <think>
        response_parts = list(llm.generate_stream(messages))
        response = "".join(response_parts)

        # Assertions on structure and content
        self.assertIn("<think>", response)
        self.assertIn("Analyzing input...", response)
        self.assertIn("Hello, world!", response)
        self.assertTrue(response.startswith("<think>Analyzing input..."))
        self.assertTrue(response.endswith("Hello, world!"))
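
# Standard unittest entry point (a conventional addition) so this module can
# also be executed directly with `python`; discovery via pytest or
# `python -m unittest` is unaffected.
if __name__ == "__main__":
    unittest.main()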