import unittest

from types import SimpleNamespace
from unittest.mock import MagicMock

from memos.configs.llm import QwenLLMConfig
from memos.llms.qwen import QwenLLM


class TestQwenLLM(unittest.TestCase):
    def test_qwen_llm_generate_with_and_without_think_prefix(self):
        """Test QwenLLM non-streaming response generation with and without <think> prefix removal."""

        # Simulated full response content with a <think> tag
        full_content = "<think>Analyzing your request...</think>Hello, world!"

        # Prepare the mock response object with the expected structure;
        # MagicMock returns the same child mock for repeated choices[0]
        # accesses, so this assignment is visible inside generate().
        mock_response = MagicMock()
        mock_response.model_dump_json.return_value = '{"mocked": "true"}'
        mock_response.choices[0].message.content = full_content

        # Create a config with remove_think_prefix = False
        config_with_think = QwenLLMConfig.model_validate(
            {
                "model_name_or_path": "qwen-test",
                "temperature": 0.7,
                "max_tokens": 100,
                "top_p": 0.9,
                "api_key": "sk-test",
                "api_base": "https://dashscope.aliyuncs.com/api/v1",
                "remove_think_prefix": False,
            }
        )

        # Instance that keeps the <think> prefix
        llm_with_think = QwenLLM(config_with_think)
        llm_with_think.client.chat.completions.create = MagicMock(return_value=mock_response)

        response_with_think = llm_with_think.generate([{"role": "user", "content": "Hi"}])
        self.assertEqual(response_with_think, full_content)

        # Derive a config with remove_think_prefix = True; model_copy() is the
        # Pydantic v2 replacement for the deprecated copy()
        config_without_think = config_with_think.model_copy(update={"remove_think_prefix": True})

        # Instance that strips the <think> prefix
        llm_without_think = QwenLLM(config_without_think)
        llm_without_think.client.chat.completions.create = MagicMock(return_value=mock_response)

        response_without_think = llm_without_think.generate([{"role": "user", "content": "Hi"}])
        self.assertEqual(response_without_think, "Hello, world!")
        self.assertNotIn("<think>", response_without_think)

    def test_qwen_llm_generate_stream(self):
        """Test QwenLLM stream generation with both reasoning_content and content."""

        def make_chunk(delta_dict):
            # Construct a mock chunk whose delta carries only the given fields
            delta = SimpleNamespace(**delta_dict)
            choice = SimpleNamespace(delta=delta)
            return SimpleNamespace(choices=[choice])

        # Simulate a sequence of streamed chunks: reasoning first, then content
        mock_stream_chunks = [
            make_chunk({"reasoning_content": "Analyzing input..."}),
            make_chunk({"content": "Hello"}),
            make_chunk({"content": ", "}),
            make_chunk({"content": "world!"}),
        ]

        # Mock the client's streaming response
        mock_chat_completions_create = MagicMock(return_value=iter(mock_stream_chunks))

        # Build a QwenLLM config that keeps the <think> prefix
        config = QwenLLMConfig.model_validate(
            {
                "model_name_or_path": "qwen-test",
                "temperature": 0.7,
                "max_tokens": 100,
                "top_p": 0.9,
                "api_key": "sk-test",
                "api_base": "https://dashscope.aliyuncs.com/api/v1",
                "remove_think_prefix": False,
            }
        )

        # Create a QwenLLM instance and inject the mock client
        llm = QwenLLM(config)
        llm.client.chat.completions.create = mock_chat_completions_create

        messages = [{"role": "user", "content": "Say hello"}]

        # Collect the streamed output; the reasoning text is expected to
        # stream first, prefixed with <think>
        response_parts = list(llm.generate_stream(messages))
        response = "".join(response_parts)

        # Assertions on structure and content
        self.assertIn("<think>", response)
        self.assertIn("Analyzing input...", response)
        self.assertIn("Hello, world!", response)
        self.assertTrue(response.startswith("<think>Analyzing input..."))
        self.assertTrue(response.endswith("Hello, world!"))
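
# Standard unittest entry point (a conventional addition) so this module can
# also be executed directly with `python`; discovery via pytest or
# `python -m unittest` is unaffected.
if __name__ == "__main__":
    unittest.main()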