
Commit 31bee6a

feat: remove think for reason model

1 parent a452912 · commit 31bee6a

File tree

1 file changed: +2 −0 lines

src/memos/llms/vllm.py

Lines changed: 2 additions & 0 deletions
@@ -105,6 +105,7 @@ def _generate_with_api_client(self, messages: list[MessageDict]) -> str:
             "temperature": float(getattr(self.config, "temperature", 0.8)),
             "max_tokens": int(getattr(self.config, "max_tokens", 1024)),
             "top_p": float(getattr(self.config, "top_p", 0.9)),
+            "extra_body": {"chat_template_kwargs": {"enable_thinking": False}},
         }
 
         response = self.client.chat.completions.create(**completion_kwargs)
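
For context, here is a minimal sketch of the request the non-streaming path now sends, assuming an OpenAI-compatible client pointed at a vLLM server; the model name, base URL, and message are hypothetical placeholders. vLLM forwards chat_template_kwargs to the model's chat template, where enable_thinking=False suppresses the reasoning ("thinking") block on models such as Qwen3.

    from openai import OpenAI

    # Hypothetical endpoint; vLLM's OpenAI-compatible server typically ignores the key.
    client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

    completion_kwargs = {
        "model": "Qwen/Qwen3-8B",  # hypothetical reasoning-capable model
        "messages": [{"role": "user", "content": "Hello"}],
        "temperature": 0.8,
        "max_tokens": 1024,
        "top_p": 0.9,
        # Forwarded to the chat template; disables the reasoning ("thinking") block.
        "extra_body": {"chat_template_kwargs": {"enable_thinking": False}},
    }

    response = client.chat.completions.create(**completion_kwargs)
    print(response.choices[0].message.content)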
@@ -142,6 +143,7 @@ def generate_stream(self, messages: list[MessageDict]):
             "max_tokens": int(getattr(self.config, "max_tokens", 1024)),
             "top_p": float(getattr(self.config, "top_p", 0.9)),
             "stream": True,  # Enable streaming
+            "extra_body": {"chat_template_kwargs": {"enable_thinking": False}},
         }
 
         stream = self.client.chat.completions.create(**completion_kwargs)
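
The streaming path is the same call with stream=True (here passed as a keyword argument rather than inside the dict, which is equivalent); a sketch of consuming it, under the same assumptions as above:

    stream = client.chat.completions.create(**completion_kwargs, stream=True)
    for chunk in stream:
        # Each chunk carries an incremental delta; content can be None on role-only deltas.
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="", flush=True)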
