Fix Ollama thinking mode with /nothink chat message

clucraft · claude · clucraft · commit f1a055c3b9fa · 2026-01-25T19:40:02.000-05:00
Send /nothink as a separate user message before the actual prompt,
followed by a mock assistant reply ('Ok.'), in each Ollama chat
request. This disables thinking mode for Qwen3/DeepSeek models.

Remove the ineffective `think: false` API parameter from all four Ollama chat requests.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
diff --git a/backend/src/services/ai-extractor.ts b/backend/src/services/ai-extractor.ts
@@ -286,13 +286,20 @@ async function extractWithOllama(
     {
       model: model,
       messages: [
+        {
+          role: 'user',
+          content: '/nothink', // Disable thinking mode for Qwen3/DeepSeek
+        },
+        {
+          role: 'assistant',
+          content: 'Ok.',
+        },
         {
           role: 'user',
           content: EXTRACTION_PROMPT + preparedHtml,
         },
       ],
       stream: false,
-      think: false, // Disable thinking mode for Qwen3/DeepSeek models
       options: {
         num_ctx: 16384, // Increase context window for large HTML content
       },
@@ -388,9 +395,12 @@ async function verifyWithOllama(
     `${baseUrl}/api/chat`,
     {
       model: model,
-      messages: [{ role: 'user', content: prompt }],
+      messages: [
+        { role: 'user', content: '/nothink' },
+        { role: 'assistant', content: 'Ok.' },
+        { role: 'user', content: prompt },
+      ],
       stream: false,
-      think: false, // Disable thinking mode for Qwen3/DeepSeek models
       options: {
         num_ctx: 16384, // Increase context window for large HTML content
       },
@@ -484,9 +494,12 @@ async function verifyStockStatusWithOllama(
     `${baseUrl}/api/chat`,
     {
       model: model,
-      messages: [{ role: 'user', content: prompt }],
+      messages: [
+        { role: 'user', content: '/nothink' },
+        { role: 'assistant', content: 'Ok.' },
+        { role: 'user', content: prompt },
+      ],
       stream: false,
-      think: false, // Disable thinking mode for Qwen3/DeepSeek models
       options: {
         num_ctx: 16384, // Increase context window for large HTML content
       },
@@ -943,9 +956,12 @@ async function arbitrateWithOllama(
     `${baseUrl}/api/chat`,
     {
       model: model,
-      messages: [{ role: 'user', content: prompt }],
+      messages: [
+        { role: 'user', content: '/nothink' },
+        { role: 'assistant', content: 'Ok.' },
+        { role: 'user', content: prompt },
+      ],
       stream: false,
-      think: false, // Disable thinking mode for Qwen3/DeepSeek models
       options: {
         num_ctx: 16384, // Increase context window for large HTML content
       },