feat: send include_reasoning: false to suppress think tags at source

SkylarKelty · claude · SkylarKelty · commit e6307ed30175 · 2026-03-11T14:19:06.000Z
Backends like sglang and vllm honour this flag and won't emit
<think> blocks in the response content. We still strip them
client-side as a fallback for backends that ignore the flag.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/artemis/llm.py b/artemis/llm.py
@@ -327,6 +327,9 @@ async def chat_completion(
         body["tool_choice"] = "none"
     if response_format is not None:
         body["response_format"] = response_format
+    # Ask the backend to suppress <think> reasoning blocks if supported
+    # (works with sglang, vllm, and other OpenAI-compatible backends)
+    body["include_reasoning"] = False
 
     try:
         response = await client.post(