feat(server): force full thinking budget for Opus on all routes

FammasMaz · FammasMaz · commit 060d272c810e · 2025-12-20T22:44:48.000+01:00
diff --git a/src/proxy_app/main.py b/src/proxy_app/main.py
@@ -926,6 +926,14 @@ async def chat_completions(
             "custom_reasoning_budget"
         ) or generation_cfg.get("custom_reasoning_budget", False)
 
+        # Default Opus models to the full thinking budget (no // 4 reduction):
+        # "high" effort and custom_reasoning_budget are filled in only when not already set
+        if model and "opus" in model.lower():
+            if not reasoning_effort:
+                request_data["reasoning_effort"] = "high"
+            if not custom_reasoning_budget:
+                request_data["custom_reasoning_budget"] = True
+
         logging.getLogger("rotator_library").debug(
             f"Handling reasoning parameters: model={model}, reasoning_effort={reasoning_effort}, custom_reasoning_budget={custom_reasoning_budget}"
         )
diff --git a/src/rotator_library/anthropic_compat/translator.py b/src/rotator_library/anthropic_compat/translator.py
@@ -370,11 +370,9 @@ def translate_anthropic_request(request: AnthropicMessagesRequest) -> Dict[str,
             openai_request["reasoning_effort"] = "disable"
     elif _is_opus_model(request.model):
         # Enable thinking for Opus models when no thinking config is provided
-        # Use "high" effort but NOT custom_reasoning_budget, so // 4 applies
-        # This gives 8192 thinking tokens (32768 // 4) which is reasonable for most tasks
-        # Users who want full capacity can explicitly set thinking.budget_tokens >= 32000
+        # Always use full thinking capacity for Opus (no // 4 reduction)
         openai_request["reasoning_effort"] = "high"
-        # Note: NOT setting custom_reasoning_budget here to conserve tokens
+        openai_request["custom_reasoning_budget"] = True
 
     return openai_request