fix(antigravity): reject requests exceeding Claude's 64K max_tokens limit

FammasMaz · FammasMaz · commit 5a8258ca891a · 2026-01-05T15:48:57.000+01:00
Instead of silently capping max_tokens, raise a ValueError so Claude Code
sees the error and can adjust its request. Fixes 400 INVALID_ARGUMENT errors
when clients send max_tokens > 64000 for Claude models.
diff --git a/src/rotator_library/providers/antigravity_provider.py b/src/rotator_library/providers/antigravity_provider.py
@@ -132,6 +132,10 @@ def _env_int(key: str, default: int) -> int:
 # See: https://ai.google.dev/gemini-api/docs/models
 GEMINI_MAX_OUTPUT_TOKENS = 16384
 
+# Claude max output tokens cap - Claude models have a 64K output limit
+# See: https://docs.anthropic.com/en/docs/about-claude/models
+CLAUDE_MAX_OUTPUT_TOKENS = 64000
+
 # Empty response retry configuration
 # When Antigravity returns an empty response (no content, no tool calls),
 # automatically retry up to this many attempts before giving up (minimum 1)
@@ -4073,6 +4077,16 @@ def _transform_to_antigravity_format(
                 )
                 gen_config["maxOutputTokens"] = GEMINI_MAX_OUTPUT_TOKENS
 
+        # Reject requests that exceed Claude's max_tokens limit (64K)
+        # Let the client see the error so it can adjust its request
+        if is_claude and gen_config.get("maxOutputTokens"):
+            current_max = gen_config["maxOutputTokens"]
+            if current_max > CLAUDE_MAX_OUTPUT_TOKENS:
+                raise ValueError(
+                    f"max_tokens: {current_max} > {CLAUDE_MAX_OUTPUT_TOKENS}, "
+                    f"which is the maximum allowed number of output tokens for {model}"
+                )
+
         antigravity_payload["request"]["generationConfig"] = gen_config
 
         # Set toolConfig based on tool_choice parameter