improve token budget

SamSaffron · SamSaffron · commit 4d366388386d · 2025-03-03T14:48:39.000+11:00
diff --git a/lib/completions/endpoints/anthropic.rb b/lib/completions/endpoints/anthropic.rb
@@ -34,13 +34,15 @@ def default_options(dialect)
 
           # Note: Anthropic requires this param
           max_tokens = 4096
-          max_tokens = 8192 if mapped_model.match?(/3.5/)
+          # 3.5 and 3.7 models have a higher token limit
+          max_tokens = 8192 if mapped_model.match?(/3.[57]/)
 
           options = { model: mapped_model, max_tokens: max_tokens }
 
+          # reasoning has even higher token limits
           if llm_model.lookup_custom_param("enable_reasoning")
             reasoning_tokens =
-              llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(1024, 65_536)
+              llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(1024, 32_768)
 
             # this allows for lots of tokens beyond reasoning
             options[:max_tokens] = reasoning_tokens + 30_000
diff --git a/lib/completions/endpoints/aws_bedrock.rb b/lib/completions/endpoints/aws_bedrock.rb
@@ -24,12 +24,14 @@ def default_options(dialect)
           options =
             if dialect.is_a?(DiscourseAi::Completions::Dialects::Claude)
               max_tokens = 4096
-              max_tokens = 8192 if bedrock_model_id.match?(/3.5/)
+              max_tokens = 8192 if bedrock_model_id.match?(/3.[57]/)
 
               result = { anthropic_version: "bedrock-2023-05-31" }
               if llm_model.lookup_custom_param("enable_reasoning")
+                # we require special headers to go over 64k output tokens; let's
+                # wait for feature requests before enabling this
                 reasoning_tokens =
-                  llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(1024, 65_536)
+                  llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(1024, 32_768)
 
                 # this allows for ample tokens beyond reasoning
                 max_tokens = reasoning_tokens + 30_000