Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit 4d36638

Browse files
committed
improve token budget
1 parent a316050 commit 4d36638

File tree

2 files changed

+8
-4
lines changed

2 files changed

+8
-4
lines changed

lib/completions/endpoints/anthropic.rb

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,15 @@ def default_options(dialect)
3434

3535
# Note: Anthropic requires this param
3636
max_tokens = 4096
37-
max_tokens = 8192 if mapped_model.match?(/3.5/)
37+
# 3.5 and 3.7 models have a higher token limit
38+
max_tokens = 8192 if mapped_model.match?(/3.[57]/)
3839

3940
options = { model: mapped_model, max_tokens: max_tokens }
4041

42+
# reasoning has even higher token limits
4143
if llm_model.lookup_custom_param("enable_reasoning")
4244
reasoning_tokens =
43-
llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(1024, 65_536)
45+
llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(1024, 32_768)
4446

4547
# this allows for lots of tokens beyond reasoning
4648
options[:max_tokens] = reasoning_tokens + 30_000

lib/completions/endpoints/aws_bedrock.rb

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,14 @@ def default_options(dialect)
2424
options =
2525
if dialect.is_a?(DiscourseAi::Completions::Dialects::Claude)
2626
max_tokens = 4096
27-
max_tokens = 8192 if bedrock_model_id.match?(/3.5/)
27+
max_tokens = 8192 if bedrock_model_id.match?(/3.[57]/)
2828

2929
result = { anthropic_version: "bedrock-2023-05-31" }
3030
if llm_model.lookup_custom_param("enable_reasoning")
31+
# we require special headers to go over 64k output tokens, let's
32+
# wait for feature requests before enabling this
3133
reasoning_tokens =
32-
llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(1024, 65_536)
34+
llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(1024, 32_768)
3335

3436
# this allows for ample tokens beyond reasoning
3537
max_tokens = reasoning_tokens + 30_000

0 commit comments

Comments
 (0)