This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit 6505006

FIX: allow the correct number of tokens for AWS models

1 parent 0d7f353 commit 6505006

2 files changed: 44 additions, 22 deletions

  lib/completions/endpoints/aws_bedrock.rb
  spec/lib/completions/endpoints/aws_bedrock_spec.rb

lib/completions/endpoints/aws_bedrock.rb

25 additions, 19 deletions

@@ -19,7 +19,10 @@ def normalize_model_params(model_params)
         end
 
         def default_options(dialect)
-          options = { max_tokens: 3_000, anthropic_version: "bedrock-2023-05-31" }
+          max_tokens = 4096
+          max_tokens = 8192 if bedrock_model_id.match?(/3.5/)
+
+          options = { max_tokens: max_tokens, anthropic_version: "bedrock-2023-05-31" }
 
           options[:stop_sequences] = ["</function_calls>"] if !dialect.native_tool_support? &&
             dialect.prompt.has_tools?
@@ -40,6 +43,27 @@ def xml_tags_to_strip(dialect)
 
         private
 
+        def bedrock_model_id
+          case llm_model.name
+          when "claude-2"
+            "anthropic.claude-v2:1"
+          when "claude-3-haiku"
+            "anthropic.claude-3-haiku-20240307-v1:0"
+          when "claude-3-sonnet"
+            "anthropic.claude-3-sonnet-20240229-v1:0"
+          when "claude-instant-1"
+            "anthropic.claude-instant-v1"
+          when "claude-3-opus"
+            "anthropic.claude-3-opus-20240229-v1:0"
+          when "claude-3-5-sonnet"
+            "anthropic.claude-3-5-sonnet-20241022-v2:0"
+          when "claude-3-5-haiku"
+            "anthropic.claude-3-5-haiku-20241022-v1:0"
+          else
+            llm_model.name
+          end
+        end
+
         def prompt_size(prompt)
           # approximation
           tokenizer.size(prompt.system_prompt.to_s + " " + prompt.messages.to_s)
@@ -48,24 +72,6 @@ def prompt_size(prompt)
         def model_uri
           region = llm_model.lookup_custom_param("region")
 
-          bedrock_model_id =
-            case llm_model.name
-            when "claude-2"
-              "anthropic.claude-v2:1"
-            when "claude-3-haiku"
-              "anthropic.claude-3-haiku-20240307-v1:0"
-            when "claude-3-sonnet"
-              "anthropic.claude-3-sonnet-20240229-v1:0"
-            when "claude-instant-1"
-              "anthropic.claude-instant-v1"
-            when "claude-3-opus"
-              "anthropic.claude-3-opus-20240229-v1:0"
-            when "claude-3-5-sonnet"
-              "anthropic.claude-3-5-sonnet-20241022-v2:0"
-            else
-              llm_model.name
-            end
-
           if region.blank? || bedrock_model_id.blank?
             raise CompletionFailed.new(I18n.t("discourse_ai.llm_models.bedrock_invalid_url"))
           end
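
For reference, a minimal standalone Ruby sketch of the token-limit selection added above; the helper name resolve_max_tokens is hypothetical, and in the commit this logic lives inside AwsBedrock#default_options. Note that the dot in /3.5/ is unescaped, so it matches the "3-5" segment of the Bedrock ids returned by bedrock_model_id.

# Hypothetical helper mirroring the new default_options behaviour.
def resolve_max_tokens(bedrock_model_id)
  max_tokens = 4096
  # /3.5/ matches "3-5" because "." matches any character, so the
  # Claude 3.5 Bedrock ids get the larger completion budget.
  max_tokens = 8192 if bedrock_model_id.match?(/3.5/)
  max_tokens
end

resolve_max_tokens("anthropic.claude-3-sonnet-20240229-v1:0")    # => 4096
resolve_max_tokens("anthropic.claude-3-5-sonnet-20241022-v2:0")  # => 8192
resolve_max_tokens("anthropic.claude-3-5-haiku-20241022-v1:0")   # => 8192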

spec/lib/completions/endpoints/aws_bedrock_spec.rb

19 additions, 3 deletions

@@ -26,6 +26,22 @@ def encode_message(message)
     Aws::EventStream::Encoder.new.encode(aws_message)
   end
 
+  it "should provide accurate max token count" do
+    prompt = DiscourseAi::Completions::Prompt.new("hello")
+    dialect = DiscourseAi::Completions::Dialects::Claude.new(prompt, model)
+    endpoint = DiscourseAi::Completions::Endpoints::AwsBedrock.new(model)
+
+    model.name = "claude-2"
+    expect(endpoint.default_options(dialect)[:max_tokens]).to eq(4096)
+
+    model.name = "claude-3-5-sonnet"
+    expect(endpoint.default_options(dialect)[:max_tokens]).to eq(8192)
+
+    model.name = "claude-3-5-haiku"
+    options = endpoint.default_options(dialect)
+    expect(options[:max_tokens]).to eq(8192)
+  end
+
   describe "function calling" do
     it "supports old school xml function calls" do
       model.provider_params["disable_native_tools"] = true
@@ -246,7 +262,7 @@ def encode_message(message)
       expect(response).to eq(expected_response)
 
       expected = {
-        "max_tokens" => 3000,
+        "max_tokens" => 4096,
         "anthropic_version" => "bedrock-2023-05-31",
         "messages" => [{ "role" => "user", "content" => "what is the weather in sydney" }],
         "tools" => [
@@ -305,7 +321,7 @@ def encode_message(message)
       expect(request.headers["X-Amz-Content-Sha256"]).to be_present
 
       expected = {
-        "max_tokens" => 3000,
+        "max_tokens" => 4096,
         "anthropic_version" => "bedrock-2023-05-31",
         "messages" => [{ "role" => "user", "content" => "hello world" }],
         "system" => "You are a helpful bot",
@@ -354,7 +370,7 @@ def encode_message(message)
      expect(request.headers["X-Amz-Content-Sha256"]).to be_present
 
      expected = {
-       "max_tokens" => 3000,
+       "max_tokens" => 4096,
       "anthropic_version" => "bedrock-2023-05-31",
       "messages" => [{ "role" => "user", "content" => "hello world" }],
       "system" => "You are a helpful bot",

0 commit comments