
Commit fe19133

FEATURE: full support for Sonnet 3.7 (#1151)
* FEATURE: full support for Sonnet 3.7. Adds support for Sonnet 3.7 with reasoning on Bedrock and Anthropic, and fixes a regression where provider params were not populated. Note: the reasoning token budget is hardcoded to a minimum of 100 and a maximum of 65,536.
* FIX: OpenAI non-reasoning models need to use the deprecated max_tokens parameter.
1 parent: 84e791a
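As context for the diffs below, a minimal Ruby sketch of how the reasoning options are assembled; the 100..65_536 clamp and the 30,000-token headroom come from this commit, while the standalone helper and its inputs are illustrative only:

# Sketch only: mirrors the clamping and budgeting logic added to the
# Anthropic and Bedrock endpoints below; not part of the plugin API.
def reasoning_options(enable_reasoning:, reasoning_tokens:)
  return {} unless enable_reasoning

  budget = reasoning_tokens.to_i.clamp(100, 65_536)
  {
    # leave ample room for the visible answer beyond the thinking budget
    max_tokens: budget + 30_000,
    thinking: { type: "enabled", budget_tokens: budget },
  }
end

reasoning_options(enable_reasoning: true, reasoning_tokens: 10_000)
# => { max_tokens: 40_000, thinking: { type: "enabled", budget_tokens: 10_000 } }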

File tree

11 files changed: +184 -15 lines

app/models/llm_model.rb (4 additions, 0 deletions)

@@ -26,9 +26,13 @@ def self.provider_params
       access_key_id: :text,
       region: :text,
       disable_native_tools: :checkbox,
+      enable_reasoning: :checkbox,
+      reasoning_tokens: :number,
     },
     anthropic: {
       disable_native_tools: :checkbox,
+      enable_reasoning: :checkbox,
+      reasoning_tokens: :number,
     },
     open_ai: {
       organization: :text,
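The specs further down toggle these new params directly. A minimal usage sketch, assuming provider_params behaves as a writable hash on the model (as it does in the spec diffs below):

model.provider_params["enable_reasoning"] = true   # rendered as a checkbox in the editor
model.provider_params["reasoning_tokens"] = 10_000 # rendered as a number field
model.save!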

assets/javascripts/discourse/components/ai-llm-editor-form.gjs (6 additions, 3 deletions)

@@ -61,7 +61,10 @@ export default class AiLlmEditorForm extends Component {
       provider: model.provider,
       enabled_chat_bot: model.enabled_chat_bot,
       vision_enabled: model.vision_enabled,
-      provider_params: this.computeProviderParams(model.provider),
+      provider_params: this.computeProviderParams(
+        model.provider,
+        model.provider_params
+      ),
       llm_quotas: model.llm_quotas,
     };
   }
@@ -128,12 +131,12 @@ export default class AiLlmEditorForm extends Component {
     return !this.args.model.isNew;
   }

-  computeProviderParams(provider) {
+  computeProviderParams(provider, currentParams = {}) {
     const params = this.args.llms.resultSetMeta.provider_params[provider] ?? {};
     return Object.fromEntries(
       Object.entries(params).map(([k, v]) => [
         k,
-        v?.type === "enum" ? v.default : null,
+        currentParams[k] ?? (v?.type === "enum" ? v.default : null),
       ])
     );
   }
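For clarity, the same merge the component now performs, sketched in Ruby with a hypothetical schema (JavaScript's ?? falls back only on null/undefined; this sketch approximates that with a missing-key check):

# Keep the saved value when present, so editing an existing model no longer
# wipes its provider params (the regression this commit fixes); otherwise
# fall back to the enum default, or nil for non-enum fields.
def compute_provider_params(schema, current_params = {})
  schema.to_h do |key, meta|
    fallback = meta[:type] == "enum" ? meta[:default] : nil
    [key, current_params.key?(key) ? current_params[key] : fallback]
  end
end

compute_provider_params(
  { reasoning_effort: { type: "enum", default: "low" }, enable_reasoning: { type: "checkbox" } },
  { enable_reasoning: true },
)
# => { reasoning_effort: "low", enable_reasoning: true }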

config/locales/client.en.yml (3 additions, 1 deletion)

@@ -390,7 +390,7 @@ en:

     model_description:
       none: "General settings that work for most language models"
-      anthropic-claude-3-5-sonnet: "Anthropic's most intelligent model"
+      anthropic-claude-3-7-sonnet: "Anthropic's most intelligent model"
       anthropic-claude-3-5-haiku: "Fast and cost-effective"
       anthropic-claude-3-opus: "Excels at writing and complex tasks"
       google-gemini-1-5-pro: "Mid-sized multimodal model capable of a wide range of tasks"
@@ -459,6 +459,8 @@ en:
       provider_quantizations: "Order of provider quantizations (comma delimited list eg: fp16,fp8)"
       disable_streaming: "Disable streaming completions (convert streaming to non streaming requests)"
       reasoning_effort: "Reasoning effort (only applicable to reasoning models)"
+      enable_reasoning: "Enable reasoning (only applicable to Sonnet 3.7)"
+      reasoning_tokens: "Number of tokens used for reasoning"

     related_topics:
       title: "Related topics"

lib/completions/endpoints/anthropic.rb (9 additions, 0 deletions)

@@ -38,6 +38,15 @@ def default_options(dialect)

         options = { model: mapped_model, max_tokens: max_tokens }

+        if llm_model.lookup_custom_param("enable_reasoning")
+          reasoning_tokens =
+            llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(100, 65_536)
+
+          # this allows for lots of tokens beyond reasoning
+          options[:max_tokens] = reasoning_tokens + 30_000
+          options[:thinking] = { type: "enabled", budget_tokens: reasoning_tokens }
+        end
+
         options[:stop_sequences] = ["</function_calls>"] if !dialect.native_tool_support? &&
           dialect.prompt.has_tools?
lib/completions/endpoints/aws_bedrock.rb (14 additions, 1 deletion)

@@ -26,7 +26,18 @@ def default_options(dialect)
         max_tokens = 4096
         max_tokens = 8192 if bedrock_model_id.match?(/3.5/)

-        { max_tokens: max_tokens, anthropic_version: "bedrock-2023-05-31" }
+        result = { anthropic_version: "bedrock-2023-05-31" }
+        if llm_model.lookup_custom_param("enable_reasoning")
+          reasoning_tokens =
+            llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(100, 65_536)
+
+          # this allows for ample tokens beyond reasoning
+          max_tokens = reasoning_tokens + 30_000
+          result[:thinking] = { type: "enabled", budget_tokens: reasoning_tokens }
+        end
+        result[:max_tokens] = max_tokens
+
+        result
       else
         {}
       end
@@ -66,6 +77,8 @@ def bedrock_model_id
         "anthropic.claude-3-5-sonnet-20241022-v2:0"
       when "claude-3-5-haiku"
         "anthropic.claude-3-5-haiku-20241022-v1:0"
+      when "claude-3-7-sonnet"
+        "anthropic.claude-3-7-sonnet-20250219-v1:0"
       else
         llm_model.name
       end

lib/completions/endpoints/open_ai.rb (7 additions, 3 deletions)

@@ -11,9 +11,13 @@ def self.can_contact?(model_provider)
       def normalize_model_params(model_params)
         model_params = model_params.dup

-        # max_tokens is deprecated and is not functional on reasoning models
-        max_tokens = model_params.delete(:max_tokens)
-        model_params[:max_completion_tokens] = max_tokens if max_tokens
+        # max_tokens is deprecated however we still need to support it
+        # on older OpenAI models and older Azure models, so we will only normalize
+        # if our model name starts with o (to denote all the reasoning models)
+        if llm_model.name.starts_with?("o")
+          max_tokens = model_params.delete(:max_tokens)
+          model_params[:max_completion_tokens] = max_tokens if max_tokens
+        end

         # temperature is already supported
         if model_params[:stop_sequences]
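A short sketch of the resulting behavior in plain Ruby (the endpoint uses ActiveSupport's starts_with?; the helper name here is illustrative only):

def normalize(model_name, params)
  params = params.dup
  # reasoning models ("o1", "o3-mini", ...) reject max_tokens, so remap it;
  # older OpenAI and Azure models still expect the deprecated name
  if model_name.start_with?("o")
    max_tokens = params.delete(:max_tokens)
    params[:max_completion_tokens] = max_tokens if max_tokens
  end
  params
end

normalize("o3-mini", { max_tokens: 1000 }) # => { max_completion_tokens: 1000 }
normalize("gpt-4o", { max_tokens: 1000 })  # => { max_tokens: 1000 }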

lib/completions/llm.rb (2 additions, 2 deletions)

@@ -27,9 +27,9 @@ def presets
           id: "anthropic",
           models: [
             {
-              name: "claude-3-5-sonnet",
+              name: "claude-3-7-sonnet",
               tokens: 200_000,
-              display_name: "Claude 3.5 Sonnet",
+              display_name: "Claude 3.7 Sonnet",
             },
             { name: "claude-3-5-haiku", tokens: 200_000, display_name: "Claude 3.5 Haiku" },
             { name: "claude-3-opus", tokens: 200_000, display_name: "Claude 3 Opus" },

spec/lib/completions/endpoints/anthropic_spec.rb (62 additions, 0 deletions)

@@ -334,6 +334,68 @@
     expect(requested_body).to eq(request_body)
   end

+  it "can support reasoning" do
+    body = <<~STRING
+      {
+        "content": [
+          {
+            "text": "Hello!",
+            "type": "text"
+          }
+        ],
+        "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
+        "model": "claude-3-opus-20240229",
+        "role": "assistant",
+        "stop_reason": "end_turn",
+        "stop_sequence": null,
+        "type": "message",
+        "usage": {
+          "input_tokens": 10,
+          "output_tokens": 25
+        }
+      }
+    STRING
+
+    parsed_body = nil
+    stub_request(:post, url).with(
+      body:
+        proc do |req_body|
+          parsed_body = JSON.parse(req_body, symbolize_names: true)
+          true
+        end,
+      headers: {
+        "Content-Type" => "application/json",
+        "X-Api-Key" => "123",
+        "Anthropic-Version" => "2023-06-01",
+      },
+    ).to_return(status: 200, body: body)
+
+    model.provider_params["enable_reasoning"] = true
+    model.provider_params["reasoning_tokens"] = 10_000
+    model.save!
+
+    proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
+    result = proxy.generate(prompt, user: Discourse.system_user)
+    expect(result).to eq("Hello!")
+
+    expected_body = {
+      model: "claude-3-opus-20240229",
+      max_tokens: 40_000,
+      thinking: {
+        type: "enabled",
+        budget_tokens: 10_000,
+      },
+      messages: [{ role: "user", content: "user1: hello" }],
+      system: "You are hello bot",
+    }
+    expect(parsed_body).to eq(expected_body)
+
+    log = AiApiAuditLog.order(:id).last
+    expect(log.provider_id).to eq(AiApiAuditLog::Provider::Anthropic)
+    expect(log.request_tokens).to eq(10)
+    expect(log.response_tokens).to eq(25)
+  end
+
   it "can operate in regular mode" do
     body = <<~STRING
       {

spec/lib/completions/endpoints/aws_bedrock_spec.rb (51 additions, 0 deletions)

@@ -335,6 +335,57 @@ def encode_message(message)
     expect(log.response_tokens).to eq(20)
   end

+  it "supports thinking" do
+    model.provider_params["enable_reasoning"] = true
+    model.provider_params["reasoning_tokens"] = 10_000
+    model.save!
+
+    proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
+
+    request = nil
+
+    content = {
+      content: [text: "hello sam"],
+      usage: {
+        input_tokens: 10,
+        output_tokens: 20,
+      },
+    }.to_json
+
+    stub_request(
+      :post,
+      "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke",
+    )
+      .with do |inner_request|
+        request = inner_request
+        true
+      end
+      .to_return(status: 200, body: content)
+
+    response = proxy.generate("hello world", user: user)
+
+    expect(request.headers["Authorization"]).to be_present
+    expect(request.headers["X-Amz-Content-Sha256"]).to be_present
+
+    expected = {
+      "max_tokens" => 40_000,
+      "thinking" => {
+        "type" => "enabled",
+        "budget_tokens" => 10_000,
+      },
+      "anthropic_version" => "bedrock-2023-05-31",
+      "messages" => [{ "role" => "user", "content" => "hello world" }],
+      "system" => "You are a helpful bot",
+    }
+    expect(JSON.parse(request.body)).to eq(expected)
+
+    expect(response).to eq("hello sam")
+
+    log = AiApiAuditLog.order(:id).last
+    expect(log.request_tokens).to eq(10)
+    expect(log.response_tokens).to eq(20)
+  end
+
   it "supports claude 3 streaming" do
     proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")

spec/lib/completions/endpoints/open_ai_spec.rb (20 additions, 1 deletion)

@@ -285,6 +285,23 @@ def request_body(prompt, stream: false, tool_call: false)
     end
   end

+  describe "max tokens remapping" do
+    it "remaps max_tokens to max_completion_tokens for reasoning models" do
+      model.update!(name: "o3-mini")
+      llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
+
+      body_parsed = nil
+      stub_request(:post, "https://api.openai.com/v1/chat/completions").with(
+        body: ->(body) { body_parsed = JSON.parse(body) },
+      ).to_return(status: 200, body: { choices: [{ message: { content: "hello" } }] }.to_json)
+
+      llm.generate("test", user: user, max_tokens: 1000)
+
+      expect(body_parsed["max_completion_tokens"]).to eq(1000)
+      expect(body_parsed["max_tokens"]).to be_nil
+    end
+  end
+
   describe "forced tool use" do
     it "can properly force tool use" do
       llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
@@ -346,9 +363,11 @@ def request_body(prompt, stream: false, tool_call: false)
         body: proc { |body| body_json = JSON.parse(body, symbolize_names: true) },
       ).to_return(body: response)

-      result = llm.generate(prompt, user: user)
+      result = llm.generate(prompt, user: user, max_tokens: 1000)

       expect(body_json[:tool_choice]).to eq({ type: "function", function: { name: "echo" } })
+      # we expect this not to be remapped on older non reasoning models
+      expect(body_json[:max_tokens]).to eq(1000)

       log = AiApiAuditLog.order(:id).last
       expect(log.request_tokens).to eq(55)
