From 791ab199b8f9ab8358e47a65c11ebf31e086145f Mon Sep 17 00:00:00 2001
From: Sam Saffron
Date: Tue, 25 Feb 2025 16:54:23 +1100
Subject: [PATCH 1/2] FEATURE: full support for Sonnet 3.7

- Adds support for Sonnet 3.7 with reasoning on Bedrock and Anthropic
- Fixes regression where provider params were not populated

Note: reasoning tokens are hardcoded to a minimum of 100 and a maximum of 65536
---
 app/models/llm_model.rb                       |  4 ++
 .../components/ai-llm-editor-form.gjs         |  9 ++-
 config/locales/client.en.yml                  |  4 +-
 lib/completions/endpoints/anthropic.rb        | 13 ++++
 lib/completions/endpoints/aws_bedrock.rb      | 19 +++++-
 lib/completions/llm.rb                        |  4 +-
 .../completions/endpoints/anthropic_spec.rb   | 62 +++++++++++++++++++
 .../completions/endpoints/aws_bedrock_spec.rb | 51 +++++++++++++++
 spec/system/llms/ai_llm_spec.rb               | 10 +--
 9 files changed, 165 insertions(+), 11 deletions(-)

diff --git a/app/models/llm_model.rb b/app/models/llm_model.rb
index a5f5e5107..084091e88 100644
--- a/app/models/llm_model.rb
+++ b/app/models/llm_model.rb
@@ -26,9 +26,13 @@ def self.provider_params
         access_key_id: :text,
         region: :text,
         disable_native_tools: :checkbox,
+        enable_reasoning: :checkbox,
+        reasoning_tokens: :number,
       },
       anthropic: {
         disable_native_tools: :checkbox,
+        enable_reasoning: :checkbox,
+        reasoning_tokens: :number,
       },
       open_ai: {
         organization: :text,
diff --git a/assets/javascripts/discourse/components/ai-llm-editor-form.gjs b/assets/javascripts/discourse/components/ai-llm-editor-form.gjs
index 6ba6cd269..d3c02df3b 100644
--- a/assets/javascripts/discourse/components/ai-llm-editor-form.gjs
+++ b/assets/javascripts/discourse/components/ai-llm-editor-form.gjs
@@ -61,7 +61,10 @@ export default class AiLlmEditorForm extends Component {
       provider: model.provider,
       enabled_chat_bot: model.enabled_chat_bot,
       vision_enabled: model.vision_enabled,
-      provider_params: this.computeProviderParams(model.provider),
+      provider_params: this.computeProviderParams(
+        model.provider,
+        model.provider_params
+      ),
       llm_quotas: model.llm_quotas,
     };
   }
@@ -128,12 +131,12 @@ export default class AiLlmEditorForm extends Component {
     return !this.args.model.isNew;
  }
 
-  computeProviderParams(provider) {
+  computeProviderParams(provider, currentParams = {}) {
     const params = this.args.llms.resultSetMeta.provider_params[provider] ?? {};
     return Object.fromEntries(
       Object.entries(params).map(([k, v]) => [
         k,
-        v?.type === "enum" ? v.default : null,
+        currentParams[k] ?? (v?.type === "enum" ? v.default : null),
       ])
     );
   }
diff --git a/config/locales/client.en.yml b/config/locales/client.en.yml
index eb4675235..208066797 100644
--- a/config/locales/client.en.yml
+++ b/config/locales/client.en.yml
@@ -390,7 +390,7 @@ en:
 
         model_description:
           none: "General settings that work for most language models"
-          anthropic-claude-3-5-sonnet: "Anthropic's most intelligent model"
+          anthropic-claude-3-7-sonnet: "Anthropic's most intelligent model"
           anthropic-claude-3-5-haiku: "Fast and cost-effective"
           anthropic-claude-3-opus: "Excels at writing and complex tasks"
           google-gemini-1-5-pro: "Mid-sized multimodal model capable of a wide range of tasks"
@@ -459,6 +459,8 @@ en:
             provider_quantizations: "Order of provider quantizations (comma delimited list eg: fp16,fp8)"
             disable_streaming: "Disable streaming completions (convert streaming to non streaming requests)"
             reasoning_effort: "Reasoning effort (only applicable to reasoning models)"
+            enable_reasoning: "Enable reasoning (only applicable to Sonnet 3.7)"
+            reasoning_tokens: "Number of tokens used for reasoning"
 
       related_topics:
         title: "Related topics"
diff --git a/lib/completions/endpoints/anthropic.rb b/lib/completions/endpoints/anthropic.rb
index ffbdb024d..c4620ff43 100644
--- a/lib/completions/endpoints/anthropic.rb
+++ b/lib/completions/endpoints/anthropic.rb
@@ -38,6 +38,19 @@ def default_options(dialect)
 
         options = { model: mapped_model, max_tokens: max_tokens }
 
+        if llm_model.lookup_custom_param("enable_reasoning")
+          reasoning_tokens = llm_model.lookup_custom_param("reasoning_tokens").to_i
+          if reasoning_tokens < 100
+            reasoning_tokens = 100
+          elsif reasoning_tokens > 65_536
+            reasoning_tokens = 65_536
+          end
+
+          # this allows for lots of tokens beyond reasoning
+          options[:max_tokens] = reasoning_tokens + 30_000
+          options[:thinking] = { type: "enabled", budget_tokens: reasoning_tokens }
+        end
+
         options[:stop_sequences] = ["</function_calls>"] if !dialect.native_tool_support? &&
           dialect.prompt.has_tools?
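
[Editor's sketch, not part of the patch: the Anthropic endpoint above and the Bedrock endpoint below apply the same token budgeting rule, restated here as standalone Ruby. The `reasoning_options` helper name is hypothetical; the patch inlines this logic in each endpoint's default_options, and PATCH 2/2 below collapses the if/elsif into the equivalent Integer#clamp used here.]

  # hypothetical helper; equivalent to the inlined endpoint logic
  def reasoning_options(requested_tokens)
    # clamp the configured thinking budget into the supported range
    budget = requested_tokens.to_i.clamp(100, 65_536)

    # max_tokens must exceed the thinking budget, so reserve an extra
    # 30_000 tokens for the visible (non-reasoning) part of the reply
    { max_tokens: budget + 30_000, thinking: { type: "enabled", budget_tokens: budget } }
  end

  reasoning_options(10_000)
  # => { max_tokens: 40_000, thinking: { type: "enabled", budget_tokens: 10_000 } }
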
diff --git a/lib/completions/endpoints/aws_bedrock.rb b/lib/completions/endpoints/aws_bedrock.rb
index be1abb2c0..894206b76 100644
--- a/lib/completions/endpoints/aws_bedrock.rb
+++ b/lib/completions/endpoints/aws_bedrock.rb
@@ -26,7 +26,22 @@ def default_options(dialect)
           max_tokens = 4096
           max_tokens = 8192 if bedrock_model_id.match?(/3.5/)
 
-          { max_tokens: max_tokens, anthropic_version: "bedrock-2023-05-31" }
+          result = { anthropic_version: "bedrock-2023-05-31" }
+          if llm_model.lookup_custom_param("enable_reasoning")
+            reasoning_tokens = llm_model.lookup_custom_param("reasoning_tokens").to_i
+            if reasoning_tokens < 100
+              reasoning_tokens = 100
+            elsif reasoning_tokens > 65_536
+              reasoning_tokens = 65_536
+            end
+
+            # this allows for ample tokens beyond reasoning
+            max_tokens = reasoning_tokens + 30_000
+            result[:thinking] = { type: "enabled", budget_tokens: reasoning_tokens }
+          end
+          result[:max_tokens] = max_tokens
+
+          result
         else
           {}
         end
@@ -66,6 +81,8 @@ def bedrock_model_id
           "anthropic.claude-3-5-sonnet-20241022-v2:0"
         when "claude-3-5-haiku"
           "anthropic.claude-3-5-haiku-20241022-v1:0"
+        when "claude-3-7-sonnet"
+          "anthropic.claude-3-7-sonnet-20250219-v1:0"
         else
           llm_model.name
         end
diff --git a/lib/completions/llm.rb b/lib/completions/llm.rb
index 1db8f0907..540aae08c 100644
--- a/lib/completions/llm.rb
+++ b/lib/completions/llm.rb
@@ -27,9 +27,9 @@ def presets
             id: "anthropic",
             models: [
               {
-                name: "claude-3-5-sonnet",
+                name: "claude-3-7-sonnet",
                 tokens: 200_000,
-                display_name: "Claude 3.5 Sonnet",
+                display_name: "Claude 3.7 Sonnet",
               },
               { name: "claude-3-5-haiku", tokens: 200_000, display_name: "Claude 3.5 Haiku" },
               { name: "claude-3-opus", tokens: 200_000, display_name: "Claude 3 Opus" },
diff --git a/spec/lib/completions/endpoints/anthropic_spec.rb b/spec/lib/completions/endpoints/anthropic_spec.rb
index 72ba2422d..b43c76254 100644
--- a/spec/lib/completions/endpoints/anthropic_spec.rb
+++ b/spec/lib/completions/endpoints/anthropic_spec.rb
@@ -334,6 +334,68 @@
     expect(requested_body).to eq(request_body)
   end
 
+  it "can support reasoning" do
+    body = <<~STRING
+      {
+        "content": [
+          {
+            "text": "Hello!",
+            "type": "text"
+          }
+        ],
+        "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
+        "model": "claude-3-opus-20240229",
+        "role": "assistant",
+        "stop_reason": "end_turn",
+        "stop_sequence": null,
+        "type": "message",
+        "usage": {
+          "input_tokens": 10,
+          "output_tokens": 25
+        }
+      }
+    STRING
+
+    parsed_body = nil
+    stub_request(:post, url).with(
+      body:
+        proc do |req_body|
+          parsed_body = JSON.parse(req_body, symbolize_names: true)
+          true
+        end,
+      headers: {
+        "Content-Type" => "application/json",
+        "X-Api-Key" => "123",
+        "Anthropic-Version" => "2023-06-01",
+      },
+    ).to_return(status: 200, body: body)
+
+    model.provider_params["enable_reasoning"] = true
+    model.provider_params["reasoning_tokens"] = 10_000
+    model.save!
+
+    proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
+    result = proxy.generate(prompt, user: Discourse.system_user)
+    expect(result).to eq("Hello!")
+
+    expected_body = {
+      model: "claude-3-opus-20240229",
+      max_tokens: 40_000,
+      thinking: {
+        type: "enabled",
+        budget_tokens: 10_000,
+      },
+      messages: [{ role: "user", content: "user1: hello" }],
+      system: "You are hello bot",
+    }
+    expect(parsed_body).to eq(expected_body)
+
+    log = AiApiAuditLog.order(:id).last
+    expect(log.provider_id).to eq(AiApiAuditLog::Provider::Anthropic)
+    expect(log.request_tokens).to eq(10)
+    expect(log.response_tokens).to eq(25)
+  end
+
   it "can operate in regular mode" do
     body = <<~STRING
       {
diff --git a/spec/lib/completions/endpoints/aws_bedrock_spec.rb b/spec/lib/completions/endpoints/aws_bedrock_spec.rb
index f5329d3db..ebe8094b1 100644
--- a/spec/lib/completions/endpoints/aws_bedrock_spec.rb
+++ b/spec/lib/completions/endpoints/aws_bedrock_spec.rb
@@ -335,6 +335,57 @@ def encode_message(message)
       expect(log.response_tokens).to eq(20)
     end
 
+    it "supports thinking" do
+      model.provider_params["enable_reasoning"] = true
+      model.provider_params["reasoning_tokens"] = 10_000
+      model.save!
+
+      proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
+
+      request = nil
+
+      content = {
+        content: [text: "hello sam"],
+        usage: {
+          input_tokens: 10,
+          output_tokens: 20,
+        },
+      }.to_json
+
+      stub_request(
+        :post,
+        "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke",
+      )
+        .with do |inner_request|
+          request = inner_request
+          true
+        end
+        .to_return(status: 200, body: content)
+
+      response = proxy.generate("hello world", user: user)
+
+      expect(request.headers["Authorization"]).to be_present
+      expect(request.headers["X-Amz-Content-Sha256"]).to be_present
+
+      expected = {
+        "max_tokens" => 40_000,
+        "thinking" => {
+          "type" => "enabled",
+          "budget_tokens" => 10_000,
+        },
+        "anthropic_version" => "bedrock-2023-05-31",
+        "messages" => [{ "role" => "user", "content" => "hello world" }],
+        "system" => "You are a helpful bot",
+      }
+      expect(JSON.parse(request.body)).to eq(expected)
+
+      expect(response).to eq("hello sam")
+
+      log = AiApiAuditLog.order(:id).last
+      expect(log.request_tokens).to eq(10)
+      expect(log.response_tokens).to eq(20)
+    end
+
     it "supports claude 3 streaming" do
       proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
diff --git a/spec/system/llms/ai_llm_spec.rb b/spec/system/llms/ai_llm_spec.rb
index 7431821f3..f6a8982d4 100644
--- a/spec/system/llms/ai_llm_spec.rb
+++ b/spec/system/llms/ai_llm_spec.rb
@@ -73,13 +73,15 @@
 
   context "when changing the provider" do
     it "has the correct provider params when visiting the edit page" do
-      llm = Fabricate(:llm_model, provider: "open_ai", provider_params: {})
+      llm =
+        Fabricate(:llm_model, provider: "anthropic", provider_params: { enable_reasoning: true })
       visit "/admin/plugins/discourse-ai/ai-llms/#{llm.id}/edit"
 
-      expect(form).to have_field_with_name("provider_params.organization")
       expect(form).to have_field_with_name("provider_params.disable_native_tools")
-      expect(form).to have_field_with_name("provider_params.disable_streaming")
-      expect(form).to have_field_with_name("provider_params.reasoning_effort")
+      expect(form).to have_field_with_name("provider_params.reasoning_tokens")
+
+      reasoning = form.field("provider_params.enable_reasoning")
+      expect(reasoning).to be_checked
     end
 
     it "correctly changes the provider params" do
       visit "/admin/plugins/discourse-ai/ai-llms"

From c17541bbd1ab6356fb0a5af627833ccdcb2363a6 Mon Sep 17 00:00:00 2001
From: Sam Saffron
Date: Tue, 25 Feb 2025 17:18:20 +1100
Subject: [PATCH 2/2] FIX: OpenAI non-reasoning models need to use the
 deprecated max_tokens

---
 lib/completions/endpoints/anthropic.rb        |  8 ++-----
 lib/completions/endpoints/aws_bedrock.rb      |  8 ++-----
 lib/completions/endpoints/open_ai.rb          | 10 ++++++---
 .../lib/completions/endpoints/open_ai_spec.rb | 21 ++++++++++++++++++-
 4 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/lib/completions/endpoints/anthropic.rb b/lib/completions/endpoints/anthropic.rb
index c4620ff43..ed950d31f 100644
--- a/lib/completions/endpoints/anthropic.rb
+++ b/lib/completions/endpoints/anthropic.rb
@@ -39,12 +39,8 @@ def default_options(dialect)
         options = { model: mapped_model, max_tokens: max_tokens }
 
         if llm_model.lookup_custom_param("enable_reasoning")
-          reasoning_tokens = llm_model.lookup_custom_param("reasoning_tokens").to_i
-          if reasoning_tokens < 100
-            reasoning_tokens = 100
-          elsif reasoning_tokens > 65_536
-            reasoning_tokens = 65_536
-          end
+          reasoning_tokens =
+            llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(100, 65_536)
 
           # this allows for lots of tokens beyond reasoning
           options[:max_tokens] = reasoning_tokens + 30_000
diff --git a/lib/completions/endpoints/aws_bedrock.rb b/lib/completions/endpoints/aws_bedrock.rb
index 894206b76..75ed12cfe 100644
--- a/lib/completions/endpoints/aws_bedrock.rb
+++ b/lib/completions/endpoints/aws_bedrock.rb
@@ -28,12 +28,8 @@ def default_options(dialect)
           result = { anthropic_version: "bedrock-2023-05-31" }
           if llm_model.lookup_custom_param("enable_reasoning")
-            reasoning_tokens = llm_model.lookup_custom_param("reasoning_tokens").to_i
-            if reasoning_tokens < 100
-              reasoning_tokens = 100
-            elsif reasoning_tokens > 65_536
-              reasoning_tokens = 65_536
-            end
+            reasoning_tokens =
+              llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(100, 65_536)
 
             # this allows for ample tokens beyond reasoning
             max_tokens = reasoning_tokens + 30_000
diff --git a/lib/completions/endpoints/open_ai.rb b/lib/completions/endpoints/open_ai.rb
index 17bb260fe..f2bbd7977 100644
--- a/lib/completions/endpoints/open_ai.rb
+++ b/lib/completions/endpoints/open_ai.rb
@@ -11,9 +11,13 @@ def self.can_contact?(model_provider)
       def normalize_model_params(model_params)
         model_params = model_params.dup
 
-        # max_tokens is deprecated and is not functional on reasoning models
-        max_tokens = model_params.delete(:max_tokens)
-        model_params[:max_completion_tokens] = max_tokens if max_tokens
+        # max_tokens is deprecated, however we still need to support it on
+        # older OpenAI models and older Azure models, so we only normalize it
+        # if our model name starts with o (to denote all the reasoning models)
+        if llm_model.name.starts_with?("o")
+          max_tokens = model_params.delete(:max_tokens)
+          model_params[:max_completion_tokens] = max_tokens if max_tokens
+        end
 
         # temperature is already supported
 
         if model_params[:stop_sequences]
diff --git a/spec/lib/completions/endpoints/open_ai_spec.rb b/spec/lib/completions/endpoints/open_ai_spec.rb
index fe8b9fe4e..67aabd55f 100644
--- a/spec/lib/completions/endpoints/open_ai_spec.rb
+++ b/spec/lib/completions/endpoints/open_ai_spec.rb
@@ -285,6 +285,23 @@ def request_body(prompt, stream: false, tool_call: false)
     end
   end
 
+  describe "max tokens remapping" do
+    it "remaps max_tokens to max_completion_tokens for reasoning models" do
+      model.update!(name: "o3-mini")
+      llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
+
+      body_parsed = nil
+      stub_request(:post, "https://api.openai.com/v1/chat/completions").with(
+        body: ->(body) { body_parsed = JSON.parse(body) },
+      ).to_return(status: 200, body: { choices: [{ message: { content: "hello" } }] }.to_json)
+
+      llm.generate("test", user: user, max_tokens: 1000)
+
+      expect(body_parsed["max_completion_tokens"]).to eq(1000)
+      expect(body_parsed["max_tokens"]).to be_nil
+    end
+  end
+
   describe "forced tool use" do
     it "can properly force tool use" do
       llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
@@ -346,9 +363,11 @@ def request_body(prompt, stream: false, tool_call: false)
           body: proc { |body| body_json = JSON.parse(body, symbolize_names: true) },
         ).to_return(body: response)
 
-        result = llm.generate(prompt, user: user)
+        result = llm.generate(prompt, user: user, max_tokens: 1000)
 
         expect(body_json[:tool_choice]).to eq({ type: "function", function: { name: "echo" } })
+        # we expect this not to be remapped on older non-reasoning models
+        expect(body_json[:max_tokens]).to eq(1000)
 
         log = AiApiAuditLog.order(:id).last
         expect(log.request_tokens).to eq(55)
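
[Editor's sketch, not part of the patch: the normalize_model_params change above keys the parameter rename off the model name. Restated standalone under that assumption; `normalize_max_tokens` and the example model names are hypothetical. o-series reasoning models only accept max_completion_tokens, while older chat models still expect the deprecated max_tokens.]

  # hypothetical standalone version of the remapping in normalize_model_params
  def normalize_max_tokens(model_name, params)
    params = params.dup
    # o-series reasoning models reject max_tokens; rename it for them only
    if model_name.start_with?("o") && (max_tokens = params.delete(:max_tokens))
      params[:max_completion_tokens] = max_tokens
    end
    params
  end

  normalize_max_tokens("o3-mini", max_tokens: 1000) # => { max_completion_tokens: 1000 }
  normalize_max_tokens("gpt-4", max_tokens: 1000)   # => { max_tokens: 1000 }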