From 791ab199b8f9ab8358e47a65c11ebf31e086145f Mon Sep 17 00:00:00 2001
From: Sam Saffron
Date: Tue, 25 Feb 2025 16:54:23 +1100
Subject: [PATCH 1/2] FEATURE: full support for Sonnet 3.7

- Adds support for Sonnet 3.7 with reasoning on Bedrock and Anthropic
- Fixes regression where provider params were not populated

Note: reasoning tokens are hardcoded to a minimum of 100 and a maximum of 65536
---
 app/models/llm_model.rb                       |  4 ++
 .../components/ai-llm-editor-form.gjs         |  9 ++-
 config/locales/client.en.yml                  |  4 +-
 lib/completions/endpoints/anthropic.rb        | 13 ++++
 lib/completions/endpoints/aws_bedrock.rb      | 19 +++++-
 lib/completions/llm.rb                        |  4 +-
 .../completions/endpoints/anthropic_spec.rb   | 62 +++++++++++++++++++
 .../completions/endpoints/aws_bedrock_spec.rb | 51 +++++++++++++++
 spec/system/llms/ai_llm_spec.rb               | 10 +--
 9 files changed, 165 insertions(+), 11 deletions(-)

diff --git a/app/models/llm_model.rb b/app/models/llm_model.rb
index a5f5e5107..084091e88 100644
--- a/app/models/llm_model.rb
+++ b/app/models/llm_model.rb
@@ -26,9 +26,13 @@ def self.provider_params
         access_key_id: :text,
         region: :text,
         disable_native_tools: :checkbox,
+        enable_reasoning: :checkbox,
+        reasoning_tokens: :number,
       },
       anthropic: {
         disable_native_tools: :checkbox,
+        enable_reasoning: :checkbox,
+        reasoning_tokens: :number,
       },
       open_ai: {
         organization: :text,
diff --git a/assets/javascripts/discourse/components/ai-llm-editor-form.gjs b/assets/javascripts/discourse/components/ai-llm-editor-form.gjs
index 6ba6cd269..d3c02df3b 100644
--- a/assets/javascripts/discourse/components/ai-llm-editor-form.gjs
+++ b/assets/javascripts/discourse/components/ai-llm-editor-form.gjs
@@ -61,7 +61,10 @@ export default class AiLlmEditorForm extends Component {
       provider: model.provider,
       enabled_chat_bot: model.enabled_chat_bot,
       vision_enabled: model.vision_enabled,
-      provider_params: this.computeProviderParams(model.provider),
+      provider_params: this.computeProviderParams(
+        model.provider,
+        model.provider_params
+      ),
       llm_quotas: model.llm_quotas,
     };
   }
@@ -128,12 +131,12 @@ export default class AiLlmEditorForm extends Component {
     return !this.args.model.isNew;
  }
 
-  computeProviderParams(provider) {
+  computeProviderParams(provider, currentParams = {}) {
     const params = this.args.llms.resultSetMeta.provider_params[provider] ?? {};
     return Object.fromEntries(
       Object.entries(params).map(([k, v]) => [
         k,
-        v?.type === "enum" ? v.default : null,
+        currentParams[k] ?? (v?.type === "enum" ? v.default : null),
       ])
     );
   }
diff --git a/config/locales/client.en.yml b/config/locales/client.en.yml
index eb4675235..208066797 100644
--- a/config/locales/client.en.yml
+++ b/config/locales/client.en.yml
@@ -390,7 +390,7 @@ en:
 
         model_description:
           none: "General settings that work for most language models"
-          anthropic-claude-3-5-sonnet: "Anthropic's most intelligent model"
+          anthropic-claude-3-7-sonnet: "Anthropic's most intelligent model"
           anthropic-claude-3-5-haiku: "Fast and cost-effective"
           anthropic-claude-3-opus: "Excels at writing and complex tasks"
           google-gemini-1-5-pro: "Mid-sized multimodal model capable of a wide range of tasks"
@@ -459,6 +459,8 @@ en:
             provider_quantizations: "Order of provider quantizations (comma delimited list eg: fp16,fp8)"
             disable_streaming: "Disable streaming completions (convert streaming to non streaming requests)"
             reasoning_effort: "Reasoning effort (only applicable to reasoning models)"
+            enable_reasoning: "Enable reasoning (only applicable to Sonnet 3.7)"
+            reasoning_tokens: "Number of tokens used for reasoning"
 
       related_topics:
         title: "Related topics"
diff --git a/lib/completions/endpoints/anthropic.rb b/lib/completions/endpoints/anthropic.rb
index ffbdb024d..c4620ff43 100644
--- a/lib/completions/endpoints/anthropic.rb
+++ b/lib/completions/endpoints/anthropic.rb
@@ -38,6 +38,19 @@ def default_options(dialect)
 
         options = { model: mapped_model, max_tokens: max_tokens }
 
+        if llm_model.lookup_custom_param("enable_reasoning")
+          reasoning_tokens = llm_model.lookup_custom_param("reasoning_tokens").to_i
+          if reasoning_tokens < 100
+            reasoning_tokens = 100
+          elsif reasoning_tokens > 65_536
+            reasoning_tokens = 65_536
+          end
+
+          # this allows for lots of tokens beyond reasoning
+          options[:max_tokens] = reasoning_tokens + 30_000
+          options[:thinking] = { type: "enabled", budget_tokens: reasoning_tokens }
+        end
+
         options[:stop_sequences] = ["</function_calls>"] if !dialect.native_tool_support? &&
           dialect.prompt.has_tools?
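
[Editor's sketch, not part of the patch: the Anthropic endpoint above and the Bedrock endpoint below apply the same token budgeting rule, restated here as standalone Ruby. The `reasoning_options` helper name is hypothetical; the patch inlines this logic in each endpoint's default_options, and PATCH 2/2 below collapses the if/elsif into the equivalent Integer#clamp used here.]

  # hypothetical helper; equivalent to the inlined endpoint logic
  def reasoning_options(requested_tokens)
    # clamp the configured thinking budget into the supported range
    budget = requested_tokens.to_i.clamp(100, 65_536)

    # max_tokens must exceed the thinking budget, so reserve an extra
    # 30_000 tokens for the visible (non-reasoning) part of the reply
    { max_tokens: budget + 30_000, thinking: { type: "enabled", budget_tokens: budget } }
  end

  reasoning_options(10_000)
  # => { max_tokens: 40_000, thinking: { type: "enabled", budget_tokens: 10_000 } }
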
diff --git a/lib/completions/endpoints/aws_bedrock.rb b/lib/completions/endpoints/aws_bedrock.rb
index be1abb2c0..894206b76 100644
--- a/lib/completions/endpoints/aws_bedrock.rb
+++ b/lib/completions/endpoints/aws_bedrock.rb
@@ -26,7 +26,22 @@ def default_options(dialect)
           max_tokens = 4096
           max_tokens = 8192 if bedrock_model_id.match?(/3.5/)
 
-          { max_tokens: max_tokens, anthropic_version: "bedrock-2023-05-31" }
+          result = { anthropic_version: "bedrock-2023-05-31" }
+          if llm_model.lookup_custom_param("enable_reasoning")
+            reasoning_tokens = llm_model.lookup_custom_param("reasoning_tokens").to_i
+            if reasoning_tokens < 100
+              reasoning_tokens = 100
+            elsif reasoning_tokens > 65_536
+              reasoning_tokens = 65_536
+            end
+
+            # this allows for ample tokens beyond reasoning
+            max_tokens = reasoning_tokens + 30_000
+            result[:thinking] = { type: "enabled", budget_tokens: reasoning_tokens }
+          end
+          result[:max_tokens] = max_tokens
+
+          result
         else
           {}
         end
@@ -66,6 +81,8 @@ def bedrock_model_id
           "anthropic.claude-3-5-sonnet-20241022-v2:0"
         when "claude-3-5-haiku"
           "anthropic.claude-3-5-haiku-20241022-v1:0"
+        when "claude-3-7-sonnet"
+          "anthropic.claude-3-7-sonnet-20250219-v1:0"
         else
           llm_model.name
         end
diff --git a/lib/completions/llm.rb b/lib/completions/llm.rb
index 1db8f0907..540aae08c 100644
--- a/lib/completions/llm.rb
+++ b/lib/completions/llm.rb
@@ -27,9 +27,9 @@ def presets
             id: "anthropic",
             models: [
               {
-                name: "claude-3-5-sonnet",
+                name: "claude-3-7-sonnet",
                 tokens: 200_000,
-                display_name: "Claude 3.5 Sonnet",
+                display_name: "Claude 3.7 Sonnet",
               },
               { name: "claude-3-5-haiku", tokens: 200_000, display_name: "Claude 3.5 Haiku" },
               { name: "claude-3-opus", tokens: 200_000, display_name: "Claude 3 Opus" },
diff --git a/spec/lib/completions/endpoints/anthropic_spec.rb b/spec/lib/completions/endpoints/anthropic_spec.rb
index 72ba2422d..b43c76254 100644
--- a/spec/lib/completions/endpoints/anthropic_spec.rb
+++ b/spec/lib/completions/endpoints/anthropic_spec.rb
@@ -334,6 +334,68 @@
     expect(requested_body).to eq(request_body)
   end
 
+  it "can support reasoning" do
+    body = <<~STRING
+      {
+        "content": [
+          {
+            "text": "Hello!",
+            "type": "text"
+          }
+        ],
+        "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
+        "model": "claude-3-opus-20240229",
+        "role": "assistant",
+        "stop_reason": "end_turn",
+        "stop_sequence": null,
+        "type": "message",
+        "usage": {
+          "input_tokens": 10,
+          "output_tokens": 25
+        }
+      }
+    STRING
+
+    parsed_body = nil
+    stub_request(:post, url).with(
+      body:
+        proc do |req_body|
+          parsed_body = JSON.parse(req_body, symbolize_names: true)
+          true
+        end,
+      headers: {
+        "Content-Type" => "application/json",
+        "X-Api-Key" => "123",
+        "Anthropic-Version" => "2023-06-01",
+      },
+    ).to_return(status: 200, body: body)
+
+    model.provider_params["enable_reasoning"] = true
+    model.provider_params["reasoning_tokens"] = 10_000
+    model.save!
+
+    proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
+    result = proxy.generate(prompt, user: Discourse.system_user)
+    expect(result).to eq("Hello!")
+
+    expected_body = {
+      model: "claude-3-opus-20240229",
+      max_tokens: 40_000,
+      thinking: {
+        type: "enabled",
+        budget_tokens: 10_000,
+      },
+      messages: [{ role: "user", content: "user1: hello" }],
+      system: "You are hello bot",
+    }
+    expect(parsed_body).to eq(expected_body)
+
+    log = AiApiAuditLog.order(:id).last
+    expect(log.provider_id).to eq(AiApiAuditLog::Provider::Anthropic)
+    expect(log.request_tokens).to eq(10)
+    expect(log.response_tokens).to eq(25)
+  end
+
   it "can operate in regular mode" do
     body = <<~STRING
       {
diff --git a/spec/lib/completions/endpoints/aws_bedrock_spec.rb b/spec/lib/completions/endpoints/aws_bedrock_spec.rb
index f5329d3db..ebe8094b1 100644
--- a/spec/lib/completions/endpoints/aws_bedrock_spec.rb
+++ b/spec/lib/completions/endpoints/aws_bedrock_spec.rb
@@ -335,6 +335,57 @@ def encode_message(message)
       expect(log.response_tokens).to eq(20)
     end
 
+    it "supports thinking" do
+      model.provider_params["enable_reasoning"] = true
+      model.provider_params["reasoning_tokens"] = 10_000
+      model.save!
+
+      proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
+
+      request = nil
+
+      content = {
+        content: [text: "hello sam"],
+        usage: {
+          input_tokens: 10,
+          output_tokens: 20,
+        },
+      }.to_json
+
+      stub_request(
+        :post,
+        "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke",
+      )
+        .with do |inner_request|
+          request = inner_request
+          true
+        end
+        .to_return(status: 200, body: content)
+
+      response = proxy.generate("hello world", user: user)
+
+      expect(request.headers["Authorization"]).to be_present
+      expect(request.headers["X-Amz-Content-Sha256"]).to be_present
+
+      expected = {
+        "max_tokens" => 40_000,
+        "thinking" => {
+          "type" => "enabled",
+          "budget_tokens" => 10_000,
+        },
+        "anthropic_version" => "bedrock-2023-05-31",
+        "messages" => [{ "role" => "user", "content" => "hello world" }],
+        "system" => "You are a helpful bot",
+      }
+      expect(JSON.parse(request.body)).to eq(expected)
+
+      expect(response).to eq("hello sam")
+
+      log = AiApiAuditLog.order(:id).last
+      expect(log.request_tokens).to eq(10)
+      expect(log.response_tokens).to eq(20)
+    end
+
     it "supports claude 3 streaming" do
       proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
diff --git a/spec/system/llms/ai_llm_spec.rb b/spec/system/llms/ai_llm_spec.rb
index 7431821f3..f6a8982d4 100644
--- a/spec/system/llms/ai_llm_spec.rb
+++ b/spec/system/llms/ai_llm_spec.rb
@@ -73,13 +73,15 @@
 
   context "when changing the provider" do
     it "has the correct provider params when visiting the edit page" do
-      llm = Fabricate(:llm_model, provider: "open_ai", provider_params: {})
+      llm =
+        Fabricate(:llm_model, provider: "anthropic", provider_params: { enable_reasoning: true })
       visit "/admin/plugins/discourse-ai/ai-llms/#{llm.id}/edit"
 
-      expect(form).to have_field_with_name("provider_params.organization")
       expect(form).to have_field_with_name("provider_params.disable_native_tools")
-      expect(form).to have_field_with_name("provider_params.disable_streaming")
-      expect(form).to have_field_with_name("provider_params.reasoning_effort")
+      expect(form).to have_field_with_name("provider_params.reasoning_tokens")
+
+      reasoning = form.field("provider_params.enable_reasoning")
+      expect(reasoning).to be_checked
     end
 
     it "correctly changes the provider params" do
       visit "/admin/plugins/discourse-ai/ai-llms"

From c17541bbd1ab6356fb0a5af627833ccdcb2363a6 Mon Sep 17 00:00:00 2001
From: Sam Saffron
Date: Tue, 25 Feb 2025 17:18:20 +1100
Subject: [PATCH 2/2] FIX: OpenAI non-reasoning models need to use the
 deprecated max_tokens

---
 lib/completions/endpoints/anthropic.rb        |  8 ++-----
 lib/completions/endpoints/aws_bedrock.rb      |  8 ++-----
 lib/completions/endpoints/open_ai.rb          | 10 ++++++---
 .../lib/completions/endpoints/open_ai_spec.rb | 21 ++++++++++++++++++-
 4 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/lib/completions/endpoints/anthropic.rb b/lib/completions/endpoints/anthropic.rb
index c4620ff43..ed950d31f 100644
--- a/lib/completions/endpoints/anthropic.rb
+++ b/lib/completions/endpoints/anthropic.rb
@@ -39,12 +39,8 @@ def default_options(dialect)
         options = { model: mapped_model, max_tokens: max_tokens }
 
         if llm_model.lookup_custom_param("enable_reasoning")
-          reasoning_tokens = llm_model.lookup_custom_param("reasoning_tokens").to_i
-          if reasoning_tokens < 100
-            reasoning_tokens = 100
-          elsif reasoning_tokens > 65_536
-            reasoning_tokens = 65_536
-          end
+          reasoning_tokens =
+            llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(100, 65_536)
 
           # this allows for lots of tokens beyond reasoning
           options[:max_tokens] = reasoning_tokens + 30_000
diff --git a/lib/completions/endpoints/aws_bedrock.rb b/lib/completions/endpoints/aws_bedrock.rb
index 894206b76..75ed12cfe 100644
--- a/lib/completions/endpoints/aws_bedrock.rb
+++ b/lib/completions/endpoints/aws_bedrock.rb
@@ -28,12 +28,8 @@ def default_options(dialect)
           result = { anthropic_version: "bedrock-2023-05-31" }
           if llm_model.lookup_custom_param("enable_reasoning")
-            reasoning_tokens = llm_model.lookup_custom_param("reasoning_tokens").to_i
-            if reasoning_tokens < 100
-              reasoning_tokens = 100
-            elsif reasoning_tokens > 65_536
-              reasoning_tokens = 65_536
-            end
+            reasoning_tokens =
+              llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(100, 65_536)
 
             # this allows for ample tokens beyond reasoning
             max_tokens = reasoning_tokens + 30_000
diff --git a/lib/completions/endpoints/open_ai.rb b/lib/completions/endpoints/open_ai.rb
index 17bb260fe..f2bbd7977 100644
--- a/lib/completions/endpoints/open_ai.rb
+++ b/lib/completions/endpoints/open_ai.rb
@@ -11,9 +11,13 @@ def self.can_contact?(model_provider)
       def normalize_model_params(model_params)
         model_params = model_params.dup
 
-        # max_tokens is deprecated and is not functional on reasoning models
-        max_tokens = model_params.delete(:max_tokens)
-        model_params[:max_completion_tokens] = max_tokens if max_tokens
+        # max_tokens is deprecated, however we still need to support it on
+        # older OpenAI models and older Azure models, so we only normalize it
+        # if our model name starts with o (to denote all the reasoning models)
+        if llm_model.name.starts_with?("o")
+          max_tokens = model_params.delete(:max_tokens)
+          model_params[:max_completion_tokens] = max_tokens if max_tokens
+        end
 
         # temperature is already supported
 
         if model_params[:stop_sequences]
diff --git a/spec/lib/completions/endpoints/open_ai_spec.rb b/spec/lib/completions/endpoints/open_ai_spec.rb
index fe8b9fe4e..67aabd55f 100644
--- a/spec/lib/completions/endpoints/open_ai_spec.rb
+++ b/spec/lib/completions/endpoints/open_ai_spec.rb
@@ -285,6 +285,23 @@ def request_body(prompt, stream: false, tool_call: false)
     end
   end
 
+  describe "max tokens remapping" do
+    it "remaps max_tokens to max_completion_tokens for reasoning models" do
+      model.update!(name: "o3-mini")
+      llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
+
+      body_parsed = nil
+      stub_request(:post, "https://api.openai.com/v1/chat/completions").with(
+        body: ->(body) { body_parsed = JSON.parse(body) },
+      ).to_return(status: 200, body: { choices: [{ message: { content: "hello" } }] }.to_json)
+
+      llm.generate("test", user: user, max_tokens: 1000)
+
+      expect(body_parsed["max_completion_tokens"]).to eq(1000)
+      expect(body_parsed["max_tokens"]).to be_nil
+    end
+  end
+
   describe "forced tool use" do
     it "can properly force tool use" do
       llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
@@ -346,9 +363,11 @@ def request_body(prompt, stream: false, tool_call: false)
           body: proc { |body| body_json = JSON.parse(body, symbolize_names: true) },
         ).to_return(body: response)
 
-        result = llm.generate(prompt, user: user)
+        result = llm.generate(prompt, user: user, max_tokens: 1000)
 
         expect(body_json[:tool_choice]).to eq({ type: "function", function: { name: "echo" } })
+        # we expect this not to be remapped on older non-reasoning models
+        expect(body_json[:max_tokens]).to eq(1000)
 
         log = AiApiAuditLog.order(:id).last
         expect(log.request_tokens).to eq(55)
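
[Editor's sketch, not part of the patch: the normalize_model_params change above keys the parameter rename off the model name. Restated standalone under that assumption; `normalize_max_tokens` and the example model names are hypothetical. o-series reasoning models only accept max_completion_tokens, while older chat models still expect the deprecated max_tokens.]

  # hypothetical standalone version of the remapping in normalize_model_params
  def normalize_max_tokens(model_name, params)
    params = params.dup
    # o-series reasoning models reject max_tokens; rename it for them only
    if model_name.start_with?("o") && (max_tokens = params.delete(:max_tokens))
      params[:max_completion_tokens] = max_tokens
    end
    params
  end

  normalize_max_tokens("o3-mini", max_tokens: 1000) # => { max_completion_tokens: 1000 }
  normalize_max_tokens("gpt-4", max_tokens: 1000)   # => { max_tokens: 1000 }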