4 changes: 4 additions & 0 deletions app/models/llm_model.rb
@@ -26,9 +26,13 @@ def self.provider_params
access_key_id: :text,
region: :text,
disable_native_tools: :checkbox,
enable_reasoning: :checkbox,
reasoning_tokens: :number,
},
anthropic: {
disable_native_tools: :checkbox,
enable_reasoning: :checkbox,
reasoning_tokens: :number,
},
open_ai: {
organization: :text,
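The two new Anthropic params surface in the admin form as a checkbox plus a number field. At request time they are read back through `lookup_custom_param`, which is how the endpoint changes below consume them. A minimal sketch of that flow (the lookup itself is illustrative):

```ruby
# Illustrative only: reading the new provider params off a saved model.
llm_model = LlmModel.find_by(provider: "anthropic") # hypothetical lookup

if llm_model.lookup_custom_param("enable_reasoning")
  # The clamp mirrors the endpoint code below: 100..65_536 tokens.
  budget = llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(100, 65_536)
  # `budget` is sent to Anthropic as thinking.budget_tokens.
end
```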
@@ -61,7 +61,10 @@ export default class AiLlmEditorForm extends Component {
provider: model.provider,
enabled_chat_bot: model.enabled_chat_bot,
vision_enabled: model.vision_enabled,
provider_params: this.computeProviderParams(model.provider),
provider_params: this.computeProviderParams(
model.provider,
model.provider_params
),
llm_quotas: model.llm_quotas,
};
}
@@ -128,12 +131,12 @@ export default class AiLlmEditorForm extends Component {
return !this.args.model.isNew;
}

computeProviderParams(provider) {
computeProviderParams(provider, currentParams = {}) {
const params = this.args.llms.resultSetMeta.provider_params[provider] ?? {};
return Object.fromEntries(
Object.entries(params).map(([k, v]) => [
k,
v?.type === "enum" ? v.default : null,
currentParams[k] ?? (v?.type === "enum" ? v.default : null),
])
);
}
4 changes: 3 additions & 1 deletion config/locales/client.en.yml
@@ -390,7 +390,7 @@ en:

model_description:
none: "General settings that work for most language models"
anthropic-claude-3-5-sonnet: "Anthropic's most intelligent model"
anthropic-claude-3-7-sonnet: "Anthropic's most intelligent model"
anthropic-claude-3-5-haiku: "Fast and cost-effective"
anthropic-claude-3-opus: "Excels at writing and complex tasks"
google-gemini-1-5-pro: "Mid-sized multimodal model capable of a wide range of tasks"
@@ -459,6 +459,8 @@
provider_quantizations: "Order of provider quantizations (comma delimited list eg: fp16,fp8)"
disable_streaming: "Disable streaming completions (convert streaming to non streaming requests)"
reasoning_effort: "Reasoning effort (only applicable to reasoning models)"
enable_reasoning: "Enable reasoning (only applicable to Sonnet 3.7)"
reasoning_tokens: "Number of tokens used for reasoning"

related_topics:
title: "Related topics"
9 changes: 9 additions & 0 deletions lib/completions/endpoints/anthropic.rb
@@ -38,6 +38,15 @@ def default_options(dialect)

options = { model: mapped_model, max_tokens: max_tokens }

if llm_model.lookup_custom_param("enable_reasoning")
reasoning_tokens =
llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(100, 65_536)

# this allows for ample tokens beyond the reasoning budget
options[:max_tokens] = reasoning_tokens + 30_000
options[:thinking] = { type: "enabled", budget_tokens: reasoning_tokens }
end

options[:stop_sequences] = ["</function_calls>"] if !dialect.native_tool_support? &&
dialect.prompt.has_tools?

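With reasoning enabled and a 10_000-token budget, `default_options` yields a payload like the one asserted in the spec further down; a sketch (the model id is just the spec's example):

```ruby
# Expected request shape for reasoning_tokens = 10_000 (see anthropic_spec.rb below).
{
  model: "claude-3-opus-20240229", # whatever the mapped model resolves to
  max_tokens: 40_000,              # 10_000 reasoning budget + 30_000 output headroom
  thinking: {
    type: "enabled",
    budget_tokens: 10_000,
  },
}
```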
15 changes: 14 additions & 1 deletion lib/completions/endpoints/aws_bedrock.rb
@@ -26,7 +26,18 @@ def default_options(dialect)
max_tokens = 4096
max_tokens = 8192 if bedrock_model_id.match?(/3.5/)

{ max_tokens: max_tokens, anthropic_version: "bedrock-2023-05-31" }
result = { anthropic_version: "bedrock-2023-05-31" }
if llm_model.lookup_custom_param("enable_reasoning")
reasoning_tokens =
llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(100, 65_536)

# this allows for ample tokens beyond reasoning
max_tokens = reasoning_tokens + 30_000
result[:thinking] = { type: "enabled", budget_tokens: reasoning_tokens }
end
result[:max_tokens] = max_tokens

result
else
{}
end
@@ -66,6 +77,8 @@ def bedrock_model_id
"anthropic.claude-3-5-sonnet-20241022-v2:0"
when "claude-3-5-haiku"
"anthropic.claude-3-5-haiku-20241022-v1:0"
when "claude-3-7-sonnet"
"anthropic.claude-3-7-sonnet-20250219-v1:0"
else
llm_model.name
end
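The Bedrock branch assembles the same `thinking` block while keeping the Bedrock-specific `anthropic_version`, and the new case arm maps `claude-3-7-sonnet` to the dated Bedrock model id. Since the budget is clamped to at most 65_536 tokens, `max_tokens` can reach 95_536. For a 10_000-token budget the invoke body matches the spec below:

```ruby
# Expected Bedrock request body for reasoning_tokens = 10_000.
{
  max_tokens: 40_000, # reasoning budget + 30_000 headroom
  thinking: { type: "enabled", budget_tokens: 10_000 },
  anthropic_version: "bedrock-2023-05-31",
}
```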
10 changes: 7 additions & 3 deletions lib/completions/endpoints/open_ai.rb
@@ -11,9 +11,13 @@ def self.can_contact?(model_provider)
def normalize_model_params(model_params)
model_params = model_params.dup

# max_tokens is deprecated and is not functional on reasoning models
max_tokens = model_params.delete(:max_tokens)
model_params[:max_completion_tokens] = max_tokens if max_tokens
# max_tokens is deprecated, however we still need to support it
# on older OpenAI models and older Azure models, so we only normalize
# when the model name starts with "o" (denoting the reasoning models)
if llm_model.name.starts_with?("o")
max_tokens = model_params.delete(:max_tokens)
model_params[:max_completion_tokens] = max_tokens if max_tokens
end

# temperature is already supported
if model_params[:stop_sequences]
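So the remap now fires only for models whose name starts with "o" (o1, o3-mini, and so on); older chat and Azure models keep `max_tokens` as-is. A self-contained sketch of both paths, with hypothetical model names:

```ruby
# Standalone illustration of the branch above (not the plugin's actual method).
def normalize(model_name, params)
  params = params.dup
  if model_name.start_with?("o") # reasoning models: o1, o3-mini, ...
    max_tokens = params.delete(:max_tokens)
    params[:max_completion_tokens] = max_tokens if max_tokens
  end
  params
end

normalize("o3-mini", { max_tokens: 1000 }) # => { max_completion_tokens: 1000 }
normalize("gpt-4",   { max_tokens: 1000 }) # => { max_tokens: 1000 }
```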
4 changes: 2 additions & 2 deletions lib/completions/llm.rb
@@ -27,9 +27,9 @@ def presets
id: "anthropic",
models: [
{
name: "claude-3-5-sonnet",
name: "claude-3-7-sonnet",
tokens: 200_000,
display_name: "Claude 3.5 Sonnet",
display_name: "Claude 3.7 Sonnet",
},
{ name: "claude-3-5-haiku", tokens: 200_000, display_name: "Claude 3.5 Haiku" },
{ name: "claude-3-opus", tokens: 200_000, display_name: "Claude 3 Opus" },
62 changes: 62 additions & 0 deletions spec/lib/completions/endpoints/anthropic_spec.rb
@@ -334,6 +334,68 @@
expect(requested_body).to eq(request_body)
end

it "can support reasoning" do
body = <<~STRING
{
"content": [
{
"text": "Hello!",
"type": "text"
}
],
"id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
"model": "claude-3-opus-20240229",
"role": "assistant",
"stop_reason": "end_turn",
"stop_sequence": null,
"type": "message",
"usage": {
"input_tokens": 10,
"output_tokens": 25
}
}
STRING

parsed_body = nil
stub_request(:post, url).with(
body:
proc do |req_body|
parsed_body = JSON.parse(req_body, symbolize_names: true)
true
end,
headers: {
"Content-Type" => "application/json",
"X-Api-Key" => "123",
"Anthropic-Version" => "2023-06-01",
},
).to_return(status: 200, body: body)

model.provider_params["enable_reasoning"] = true
model.provider_params["reasoning_tokens"] = 10_000
model.save!

proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
result = proxy.generate(prompt, user: Discourse.system_user)
expect(result).to eq("Hello!")

expected_body = {
model: "claude-3-opus-20240229",
max_tokens: 40_000,
thinking: {
type: "enabled",
budget_tokens: 10_000,
},
messages: [{ role: "user", content: "user1: hello" }],
system: "You are hello bot",
}
expect(parsed_body).to eq(expected_body)

log = AiApiAuditLog.order(:id).last
expect(log.provider_id).to eq(AiApiAuditLog::Provider::Anthropic)
expect(log.request_tokens).to eq(10)
expect(log.response_tokens).to eq(25)
end

it "can operate in regular mode" do
body = <<~STRING
{
51 changes: 51 additions & 0 deletions spec/lib/completions/endpoints/aws_bedrock_spec.rb
@@ -335,6 +335,57 @@ def encode_message(message)
expect(log.response_tokens).to eq(20)
end

it "supports thinking" do
model.provider_params["enable_reasoning"] = true
model.provider_params["reasoning_tokens"] = 10_000
model.save!

proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")

request = nil

content = {
content: [text: "hello sam"],
usage: {
input_tokens: 10,
output_tokens: 20,
},
}.to_json

stub_request(
:post,
"https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke",
)
.with do |inner_request|
request = inner_request
true
end
.to_return(status: 200, body: content)

response = proxy.generate("hello world", user: user)

expect(request.headers["Authorization"]).to be_present
expect(request.headers["X-Amz-Content-Sha256"]).to be_present

expected = {
"max_tokens" => 40_000,
"thinking" => {
"type" => "enabled",
"budget_tokens" => 10_000,
},
"anthropic_version" => "bedrock-2023-05-31",
"messages" => [{ "role" => "user", "content" => "hello world" }],
"system" => "You are a helpful bot",
}
expect(JSON.parse(request.body)).to eq(expected)

expect(response).to eq("hello sam")

log = AiApiAuditLog.order(:id).last
expect(log.request_tokens).to eq(10)
expect(log.response_tokens).to eq(20)
end

it "supports claude 3 streaming" do
proxy = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")

21 changes: 20 additions & 1 deletion spec/lib/completions/endpoints/open_ai_spec.rb
@@ -285,6 +285,23 @@ def request_body(prompt, stream: false, tool_call: false)
end
end

describe "max tokens remapping" do
it "remaps max_tokens to max_completion_tokens for reasoning models" do
model.update!(name: "o3-mini")
llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")

body_parsed = nil
stub_request(:post, "https://api.openai.com/v1/chat/completions").with(
body: ->(body) { body_parsed = JSON.parse(body) },
).to_return(status: 200, body: { choices: [{ message: { content: "hello" } }] }.to_json)

llm.generate("test", user: user, max_tokens: 1000)

expect(body_parsed["max_completion_tokens"]).to eq(1000)
expect(body_parsed["max_tokens"]).to be_nil
end
end

describe "forced tool use" do
it "can properly force tool use" do
llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
@@ -346,9 +363,11 @@ def request_body(prompt, stream: false, tool_call: false)
body: proc { |body| body_json = JSON.parse(body, symbolize_names: true) },
).to_return(body: response)

result = llm.generate(prompt, user: user)
result = llm.generate(prompt, user: user, max_tokens: 1000)

expect(body_json[:tool_choice]).to eq({ type: "function", function: { name: "echo" } })
# we expect this not to be remapped on older, non-reasoning models
expect(body_json[:max_tokens]).to eq(1000)

log = AiApiAuditLog.order(:id).last
expect(log.request_tokens).to eq(55)
Expand Down
10 changes: 6 additions & 4 deletions spec/system/llms/ai_llm_spec.rb
@@ -73,13 +73,15 @@

context "when changing the provider" do
it "has the correct provider params when visiting the edit page" do
llm = Fabricate(:llm_model, provider: "open_ai", provider_params: {})
llm =
Fabricate(:llm_model, provider: "anthropic", provider_params: { enable_reasoning: true })
visit "/admin/plugins/discourse-ai/ai-llms/#{llm.id}/edit"

expect(form).to have_field_with_name("provider_params.organization")
expect(form).to have_field_with_name("provider_params.disable_native_tools")
expect(form).to have_field_with_name("provider_params.disable_streaming")
expect(form).to have_field_with_name("provider_params.reasoning_effort")
expect(form).to have_field_with_name("provider_params.reasoning_tokens")

reasoning = form.field("provider_params.enable_reasoning")
expect(reasoning).to be_checked
end
it "correctly changes the provider params" do
visit "/admin/plugins/discourse-ai/ai-llms"