Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions app/models/llm_model.rb
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ def self.provider_params
},
google: {
disable_native_tools: :checkbox,
enable_thinking: :checkbox,
thinking_tokens: :number,
},
azure: {
disable_native_tools: :checkbox,
Expand Down
2 changes: 2 additions & 0 deletions config/locales/client.en.yml
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,8 @@ en:
disable_streaming: "Disable streaming completions (convert streaming to non streaming requests)"
reasoning_effort: "Reasoning effort (only applicable to reasoning models)"
enable_reasoning: "Enable reasoning (only applicable to Sonnet 3.7)"
enable_thinking: "Enable thinking (only on applicable models, e.g. Flash 2.5)"
thinking_tokens: "Number of tokens used for thinking"
reasoning_tokens: "Number of tokens used for reasoning"
disable_temperature: "Disable temperature (some thinking models don't support temperature)"
disable_top_p: "Disable top P (some thinking models don't support top P)"
Expand Down
6 changes: 6 additions & 0 deletions lib/completions/endpoints/gemini.rb
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,12 @@ def prepare_payload(prompt, model_params, dialect)
end
end

# Optionally enable Gemini's "thinking" mode when the admin has turned it on
# via this model's custom provider params. The admin-supplied token budget is
# clamped to 24_576 — presumably Gemini's maximum thinking budget for the
# targeted models; confirm against current Gemini API limits.
# NOTE(review): if "thinking_tokens" is unset, nil.to_i yields 0, and a
# thinkingBudget of 0 asks Gemini to disable thinking — confirm this is the
# intended default when only "enable_thinking" is checked.
if llm_model.lookup_custom_param("enable_thinking")
  thinking_tokens = llm_model.lookup_custom_param("thinking_tokens").to_i
  thinking_tokens = thinking_tokens.clamp(0, 24_576)
  payload[:generationConfig][:thinkingConfig] = { thinkingBudget: thinking_tokens }
end

payload
end

Expand Down
78 changes: 78 additions & 0 deletions spec/lib/completions/endpoints/gemini_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,84 @@ def tool_response
}
end

it "correctly configures thinking when enabled" do
  model.update!(provider_params: { enable_thinking: "true", thinking_tokens: "10000" })

  stubbed_response = gemini_mock.response("Using thinking mode").to_json
  captured_body = nil

  llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
  url = "#{model.url}:generateContent?key=123"

  # Capture the outgoing request body so we can inspect the generated payload.
  stub_request(:post, url)
    .with(
      body:
        proc do |body|
          captured_body = body
          true
        end,
    )
    .to_return(status: 200, body: stubbed_response)

  llm.generate("Hello", user: user)

  parsed = JSON.parse(captured_body, symbolize_names: true)

  # The thinking config must carry the admin-configured token budget.
  expect(parsed.dig(:generationConfig, :thinkingConfig)).to eq({ thinkingBudget: 10_000 })
end

it "clamps thinking tokens within allowed limits" do
  model.update!(provider_params: { enable_thinking: "true", thinking_tokens: "30000" })

  stubbed_response = gemini_mock.response("Thinking tokens clamped").to_json
  captured_body = nil

  llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
  url = "#{model.url}:generateContent?key=123"

  # Capture the outgoing request body so we can inspect the generated payload.
  stub_request(:post, url)
    .with(
      body:
        proc do |body|
          captured_body = body
          true
        end,
    )
    .to_return(status: 200, body: stubbed_response)

  llm.generate("Hello", user: user)

  parsed = JSON.parse(captured_body, symbolize_names: true)

  # A 30_000-token request must be clamped down to the 24_576 maximum.
  expect(parsed.dig(:generationConfig, :thinkingConfig)).to eq({ thinkingBudget: 24_576 })
end

it "does not add thinking config when disabled" do
  model.update!(provider_params: { enable_thinking: false, thinking_tokens: "10000" })

  stubbed_response = gemini_mock.response("No thinking mode").to_json
  captured_body = nil

  llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
  url = "#{model.url}:generateContent?key=123"

  # Capture the outgoing request body so we can inspect the generated payload.
  stub_request(:post, url)
    .with(
      body:
        proc do |body|
          captured_body = body
          true
        end,
    )
    .to_return(status: 200, body: stubbed_response)

  llm.generate("Hello", user: user)

  parsed = JSON.parse(captured_body, symbolize_names: true)

  # With enable_thinking off, no thinkingConfig should be emitted at all,
  # even though a token budget is configured.
  expect(parsed.dig(:generationConfig, :thinkingConfig)).to be_nil
end

# by default gemini is meant to use AUTO mode, however new experimental models
# appear to require this to be explicitly set
it "Explicitly specifies tool config" do
Expand Down
Loading