Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit 1016adc

Browse files
committed
FIX: use max_completion_tokens for OpenAI models
max_tokens is deprecated in the OpenAI API in favor of max_completion_tokens
1 parent 02f0908 commit 1016adc

File tree

3 files changed

+50
-1
lines changed

3 files changed

+50
-1
lines changed

lib/completions/endpoints/open_ai.rb

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,11 @@ def self.can_contact?(model_provider)
1111
def normalize_model_params(model_params)
1212
model_params = model_params.dup
1313

14-
# max_tokens, temperature are already supported
14+
# max_tokens is deprecated and is not functional on reasoning models
15+
max_tokens = model_params.delete(:max_tokens)
16+
model_params[:max_completion_tokens] = max_tokens if max_tokens
17+
18+
# temperature is already supported
1519
if model_params[:stop_sequences]
1620
model_params[:stop] = model_params.delete(:stop_sequences)
1721
end

lib/completions/endpoints/open_router.rb

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,17 @@ def self.can_contact?(model_provider)
88
%w[open_router].include?(model_provider)
99
end
1010

11+
# Convert provider-agnostic model params into the shape this endpoint accepts.
# max_tokens and temperature pass through unchanged; the generic
# :stop_sequences key is renamed to :stop, which this API expects.
# Operates on a copy so the caller's hash is never mutated.
def normalize_model_params(model_params)
  normalized = model_params.dup

  # max_tokens, temperature are already supported
  normalized[:stop] = normalized.delete(:stop_sequences) if normalized[:stop_sequences]

  normalized
end
21+
1122
def prepare_request(payload)
1223
headers = { "Content-Type" => "application/json" }
1324
api_key = llm_model.api_key

spec/lib/completions/endpoints/open_ai_spec.rb

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,40 @@ def request_body(prompt, stream: false, tool_call: false)
171171
UploadCreator.new(image100x100, "image.jpg").create_for(Discourse.system_user.id)
172172
end
173173

174+
# Regression coverage for the max_tokens -> max_completion_tokens rename:
# verifies that a caller-supplied max_tokens: value is sent to the OpenAI
# API under the max_completion_tokens key (the deprecated max_tokens key is
# rejected by reasoning models such as o3-mini).
describe "max tokens for reasoning models" do
175+
it "uses max_completion_tokens for reasoning models" do
176+
# Point the LLM fixture at a reasoning model so the rename path applies.
model.update!(name: "o3-mini")
177+
llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
178+
prompt =
179+
DiscourseAi::Completions::Prompt.new(
180+
"You are a bot",
181+
messages: [type: :user, content: "hello"],
182+
)
183+
184+
# Canned SSE stream: role chunk, one "hello" content chunk, a stop chunk,
# a usage-only chunk, then the [DONE] sentinel.
response_text = <<~RESPONSE
185+
data: {"id":"chatcmpl-B2VwlY6KzSDtHvg8pN1VAfRhhLFgn","object":"chat.completion.chunk","created":1739939159,"model":"o3-mini-2025-01-31","service_tier":"default","system_fingerprint":"fp_ef58bd3122","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"finish_reason":null}],"usage":null}
186+
187+
data: {"id":"chatcmpl-B2VwlY6KzSDtHvg8pN1VAfRhhLFgn","object":"chat.completion.chunk","created":1739939159,"model":"o3-mini-2025-01-31","service_tier":"default","system_fingerprint":"fp_ef58bd3122","choices":[{"index":0,"delta":{"content":"hello"},"finish_reason":null}],"usage":null}
188+
189+
data: {"id":"chatcmpl-B2VwlY6KzSDtHvg8pN1VAfRhhLFgn","object":"chat.completion.chunk","created":1739939159,"model":"o3-mini-2025-01-31","service_tier":"default","system_fingerprint":"fp_ef58bd3122","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":null}
190+
191+
data: {"id":"chatcmpl-B2VwlY6KzSDtHvg8pN1VAfRhhLFgn","object":"chat.completion.chunk","created":1739939159,"model":"o3-mini-2025-01-31","service_tier":"default","system_fingerprint":"fp_ef58bd3122","choices":[],"usage":{"prompt_tokens":22,"completion_tokens":203,"total_tokens":225,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":192,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}}
192+
193+
data: [DONE]
194+
RESPONSE
195+
196+
# Capture the outgoing request body so we can assert on the exact key sent.
body_parsed = nil
197+
stub_request(:post, "https://api.openai.com/v1/chat/completions").with(
198+
body: ->(body) { body_parsed = JSON.parse(body) },
199+
).to_return(body: response_text)
200+
# Stream the completion; chunks accumulate into a mutable string.
result = +""
201+
llm.generate(prompt, user: user, max_tokens: 1000) { |chunk| result << chunk }
202+
203+
expect(result).to eq("hello")
# The key assertion: max_tokens: 1000 must arrive as max_completion_tokens.
expect(body_parsed["max_completion_tokens"]).to eq(1000)
204+
205+
end
206+
end
207+
174208
describe "repeat calls" do
175209
it "can properly reset context" do
176210
llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")

0 commit comments

Comments (0)