Skip to content
This repository was archived by the owner on Jul 22, 2025. It is now read-only.

Commit 1016adc

Browse files
committed
FIX: use max_completion_tokens for OpenAI models
max_tokens is deprecated in the OpenAI API in favor of max_completion_tokens
1 parent 02f0908 commit 1016adc

File tree

3 files changed

+50
-1
lines changed

3 files changed

+50
-1
lines changed

lib/completions/endpoints/open_ai.rb

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,11 @@ def self.can_contact?(model_provider)
1111
def normalize_model_params(model_params)
1212
model_params = model_params.dup
1313

14-
# max_tokens, temperature are already supported
14+
# max_tokens is deprecated and is not functional on reasoning models
15+
max_tokens = model_params.delete(:max_tokens)
16+
model_params[:max_completion_tokens] = max_tokens if max_tokens
17+
18+
# temperature is already supported
1519
if model_params[:stop_sequences]
1620
model_params[:stop] = model_params.delete(:stop_sequences)
1721
end

lib/completions/endpoints/open_router.rb

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,17 @@ def self.can_contact?(model_provider)
88
%w[open_router].include?(model_provider)
99
end
1010

11+
# Convert provider-agnostic model params into the shape this endpoint accepts.
# max_tokens and temperature pass through unchanged; the generic
# :stop_sequences key is renamed to :stop, which this API expects.
# Operates on a copy so the caller's hash is never mutated.
def normalize_model_params(model_params)
  normalized = model_params.dup

  # max_tokens, temperature are already supported
  normalized[:stop] = normalized.delete(:stop_sequences) if normalized[:stop_sequences]

  normalized
end
21+
1122
def prepare_request(payload)
1223
headers = { "Content-Type" => "application/json" }
1324
api_key = llm_model.api_key

spec/lib/completions/endpoints/open_ai_spec.rb

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,40 @@ def request_body(prompt, stream: false, tool_call: false)
171171
UploadCreator.new(image100x100, "image.jpg").create_for(Discourse.system_user.id)
172172
end
173173

174+
# Regression coverage for the max_tokens -> max_completion_tokens rename:
# verifies that a caller-supplied max_tokens: value is sent to the OpenAI
# API under the max_completion_tokens key (the deprecated max_tokens key is
# rejected by reasoning models such as o3-mini).
describe "max tokens for reasoning models" do
175+
it "uses max_completion_tokens for reasoning models" do
176+
# Point the LLM fixture at a reasoning model so the rename path applies.
model.update!(name: "o3-mini")
177+
llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
178+
prompt =
179+
DiscourseAi::Completions::Prompt.new(
180+
"You are a bot",
181+
messages: [type: :user, content: "hello"],
182+
)
183+
184+
# Canned SSE stream: role chunk, one "hello" content chunk, a stop chunk,
# a usage-only chunk, then the [DONE] sentinel.
response_text = <<~RESPONSE
185+
data: {"id":"chatcmpl-B2VwlY6KzSDtHvg8pN1VAfRhhLFgn","object":"chat.completion.chunk","created":1739939159,"model":"o3-mini-2025-01-31","service_tier":"default","system_fingerprint":"fp_ef58bd3122","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"finish_reason":null}],"usage":null}
186+
187+
data: {"id":"chatcmpl-B2VwlY6KzSDtHvg8pN1VAfRhhLFgn","object":"chat.completion.chunk","created":1739939159,"model":"o3-mini-2025-01-31","service_tier":"default","system_fingerprint":"fp_ef58bd3122","choices":[{"index":0,"delta":{"content":"hello"},"finish_reason":null}],"usage":null}
188+
189+
data: {"id":"chatcmpl-B2VwlY6KzSDtHvg8pN1VAfRhhLFgn","object":"chat.completion.chunk","created":1739939159,"model":"o3-mini-2025-01-31","service_tier":"default","system_fingerprint":"fp_ef58bd3122","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":null}
190+
191+
data: {"id":"chatcmpl-B2VwlY6KzSDtHvg8pN1VAfRhhLFgn","object":"chat.completion.chunk","created":1739939159,"model":"o3-mini-2025-01-31","service_tier":"default","system_fingerprint":"fp_ef58bd3122","choices":[],"usage":{"prompt_tokens":22,"completion_tokens":203,"total_tokens":225,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":192,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}}
192+
193+
data: [DONE]
194+
RESPONSE
195+
196+
# Capture the outgoing request body so we can assert on the exact key sent.
body_parsed = nil
197+
stub_request(:post, "https://api.openai.com/v1/chat/completions").with(
198+
body: ->(body) { body_parsed = JSON.parse(body) },
199+
).to_return(body: response_text)
200+
# Stream the completion; chunks accumulate into a mutable string.
result = +""
201+
llm.generate(prompt, user: user, max_tokens: 1000) { |chunk| result << chunk }
202+
203+
expect(result).to eq("hello")
# The key assertion: max_tokens: 1000 must arrive as max_completion_tokens.
expect(body_parsed["max_completion_tokens"]).to eq(1000)
204+
205+
end
206+
end
207+
174208
describe "repeat calls" do
175209
it "can properly reset context" do
176210
llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")

0 commit comments

Comments (0)