diff --git a/CHANGELOG.md b/CHANGELOG.md
index 115768cdc..40ce826db 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,13 @@
 
 ## Unreleased
 
+- Add `version` parameter to `openai-chat` provider config to support different API versions:
+  - **Version 1**: Uses the `max_tokens` parameter (for Mistral AI and older OpenAI-compatible endpoints)
+  - **Version 2** (default): Uses the `max_completion_tokens` parameter (for OpenAI and modern endpoints)
+  - Fixes Mistral AI 422 errors by allowing configuration of the appropriate token parameter
+  - Supports both provider-level and model-level version configuration, with the provider-level setting taking precedence
+  - Maintains full backward compatibility by defaulting to version 2
+
 ## 0.86.0
 
 - Improve agent behavior prompt to mention usage of editor_diagnostics tool. #230
diff --git a/docs/configuration.md b/docs/configuration.md
index 786e31979..9d8ef7ec1 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -582,6 +582,7 @@ To configure, add your OTLP collector config via `:otlp` map following [otlp aut
       completionUrlRelativePath?: string;
       thinkTagStart?: string;
       thinkTagEnd?: string;
+      version?: 1 | 2; // API version for openai-chat providers (1 for Mistral/compatible, 2 for OpenAI/modern, default: 2)
       models: {[key: string]: {
         modelName?: string;
         extraPayload?: {[key: string]: any}
diff --git a/docs/models.md b/docs/models.md
index 609d6a22c..cc308a673 100644
--- a/docs/models.md
+++ b/docs/models.md
@@ -146,6 +146,56 @@ Defaults by API type:
 
 Only set this when your provider uses a different path or expects query parameters at the endpoint (e.g., Azure API versioning).
 
+#### API Version (for openai-chat providers)
+
+The `openai-chat` API supports different versions to accommodate various provider requirements:
+
+- **`version: 1`**: Uses the `max_tokens` parameter (for Mistral AI and older OpenAI-compatible endpoints)
+- **`version: 2`** (default): Uses the `max_completion_tokens` parameter (for OpenAI and modern endpoints)
+
+Set the version in your provider config to ensure compatibility:
+
+```javascript title="~/.config/eca/config.json"
+{
+  "providers": {
+    "mistral": {
+      "api": "openai-chat",
+      "version": 1,
+      "url": "https://api.mistral.ai",
+      "key": "${env:MISTRAL_API_KEY}",
+      "models": {
+        "labs-devstral-small-2512": {
+          "extraPayload": {
+            "max_tokens": 2048
+          }
+        }
+      }
+    }
+  }
+}
+```
+
+For OpenAI and most modern providers, you can omit the version (it defaults to 2):
+
+```javascript title="~/.config/eca/config.json"
+{
+  "providers": {
+    "openai": {
+      "api": "openai-chat",
+      "url": "https://api.openai.com",
+      "key": "${env:OPENAI_API_KEY}",
+      "models": {
+        "gpt-4o": {
+          "extraPayload": {
+            "max_completion_tokens": 2048
+          }
+        }
+      }
+    }
+  }
+}
+```
+
 ### Credential File Authentication
 
 ECA also supports standard plain-text .netrc file format for reading credentials.
diff --git a/integration-test/integration/chat/custom_provider_test.clj b/integration-test/integration/chat/custom_provider_test.clj
index 8fae82a31..06bc9efa8 100644
--- a/integration-test/integration/chat/custom_provider_test.clj
+++ b/integration-test/integration/chat/custom_provider_test.clj
@@ -125,7 +125,54 @@
                  {:role "user" :content [{:type "input_text" :text "Who's there?"}]}
                  {:role "assistant" :content [{:type "output_text" :text "Foo"}]}
                  {:role "user" :content [{:type "input_text" :text "What foo?"}]}]}
                (llm.mocks/get-req-body :simple-text-2)))))))
+
+(deftest openai-chat-version-parameter
+  (eca/start-process!)
+
+  (eca/request!
+   (fixture/initialize-request
+    {:initializationOptions
+     (merge fixture/default-init-options
+            {:defaultModel "mistral/labs-devstral-small-2512"
+             :providers
+             {"mistral"
+              {:api "openai-chat"
+               :version 1 ;; Test version 1 for Mistral
+               :url (str "http://localhost:" llm-mock.server/port "/openai-chat")
+               :key "mistral-key"
+               :models {"labs-devstral-small-2512" {}}}}})
+     :capabilities {:codeAssistant {:chat {}}}}))
+
+  (eca/notify! (fixture/initialized-notification))
+  (testing "Mistral provider with version 1 uses max_tokens parameter"
+    (is (match?
+         {:chat {:models (m/embeds ["mistral/labs-devstral-small-2512"])
+                 :selectModel "mistral/labs-devstral-small-2512"}}
+         (eca/client-awaits-server-notification :config/updated)))
+
+    (let [chat-id* (atom nil)]
+      (testing "Request body contains max_tokens instead of max_completion_tokens"
+        (llm.mocks/set-case! :simple-text-0)
+        (let [resp (eca/request! (fixture/chat-prompt-request
+                                  {:model "mistral/labs-devstral-small-2512"
+                                   :message "Test Mistral compatibility"}))
+              chat-id (reset! chat-id* (:chatId resp))]
+
+          (is (match?
+               {:chatId (m/pred string?)
+                :model "mistral/labs-devstral-small-2512"
+                :status "prompting"}
+               resp))
+
+          ;; Verify that the request body contains max_tokens (version 1) instead of max_completion_tokens
+          (let [req-body (llm.mocks/get-req-body :simple-text-0)]
+            (is (contains? req-body :max_tokens) "Should contain max_tokens for version 1")
+            (is (not (contains? req-body :max_completion_tokens)) "Should not contain max_completion_tokens for version 1")
+            (is (= 32000 (:max_tokens req-body)) "Should default to 32000 max_tokens"))
+
+          (match-content chat-id "user" {:type "text" :text "Test Mistral compatibility\n"})
+          (match-content chat-id "system" {:type "progress" :state "finished"}))))))
 
 (deftest openai-chat-simple-text
   (eca/start-process!)
diff --git a/integration-test/llm_mock/openai_chat.clj b/integration-test/llm_mock/openai_chat.clj
index d3ab65dea..17b006833 100644
--- a/integration-test/llm_mock/openai_chat.clj
+++ b/integration-test/llm_mock/openai_chat.clj
@@ -100,7 +100,9 @@
   (let [body (some-> (slurp (:body req)) (json/parse-string true))
         messages (:messages body)
         normalized (messages->normalized-input messages)
-        normalized-body (merge normalized (select-keys body [:tools]))]
+        ;; Capture token parameters for version testing
+        token-params (select-keys body [:max_tokens :max_completion_tokens])
+        normalized-body (merge normalized (select-keys body [:tools]) token-params)]
     (hk/as-channel req
      {:on-open
       (fn [ch]
diff --git a/src/eca/llm_api.clj b/src/eca/llm_api.clj
index 064c644c4..2a6d0abaf 100644
--- a/src/eca/llm_api.clj
+++ b/src/eca/llm_api.clj
@@ -140,6 +140,7 @@
                          extra-payload)
         :api-url api-url
         :api-key api-key
+        :provider-config provider-config
         :extra-headers {"openai-intent" "conversation-panel"
                         "x-request-id" (str (random-uuid))
                         "vscode-sessionid" ""
@@ -165,7 +166,8 @@
                          {:extra_body {:google {:thinking_config {:include_thoughts true}}}})
                        extra-payload)
        :api-url api-url
-       :api-key api-key}
+       :api-key api-key
+       :provider-config provider-config}
       callbacks)
 
      (= "ollama" provider)
@@ -207,6 +209,7 @@
        :think-tag-start think-tag-start
        :think-tag-end think-tag-end
        :http-client http-client
+       :provider-config provider-config
        :api-url api-url
        :api-key api-key}
       callbacks))
diff --git a/src/eca/llm_providers/openai_chat.clj b/src/eca/llm_providers/openai_chat.clj
index f9d1e4b5f..da3f444d6 100644
--- a/src/eca/llm_providers/openai_chat.clj
+++ b/src/eca/llm_providers/openai_chat.clj
@@ -287,6 +287,35 @@
                     (emit-text!
                      (.substring buf 0 emit-len))
                     (reset! content-buffer* (.substring buf emit-len))))))))))))
 
+(defn- build-request-body-with-version
+  "Build the request body with the version-specific max tokens parameter.
+
+   version 1: uses max_tokens (for Mistral, older OpenAI-compatible endpoints)
+   version 2 (default): uses max_completion_tokens (for OpenAI, modern endpoints)
+
+   Checks for version in both provider-config and extra-payload, with provider-config taking precedence."
+  [extra-payload provider-config model messages stream? temperature tools]
+  (let [version (or (get provider-config :version)
+                    (get extra-payload :version)
+                    2)
+        max-tokens-param (if (= version 1) :max_tokens :max_completion_tokens)
+        conflicting-param (if (= version 1) :max_completion_tokens :max_tokens)
+        base-request (-> {:model model
+                          :messages messages
+                          :stream stream?}
+                         (assoc max-tokens-param 32000)
+                         (assoc-some
+                          :temperature temperature
+                          :tools (when (seq tools) (->tools tools))))
+        ;; Remove :version so it is never sent to the API, and drop the token
+        ;; parameter that conflicts with the selected version. A user-provided
+        ;; value for the selected parameter still overrides the 32000 default
+        ;; via deep-merge.
+        extra-payload (-> extra-payload
+                          (dissoc :version)
+                          (dissoc conflicting-param))]
+    (deep-merge base-request extra-payload)))
+
 (defn chat-completion!
   "Primary entry point for OpenAI chat completions with streaming support.
 
@@ -295,7 +324,7 @@
    Compatible with OpenRouter and other OpenAI-compatible providers."
   [{:keys [model user-messages instructions temperature api-key api-url url-relative-path
            past-messages tools extra-payload extra-headers supports-image?
-          think-tag-start think-tag-end http-client]}
+          think-tag-start think-tag-end http-client provider-config]}
    {:keys [on-message-received on-error on-prepare-tool-call on-tools-called on-reason on-usage-updated] :as callbacks}]
   (let [think-tag-start (or think-tag-start "<think>")
         think-tag-end (or think-tag-end "</think>")
@@ -305,15 +334,7 @@
          (normalize-messages past-messages supports-image? think-tag-start think-tag-end)
          (normalize-messages user-messages supports-image? think-tag-start think-tag-end)))
-        body (deep-merge
-              (assoc-some
-               {:model model
-                :messages messages
-                :stream stream?
-                :max_completion_tokens 32000}
-               :temperature temperature
-               :tools (when (seq tools) (->tools tools)))
-              extra-payload)
+        body (build-request-body-with-version extra-payload provider-config model messages stream? temperature tools)
 
         ;; Atom to accumulate tool call data from streaming chunks.
         ;; OpenAI streams tool call arguments across multiple chunks, so we need to
diff --git a/test/eca/llm_providers/openai_chat_test.clj b/test/eca/llm_providers/openai_chat_test.clj
index 819b0f306..6a1d6f0a3 100644
--- a/test/eca/llm_providers/openai_chat_test.clj
+++ b/test/eca/llm_providers/openai_chat_test.clj
@@ -255,4 +255,99 @@
                             [:text "re"]]}
                           (process-text-think-aware ["<" "thi" "nk>H" "um.."
                                                      ".H"
                                                      "ello " "the" "re " "mat" "e!"])))))
+
+;; Test helper to capture the request body from chat-completion!
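+;; A with-redefs-based sketch: base-chat-request! is swapped for a stub that
+;; records the final :body map, so the test can inspect the exact payload
+;; chat-completion! would send without performing any real HTTP request.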
+(defn capture-request-body
+  "Helper to capture the request body that would be sent to the API."
+  [config]
+  (let [request-body* (atom nil)
+        callbacks {:on-message-received (fn [_])
+                   :on-error (fn [_])
+                   :on-prepare-tool-call (fn [_])
+                   :on-tools-called (fn [_])
+                   :on-reason (fn [_])
+                   :on-usage-updated (fn [_])}]
+    ;; Redefine base-chat-request! to capture the body instead of calling the API
+    (with-redefs [eca.llm-providers.openai-chat/base-chat-request!
+                  (fn [{:keys [body] :as _all-args}]
+                    (reset! request-body* body)
+                    ;; Return a mock successful response
+                    {:usage {:prompt_tokens 10 :completion_tokens 5 :total_tokens 15}
+                     :choices [{:message {:content "test response" :reasoning "test reasoning"}}]})]
+      (eca.llm-providers.openai-chat/chat-completion! config callbacks))
+    @request-body*))
+
+(deftest version-parameter-test
+  (testing "Version 1 uses max_tokens parameter"
+    (let [body (capture-request-body
+                {:model "test-model"
+                 :user-messages [{:role "user" :content "test"}]
+                 :extra-payload {:version 1}})]
+      (is (contains? body :max_tokens) "Should contain max_tokens for version 1")
+      (is (not (contains? body :max_completion_tokens)) "Should not contain max_completion_tokens for version 1")
+      (is (= 32000 (:max_tokens body)) "Should default to 32000 max_tokens for version 1")))
+
+  (testing "Version 2 uses max_completion_tokens parameter"
+    (let [body (capture-request-body
+                {:model "test-model"
+                 :user-messages [{:role "user" :content "test"}]
+                 :extra-payload {:version 2}})]
+      (is (contains? body :max_completion_tokens) "Should contain max_completion_tokens for version 2")
+      (is (not (contains? body :max_tokens)) "Should not contain max_tokens for version 2")
+      (is (= 32000 (:max_completion_tokens body)) "Should default to 32000 max_completion_tokens for version 2")))
+
+  (testing "Missing version defaults to version 2 and max_completion_tokens"
+    (let [body (capture-request-body
+                {:model "test-model"
+                 :user-messages [{:role "user" :content "test"}]
+                 :extra-payload {}})]
+      (is (contains? body :max_completion_tokens) "Should contain max_completion_tokens by default")
+      (is (not (contains? body :max_tokens)) "Should not contain max_tokens by default")
+      (is (= 32000 (:max_completion_tokens body)) "Should default to 32000 max_completion_tokens")))
+
+  (testing "Version 1 with user-provided max_tokens in extraPayload"
+    (let [body (capture-request-body
+                {:model "test-model"
+                 :user-messages [{:role "user" :content "test"}]
+                 :extra-payload {:version 1 :max_tokens 2048}})]
+      (is (contains? body :max_tokens) "Should contain max_tokens for version 1")
+      (is (= 2048 (:max_tokens body)) "Should use user-provided max_tokens for version 1")
+      (is (not (contains? body :max_completion_tokens)) "Should not contain max_completion_tokens for version 1")))
+
+  (testing "Version 2 with user-provided max_completion_tokens in extraPayload"
+    (let [body (capture-request-body
+                {:model "test-model"
+                 :user-messages [{:role "user" :content "test"}]
+                 :extra-payload {:version 2 :max_completion_tokens 1024}})]
+      (is (contains? body :max_completion_tokens) "Should contain max_completion_tokens for version 2")
+      (is (= 1024 (:max_completion_tokens body)) "Should use user-provided max_completion_tokens for version 2")
+      (is (not (contains?
+                 body :max_tokens)) "Should not contain max_tokens for version 2")))
+
+  (testing "Version 1 removes conflicting max_completion_tokens from extraPayload"
+    (let [body (capture-request-body
+                {:model "test-model"
+                 :user-messages [{:role "user" :content "test"}]
+                 :extra-payload {:version 1 :max_completion_tokens 5000}})]
+      (is (contains? body :max_tokens) "Should contain max_tokens for version 1")
+      (is (not (contains? body :max_completion_tokens)) "Should remove conflicting max_completion_tokens for version 1")))
+
+  (testing "Version 2 removes conflicting max_tokens from extraPayload"
+    (let [body (capture-request-body
+                {:model "test-model"
+                 :user-messages [{:role "user" :content "test"}]
+                 :extra-payload {:version 2 :max_tokens 5000}})]
+      (is (contains? body :max_completion_tokens) "Should contain max_completion_tokens for version 2")
+      (is (not (contains? body :max_tokens)) "Should remove conflicting max_tokens for version 2")))
+
+  (testing "Version parameter is not included in final request body"
+    (let [body (capture-request-body
+                {:model "test-model"
+                 :user-messages [{:role "user" :content "test"}]
+                 :extra-payload {:version 1 :temperature 0.7}})]
+      (is (not (contains? body :version)) "Should not contain version in final request body")
+      (is (= 0.7 (:temperature body)) "Should preserve other extraPayload parameters"))))
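
Note on model-level configuration: the changelog states that `version` can also be set per model, and `build-request-body-with-version` indeed reads `:version` from the model's `extraPayload` (stripping it before the request is sent). Assuming the same config shape as the docs examples above, a model-level override would look roughly like this sketch:

```javascript title="~/.config/eca/config.json"
{
  "providers": {
    "mistral": {
      "api": "openai-chat",
      "url": "https://api.mistral.ai",
      "key": "${env:MISTRAL_API_KEY}",
      "models": {
        "labs-devstral-small-2512": {
          "extraPayload": {
            "version": 1,
            "max_tokens": 2048
          }
        }
      }
    }
  }
}
```

A provider-level `version` would still win if both are set, since provider-config is checked first.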