Skip to content

Commit 0e9c2d9

Browse files
committed
fix(openai): ensure non-compatible providers send correct max_tokens field
DeepSeek and Mistral are not fully OpenAI-compatible but delegated to CompatibleProvider.Completion(), which unconditionally mapped MaxTokens to max_completion_tokens on the wire. Both APIs expect max_tokens. Add ChatCompletionRequestTransform hook to CompatibleConfig (Strategy pattern) that lets providers adjust the SDK request after convertParams builds it. DeepSeek and Mistral supply transforms that swap max_completion_tokens back to max_tokens and clear unsupported fields. Move Mistral's user/reasoning_effort stripping from preprocessParams (CompletionParams level) to transformRequest (SDK request level) where it correctly prevents the fields from being serialized. Add FakeCompletionServer test helper and wire-level tests that capture actual JSON request bodies to assert correct field names on the wire.
1 parent abb7730 commit 0e9c2d9

File tree

7 files changed

+262
-63
lines changed

7 files changed

+262
-63
lines changed

internal/testutil/fakeserver.go

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
// Package testutil provides testing utilities and fixtures for any-llm.
2+
package testutil
3+
4+
import (
	"encoding/json"
	"io"
	"net/http"
	"net/http/httptest"
	"sync"
	"testing"
)
11+
12+
// FakeCompletionServer creates an httptest server that captures the raw JSON
13+
// request body and returns a minimal valid OpenAI-compatible chat completion
14+
// response. The captured body is returned so callers can assert on the exact
15+
// JSON field names sent over the wire.
16+
func FakeCompletionServer(t *testing.T) (serverURL string, capturedBody func() map[string]any) {
17+
t.Helper()
18+
19+
var body map[string]any
20+
21+
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
22+
raw, err := io.ReadAll(r.Body)
23+
if err != nil {
24+
t.Errorf("reading request body: %v", err)
25+
http.Error(w, "bad request", http.StatusBadRequest)
26+
return
27+
}
28+
29+
if err := json.Unmarshal(raw, &body); err != nil {
30+
t.Errorf("unmarshalling request body: %v", err)
31+
http.Error(w, "bad request", http.StatusBadRequest)
32+
return
33+
}
34+
35+
w.Header().Set("Content-Type", "application/json")
36+
// Minimal valid chat completion response.
37+
_, _ = w.Write([]byte(`{
38+
"id": "chatcmpl-test",
39+
"object": "chat.completion",
40+
"created": 1700000000,
41+
"model": "test-model",
42+
"choices": [{
43+
"index": 0,
44+
"message": {"role": "assistant", "content": "hello"},
45+
"finish_reason": "stop"
46+
}],
47+
"usage": {"prompt_tokens": 5, "completion_tokens": 3, "total_tokens": 8}
48+
}`))
49+
}))
50+
51+
t.Cleanup(srv.Close)
52+
53+
return srv.URL, func() map[string]any { return body }
54+
}

providers/deepseek/deepseek.go

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ import (
88
"fmt"
99
"slices"
1010

11+
oaisdk "github.com/openai/openai-go"
12+
"github.com/openai/openai-go/packages/param"
13+
1114
"github.com/mozilla-ai/any-llm-go/config"
1215
"github.com/mozilla-ai/any-llm-go/providers"
1316
"github.com/mozilla-ai/any-llm-go/providers/openai"
@@ -50,13 +53,14 @@ type Provider struct {
5053
// New creates a new DeepSeek provider.
5154
func New(opts ...config.Option) (*Provider, error) {
5255
base, err := openai.NewCompatible(openai.CompatibleConfig{
53-
APIKeyEnvVar: envAPIKey,
54-
BaseURLEnvVar: "",
55-
Capabilities: capabilities(),
56-
DefaultAPIKey: "",
57-
DefaultBaseURL: defaultBaseURL,
58-
Name: providerName,
59-
RequireAPIKey: true,
56+
APIKeyEnvVar: envAPIKey,
57+
BaseURLEnvVar: "",
58+
Capabilities: capabilities(),
59+
DefaultAPIKey: "",
60+
DefaultBaseURL: defaultBaseURL,
61+
Name: providerName,
62+
RequireAPIKey: true,
63+
ChatCompletionRequestTransform: transformRequest,
6064
}, opts...)
6165
if err != nil {
6266
return nil, err
@@ -156,6 +160,19 @@ func preprocessParams(params providers.CompletionParams) providers.CompletionPar
156160
}
157161
}
158162

163+
// transformRequest adjusts the OpenAI SDK request for DeepSeek's API.
164+
// DeepSeek uses max_tokens, not max_completion_tokens.
165+
// See: https://api-docs.deepseek.com/api/create-chat-completion
166+
func transformRequest(req *oaisdk.ChatCompletionNewParams) {
167+
if req.MaxCompletionTokens.Valid() {
168+
// Set max_tokens using max_completion_tokens value.
169+
req.MaxTokens = oaisdk.Int(req.MaxCompletionTokens.Value)
170+
}
171+
172+
// Clear unsupported fields from the request.
173+
req.MaxCompletionTokens = param.Opt[int64]{}
174+
}
175+
159176
// preprocessMessagesForJSONSchema injects the JSON schema into the last user message.
160177
// Returns the modified messages and true if injection succeeded, or the original messages
161178
// and false if injection failed (no user message, non-string content, or marshal error).

providers/deepseek/deepseek_test.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,38 @@ func TestPreprocessMessagesForJSONSchema(t *testing.T) {
354354
})
355355
}
356356

357+
func TestCompletionSendsMaxTokensOnWire(t *testing.T) {
358+
t.Parallel()
359+
360+
serverURL, capturedBody := testutil.FakeCompletionServer(t)
361+
362+
provider, err := New(
363+
config.WithAPIKey("test-key"),
364+
config.WithBaseURL(serverURL),
365+
)
366+
require.NoError(t, err)
367+
368+
maxTokens := 512
369+
params := providers.CompletionParams{
370+
Model: "deepseek-chat",
371+
Messages: testutil.SimpleMessages(),
372+
MaxTokens: &maxTokens,
373+
}
374+
375+
_, err = provider.Completion(context.Background(), params)
376+
require.NoError(t, err)
377+
378+
body := capturedBody()
379+
380+
// DeepSeek is not fully OpenAI-compatible.
381+
// The wire request must use max_tokens (not max_completion_tokens)
382+
// because that is what the DeepSeek API accepts.
383+
// See: https://api-docs.deepseek.com/api/create-chat-completion
384+
require.Contains(t, body, "max_tokens")
385+
require.NotContains(t, body, "max_completion_tokens")
386+
require.Equal(t, float64(512), body["max_tokens"])
387+
}
388+
357389
// Integration tests - only run if DeepSeek API key is available.
358390

359391
func TestIntegrationCompletion(t *testing.T) {

providers/mistral/mistral.go

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ import (
66
"context"
77
"slices"
88

9+
oaisdk "github.com/openai/openai-go"
10+
"github.com/openai/openai-go/packages/param"
11+
912
"github.com/mozilla-ai/any-llm-go/config"
1013
"github.com/mozilla-ai/any-llm-go/providers"
1114
"github.com/mozilla-ai/any-llm-go/providers/openai"
@@ -48,13 +51,14 @@ type Provider struct {
4851
// New creates a new Mistral provider.
4952
func New(opts ...config.Option) (*Provider, error) {
5053
base, err := openai.NewCompatible(openai.CompatibleConfig{
51-
APIKeyEnvVar: envAPIKey,
52-
BaseURLEnvVar: "",
53-
Capabilities: capabilities(),
54-
DefaultAPIKey: "",
55-
DefaultBaseURL: defaultBaseURL,
56-
Name: providerName,
57-
RequireAPIKey: true,
54+
APIKeyEnvVar: envAPIKey,
55+
BaseURLEnvVar: "",
56+
Capabilities: capabilities(),
57+
DefaultAPIKey: "",
58+
DefaultBaseURL: defaultBaseURL,
59+
Name: providerName,
60+
RequireAPIKey: true,
61+
ChatCompletionRequestTransform: transformRequest,
5862
}, opts...)
5963
if err != nil {
6064
return nil, err
@@ -131,11 +135,23 @@ func patchMessages(messages []providers.Message) []providers.Message {
131135
}
132136

133137
// preprocessParams handles Mistral's API requirements.
134-
// Mistral doesn't accept the "user" or "reasoning_effort" fields and requires
135-
// an assistant message between tool results and user messages.
138+
// Mistral requires an assistant message between tool results and user messages.
136139
func preprocessParams(params providers.CompletionParams) providers.CompletionParams {
137140
params.Messages = patchMessages(slices.Clone(params.Messages))
138-
params.ReasoningEffort = "" // Mistral doesn't support reasoning_effort; Magistral models reason automatically.
139-
params.User = "" // Mistral doesn't support the user field.
140141
return params
141142
}
143+
144+
// transformRequest adjusts the OpenAI SDK request for Mistral's API.
145+
// Mistral uses max_tokens (not max_completion_tokens) and does not accept user or reasoning_effort fields.
146+
// See: https://docs.mistral.ai/api/#tag/chat/operation/chat_completion_v1_chat_completions_post
147+
func transformRequest(req *oaisdk.ChatCompletionNewParams) {
148+
if req.MaxCompletionTokens.Valid() {
149+
// Set max_tokens using max_completion_tokens value.
150+
req.MaxTokens = oaisdk.Int(req.MaxCompletionTokens.Value)
151+
}
152+
153+
// Clear unsupported fields from the request.
154+
req.MaxCompletionTokens = param.Opt[int64]{}
155+
req.User = param.Opt[string]{}
156+
req.ReasoningEffort = ""
157+
}

providers/mistral/mistral_test.go

Lines changed: 84 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -80,51 +80,6 @@ func TestProviderName(t *testing.T) {
8080
func TestPreprocessParams(t *testing.T) {
8181
t.Parallel()
8282

83-
t.Run("strips user field from params", func(t *testing.T) {
84-
t.Parallel()
85-
86-
params := providers.CompletionParams{
87-
Model: "mistral-small-latest",
88-
Messages: testutil.SimpleMessages(),
89-
User: "test-user",
90-
}
91-
92-
result := preprocessParams(params)
93-
94-
require.Equal(t, params.Model, result.Model)
95-
require.Empty(t, result.User)
96-
})
97-
98-
t.Run("strips reasoning effort from params", func(t *testing.T) {
99-
t.Parallel()
100-
101-
params := providers.CompletionParams{
102-
Model: "magistral-small-latest",
103-
Messages: testutil.SimpleMessages(),
104-
ReasoningEffort: providers.ReasoningEffortLow,
105-
}
106-
107-
result := preprocessParams(params)
108-
109-
require.Equal(t, params.Model, result.Model)
110-
require.Empty(t, result.ReasoningEffort)
111-
})
112-
113-
t.Run("passes through params without user field", func(t *testing.T) {
114-
t.Parallel()
115-
116-
params := providers.CompletionParams{
117-
Model: "mistral-small-latest",
118-
Messages: testutil.SimpleMessages(),
119-
}
120-
121-
result := preprocessParams(params)
122-
123-
require.Equal(t, params.Model, result.Model)
124-
require.Equal(t, len(params.Messages), len(result.Messages))
125-
require.Empty(t, result.User)
126-
})
127-
12883
t.Run("patches messages with tool-to-user sequence", func(t *testing.T) {
12984
t.Parallel()
13085

@@ -276,6 +231,90 @@ func TestPatchMessages(t *testing.T) {
276231
})
277232
}
278233

234+
func TestCompletionSendsMaxTokensOnWire(t *testing.T) {
235+
t.Parallel()
236+
237+
serverURL, capturedBody := testutil.FakeCompletionServer(t)
238+
239+
provider, err := New(
240+
config.WithAPIKey("test-key"),
241+
config.WithBaseURL(serverURL),
242+
)
243+
require.NoError(t, err)
244+
245+
maxTokens := 256
246+
params := providers.CompletionParams{
247+
Model: "mistral-small-latest",
248+
Messages: testutil.SimpleMessages(),
249+
MaxTokens: &maxTokens,
250+
}
251+
252+
_, err = provider.Completion(context.Background(), params)
253+
require.NoError(t, err)
254+
255+
body := capturedBody()
256+
257+
// Mistral is not fully OpenAI-compatible.
258+
// The wire request must use max_tokens (not max_completion_tokens)
259+
// because that is what the Mistral API accepts.
260+
// See: https://docs.mistral.ai/api?property=operation-chat_completion_v1_chat_completions_post_request_max_tokens
261+
require.Contains(t, body, "max_tokens")
262+
require.NotContains(t, body, "max_completion_tokens")
263+
require.Equal(t, float64(256), body["max_tokens"])
264+
}
265+
266+
func TestCompletionStripsUserField(t *testing.T) {
267+
t.Parallel()
268+
269+
serverURL, capturedBody := testutil.FakeCompletionServer(t)
270+
271+
provider, err := New(
272+
config.WithAPIKey("test-key"),
273+
config.WithBaseURL(serverURL),
274+
)
275+
require.NoError(t, err)
276+
277+
params := providers.CompletionParams{
278+
Model: "mistral-small-latest",
279+
Messages: testutil.SimpleMessages(),
280+
User: "test-user",
281+
}
282+
283+
_, err = provider.Completion(context.Background(), params)
284+
require.NoError(t, err)
285+
286+
body := capturedBody()
287+
288+
// Mistral doesn't support the user field; it must not appear on the wire.
289+
require.NotContains(t, body, "user")
290+
}
291+
292+
func TestCompletionStripsReasoningEffort(t *testing.T) {
293+
t.Parallel()
294+
295+
serverURL, capturedBody := testutil.FakeCompletionServer(t)
296+
297+
provider, err := New(
298+
config.WithAPIKey("test-key"),
299+
config.WithBaseURL(serverURL),
300+
)
301+
require.NoError(t, err)
302+
303+
params := providers.CompletionParams{
304+
Model: "magistral-small-latest",
305+
Messages: testutil.SimpleMessages(),
306+
ReasoningEffort: providers.ReasoningEffortHigh,
307+
}
308+
309+
_, err = provider.Completion(context.Background(), params)
310+
require.NoError(t, err)
311+
312+
body := capturedBody()
313+
314+
// Mistral doesn't support reasoning_effort; it must not appear on the wire.
315+
require.NotContains(t, body, "reasoning_effort")
316+
}
317+
279318
// Integration tests - only run if Mistral API key is available.
280319

281320
func TestIntegrationCompletion(t *testing.T) {

providers/openai/compatible.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,12 @@ type CompatibleConfig struct {
7070

7171
// RequireAPIKey indicates whether an API key is required.
7272
RequireAPIKey bool
73+
74+
// ChatCompletionRequestTransform is an optional function that modifies the chat completion
75+
// request after construction. Providers that are not fully OpenAI-compatible use this to
76+
// adjust wire-level fields (e.g. swapping max_completion_tokens back to max_tokens).
77+
// Nil means no transformation.
78+
ChatCompletionRequestTransform func(*openai.ChatCompletionNewParams)
7379
}
7480

7581
// Ensure CompatibleProvider implements the required interfaces.
@@ -143,6 +149,9 @@ func (p *CompatibleProvider) Completion(
143149
}
144150

145151
req := convertParams(params)
152+
if p.compatibleConfig.ChatCompletionRequestTransform != nil {
153+
p.compatibleConfig.ChatCompletionRequestTransform(&req)
154+
}
146155

147156
resp, err := p.client.Chat.Completions.New(ctx, req)
148157
if err != nil {
@@ -170,6 +179,9 @@ func (p *CompatibleProvider) CompletionStream(
170179
}
171180

172181
req := convertParams(params)
182+
if p.compatibleConfig.ChatCompletionRequestTransform != nil {
183+
p.compatibleConfig.ChatCompletionRequestTransform(&req)
184+
}
173185
stream := p.client.Chat.Completions.NewStreaming(ctx, req)
174186

175187
for stream.Next() {

0 commit comments

Comments
 (0)