diff --git a/packages/types/src/providers/groq.ts b/packages/types/src/providers/groq.ts
index 99bf4be3d01..2eac1f954a3 100644
--- a/packages/types/src/providers/groq.ts
+++ b/packages/types/src/providers/groq.ts
@@ -89,7 +89,7 @@ export const groqModels = {
 		description: "DeepSeek R1 Distill Llama 70B model, 128K context.",
 	},
 	"moonshotai/kimi-k2-instruct": {
-		maxTokens: 8192,
+		maxTokens: 16384,
 		contextWindow: 131072,
 		supportsImages: false,
 		supportsPromptCache: false,
diff --git a/src/api/providers/base-openai-compatible-provider.ts b/src/api/providers/base-openai-compatible-provider.ts
index f196b5f309b..49c53e32632 100644
--- a/src/api/providers/base-openai-compatible-provider.ts
+++ b/src/api/providers/base-openai-compatible-provider.ts
@@ -69,11 +69,17 @@ export abstract class BaseOpenAiCompatibleProvider
 	): ApiStream {
 		const {
 			id: model,
-			info: { maxTokens: max_tokens },
+			info: { maxTokens: modelMaxTokens },
 		} = this.getModel()
 
 		const temperature = this.options.modelTemperature ?? this.defaultTemperature
-
+		// Ensure max_tokens doesn't exceed the model's configured limit
+		// Users can override with modelMaxTokens, but it should not exceed the model's actual API limit
+		const userMaxTokens = this.options.modelMaxTokens
+		const max_tokens =
+			typeof userMaxTokens === "number" && userMaxTokens > 0 && typeof modelMaxTokens === "number"
+				? Math.min(userMaxTokens, modelMaxTokens)
+				: modelMaxTokens
 		const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
 			model,
 			max_tokens,
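
For context, a minimal standalone sketch of the clamping behavior the second hunk introduces. The helper name clampMaxTokens is illustrative only and is not part of the patch; the actual change inlines this logic in BaseOpenAiCompatibleProvider.

// Illustrative sketch, not part of the patch: a user-supplied max_tokens override
// is honored, but never beyond the model's configured limit.
function clampMaxTokens(userMaxTokens: number | undefined, modelMaxTokens: number | undefined): number | undefined {
	return typeof userMaxTokens === "number" && userMaxTokens > 0 && typeof modelMaxTokens === "number"
		? Math.min(userMaxTokens, modelMaxTokens)
		: modelMaxTokens
}

// With moonshotai/kimi-k2-instruct now advertising maxTokens: 16384:
clampMaxTokens(32768, 16384) // 16384 (override capped at the model limit)
clampMaxTokens(4096, 16384) // 4096 (smaller override kept as-is)
clampMaxTokens(undefined, 16384) // 16384 (no override falls back to the model limit)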