Commit e83f5cc

fix: always include max_completion_tokens for OpenAI compatible providers
- Changed default behavior to include max_completion_tokens when includeMaxTokens is undefined
- Added validation to only include positive max token values
- Updated tests to reflect new default behavior
- Added tests for OpenAI compatible provider scenarios

Fixes #6265
1 parent 623fa2a
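The heart of the change is the gate on includeMaxTokens: previously only an explicit true added the parameter, while now only an explicit false suppresses it, and the resolved value is validated to be positive before it is sent. A minimal sketch of that decision logic, assuming a standalone helper for illustration (resolveMaxCompletionTokens is hypothetical; the real logic lives in OpenAiHandler, see the src/api/providers/openai.ts diff below):

// Sketch only — not the commit's code. Option names mirror those in the diff below;
// the standalone helper itself is hypothetical.
interface MaxTokenOptions {
	includeMaxTokens?: boolean // undefined now behaves like true
	modelMaxTokens?: number // optional user-configured override
}

function resolveMaxCompletionTokens(
	options: MaxTokenOptions,
	modelDefaultMaxTokens?: number,
): number | undefined {
	// Only an explicit `false` suppresses the parameter now.
	if (options.includeMaxTokens === false) return undefined
	// Prefer the user-configured value, then fall back to the model's default.
	const maxTokens = options.modelMaxTokens || modelDefaultMaxTokens
	// New validation: only positive values are ever sent.
	return maxTokens && maxTokens > 0 ? maxTokens : undefined
}

resolveMaxCompletionTokens({}, 4096) // 4096 — undefined now includes the cap (the #6265 fix)
resolveMaxCompletionTokens({ includeMaxTokens: false }, 4096) // undefined — explicit opt-out still wins
resolveMaxCompletionTokens({}, 0) // undefined — non-positive values are dropped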

File tree

2 files changed: +76 -13 lines changed

src/api/providers/__tests__/openai.spec.ts

Lines changed: 66 additions & 10 deletions
@@ -242,10 +242,10 @@ describe("OpenAiHandler", () => {
 			expect(callArgs.max_completion_tokens).toBeUndefined()
 		})
 
-		it("should not include max_tokens when includeMaxTokens is undefined", async () => {
+		it("should include max_completion_tokens when includeMaxTokens is undefined (default behavior)", async () => {
 			const optionsWithUndefinedMaxTokens: ApiHandlerOptions = {
 				...mockOptions,
-				// includeMaxTokens is not set, should not include max_tokens
+				// includeMaxTokens is not set, should default to including max_completion_tokens
 				openAiCustomModelInfo: {
 					contextWindow: 128_000,
 					maxTokens: 4096,
@@ -257,10 +257,10 @@
 			// Consume the stream to trigger the API call
 			for await (const _chunk of stream) {
 			}
-			// Assert the mockCreate was called without max_tokens
+			// Assert the mockCreate was called with max_completion_tokens (default behavior)
 			expect(mockCreate).toHaveBeenCalled()
 			const callArgs = mockCreate.mock.calls[0][0]
-			expect(callArgs.max_completion_tokens).toBeUndefined()
+			expect(callArgs.max_completion_tokens).toBe(4096)
 		})
 
 		it("should use user-configured modelMaxTokens instead of model default maxTokens", async () => {
@@ -306,6 +306,54 @@
 			const callArgs = mockCreate.mock.calls[0][0]
 			expect(callArgs.max_completion_tokens).toBe(4096)
 		})
+
+		it("should include max_completion_tokens by default for OpenAI compatible providers", async () => {
+			const optionsForCompatibleProvider: ApiHandlerOptions = {
+				...mockOptions,
+				// includeMaxTokens is not set, simulating OpenAI compatible provider usage
+				openAiBaseUrl: "https://api.koboldcpp.example.com/v1",
+				openAiCustomModelInfo: {
+					contextWindow: 32_000,
+					maxTokens: 4096,
+					supportsPromptCache: false,
+				},
+			}
+			const compatibleHandler = new OpenAiHandler(optionsForCompatibleProvider)
+			const stream = compatibleHandler.createMessage(systemPrompt, messages)
+
+			const chunks = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Verify max_completion_tokens is included by default
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs).toHaveProperty("max_completion_tokens", 4096)
+		})
+
+		it("should respect includeMaxTokens=false even for OpenAI compatible providers", async () => {
+			const optionsWithExplicitFalse: ApiHandlerOptions = {
+				...mockOptions,
+				includeMaxTokens: false, // Explicitly set to false
+				openAiBaseUrl: "https://api.koboldcpp.example.com/v1",
+				openAiCustomModelInfo: {
+					contextWindow: 32_000,
+					maxTokens: 4096,
+					supportsPromptCache: false,
+				},
+			}
+			const handlerWithExplicitFalse = new OpenAiHandler(optionsWithExplicitFalse)
+			const stream = handlerWithExplicitFalse.createMessage(systemPrompt, messages)
+
+			const chunks = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Verify max_completion_tokens is NOT included when explicitly set to false
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs).not.toHaveProperty("max_completion_tokens")
+		})
 	})
 
 	describe("error handling", () => {
@@ -402,6 +450,11 @@
 		openAiBaseUrl: "https://test.services.ai.azure.com",
 		openAiModelId: "deepseek-v3",
 		azureApiVersion: "2024-05-01-preview",
+		openAiCustomModelInfo: {
+			contextWindow: 128_000,
+			maxTokens: 4096,
+			supportsPromptCache: false,
+		},
 	}
 
 	it("should initialize with Azure AI Inference Service configuration", () => {
@@ -442,13 +495,14 @@
 				stream: true,
 				stream_options: { include_usage: true },
 				temperature: 0,
+				max_completion_tokens: 4096,
 			},
 			{ path: "/models/chat/completions" },
 		)
 
-		// Verify max_tokens is NOT included when includeMaxTokens is not set
+		// Verify max_completion_tokens IS included when includeMaxTokens is not set (default behavior)
 		const callArgs = mockCreate.mock.calls[0][0]
-		expect(callArgs).not.toHaveProperty("max_completion_tokens")
+		expect(callArgs).toHaveProperty("max_completion_tokens")
 	})
 
 	it("should handle non-streaming responses with Azure AI Inference Service", async () => {
@@ -488,13 +542,14 @@
 				{ role: "user", content: systemPrompt },
 				{ role: "user", content: "Hello!" },
 			],
+			max_completion_tokens: 4096,
 		},
 		{ path: "/models/chat/completions" },
 	)
 
-	// Verify max_tokens is NOT included when includeMaxTokens is not set
+	// Verify max_completion_tokens IS included when includeMaxTokens is not set (default behavior)
 	const callArgs = mockCreate.mock.calls[0][0]
-	expect(callArgs).not.toHaveProperty("max_completion_tokens")
+	expect(callArgs).toHaveProperty("max_completion_tokens")
 })
 
 it("should handle completePrompt with Azure AI Inference Service", async () => {
@@ -505,13 +560,14 @@
 		{
 			model: azureOptions.openAiModelId,
 			messages: [{ role: "user", content: "Test prompt" }],
+			max_completion_tokens: 4096,
 		},
 		{ path: "/models/chat/completions" },
 	)
 
-	// Verify max_tokens is NOT included when includeMaxTokens is not set
+	// Verify max_completion_tokens IS included when includeMaxTokens is not set (default behavior)
 	const callArgs = mockCreate.mock.calls[0][0]
-	expect(callArgs).not.toHaveProperty("max_completion_tokens")
+	expect(callArgs).toHaveProperty("max_completion_tokens")
 })
 })
 
src/api/providers/openai.ts

Lines changed: 10 additions & 3 deletions
@@ -401,11 +401,18 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler
 		| OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming,
 		modelInfo: ModelInfo,
 	): void {
-		// Only add max_completion_tokens if includeMaxTokens is true
-		if (this.options.includeMaxTokens === true) {
+		// For OpenAI compatible providers, always include max_completion_tokens to prevent
+		// fallback to provider's default (which may be too small, e.g., koboldcpp's 512 tokens)
+		// Only add max_completion_tokens if includeMaxTokens is explicitly true OR if it's undefined
+		// (treating undefined as true for backward compatibility with OpenAI compatible providers)
+		if (this.options.includeMaxTokens !== false) {
 			// Use user-configured modelMaxTokens if available, otherwise fall back to model's default maxTokens
 			// Using max_completion_tokens as max_tokens is deprecated
-			requestOptions.max_completion_tokens = this.options.modelMaxTokens || modelInfo.maxTokens
+			const maxTokens = this.options.modelMaxTokens || modelInfo.maxTokens
+			// Only set max_completion_tokens if we have a valid positive value
+			if (maxTokens && maxTokens > 0) {
+				requestOptions.max_completion_tokens = maxTokens
+			}
 		}
 	}
 }
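The practical effect for an OpenAI compatible endpoint: with no explicit setting, the request body now carries the token cap instead of silently inheriting the server's own default. An illustrative before/after of the request payload, assuming a local koboldcpp-style server with a small built-in default (payloads are examples, not captured traffic):

// Illustrative Chat Completions request bodies only; "local-model" is a placeholder.
const before = {
	model: "local-model",
	messages: [{ role: "user", content: "Hello!" }],
	// no max_completion_tokens → a koboldcpp-style server falls back to its
	// own default (e.g., 512 tokens) and truncates longer replies
}

const after = {
	model: "local-model",
	messages: [{ role: "user", content: "Hello!" }],
	max_completion_tokens: 4096, // sent by default now that the gate is `!== false`
}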
