
Commit 173883d

AlexandruSmirnov committed
fix: properly handle max_tokens deprecation for OpenAI models
- O3 family models (o3-mini, o3) do not support max_tokens parameter
- All other models use max_completion_tokens instead of deprecated max_tokens
- Remove unused isAzureAiInference parameter from addMaxTokensIfNeeded
- Update tests to reflect correct behavior for each model type

Per OpenAI docs: max_tokens is deprecated and not compatible with o-series models
1 parent 0b72b68 commit 173883d
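
To illustrate the intended behavior before reading the diff, here is a minimal sketch of the request bodies the handler should now produce. The field values (4096, "medium", 0.5, "o3-mini") come from the tests in this commit, while the model id "gpt-4", the message content, and the simplified object shapes are placeholders rather than code from the repository.

// Illustration only (TypeScript); simplified shapes, placeholder values noted above.

// Non-O3 model with includeMaxTokens: true -- max_completion_tokens is sent, deprecated max_tokens never is
const standardRequest = {
	model: "gpt-4", // placeholder model id
	messages: [{ role: "user" as const, content: "Hello" }],
	max_completion_tokens: 4096, // user-configured modelMaxTokens, falling back to the model default
}

// O3-family model with the same setting -- neither max_tokens nor max_completion_tokens is sent
const o3Request = {
	model: "o3-mini",
	messages: [{ role: "user" as const, content: "Hello" }],
	reasoning_effort: "medium" as const,
	temperature: 0.5,
	// no token-limit field: O3 models do not support max_tokens
}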

2 files changed: +27 −27 lines changed

src/api/providers/__tests__/openai.spec.ts

Lines changed: 16 additions & 16 deletions
@@ -217,7 +217,7 @@ describe("OpenAiHandler", () => {
 // Assert the mockCreate was called with max_tokens
 expect(mockCreate).toHaveBeenCalled()
 const callArgs = mockCreate.mock.calls[0][0]
-expect(callArgs.max_tokens).toBe(4096)
+expect(callArgs.max_completion_tokens).toBe(4096)
 })

 it("should not include max_tokens when includeMaxTokens is false", async () => {
@@ -238,7 +238,7 @@ describe("OpenAiHandler", () => {
 // Assert the mockCreate was called without max_tokens
 expect(mockCreate).toHaveBeenCalled()
 const callArgs = mockCreate.mock.calls[0][0]
-expect(callArgs.max_tokens).toBeUndefined()
+expect(callArgs.max_completion_tokens).toBeUndefined()
 })

 it("should not include max_tokens when includeMaxTokens is undefined", async () => {
@@ -259,7 +259,7 @@ describe("OpenAiHandler", () => {
 // Assert the mockCreate was called without max_tokens
 expect(mockCreate).toHaveBeenCalled()
 const callArgs = mockCreate.mock.calls[0][0]
-expect(callArgs.max_tokens).toBeUndefined()
+expect(callArgs.max_completion_tokens).toBeUndefined()
 })

 it("should use user-configured modelMaxTokens instead of model default maxTokens", async () => {
@@ -281,7 +281,7 @@ describe("OpenAiHandler", () => {
 // Assert the mockCreate was called with user-configured modelMaxTokens (32000), not model default maxTokens (4096)
 expect(mockCreate).toHaveBeenCalled()
 const callArgs = mockCreate.mock.calls[0][0]
-expect(callArgs.max_tokens).toBe(32000)
+expect(callArgs.max_completion_tokens).toBe(32000)
 })

 it("should fallback to model default maxTokens when user modelMaxTokens is not set", async () => {
@@ -303,7 +303,7 @@ describe("OpenAiHandler", () => {
 // Assert the mockCreate was called with model default maxTokens (4096) as fallback
 expect(mockCreate).toHaveBeenCalled()
 const callArgs = mockCreate.mock.calls[0][0]
-expect(callArgs.max_tokens).toBe(4096)
+expect(callArgs.max_completion_tokens).toBe(4096)
 })
 })

@@ -447,7 +447,7 @@ describe("OpenAiHandler", () => {

 // Verify max_tokens is NOT included when includeMaxTokens is not set
 const callArgs = mockCreate.mock.calls[0][0]
-expect(callArgs).not.toHaveProperty("max_tokens")
+expect(callArgs).not.toHaveProperty("max_completion_tokens")
 })

 it("should handle non-streaming responses with Azure AI Inference Service", async () => {
@@ -493,7 +493,7 @@ describe("OpenAiHandler", () => {

 // Verify max_tokens is NOT included when includeMaxTokens is not set
 const callArgs = mockCreate.mock.calls[0][0]
-expect(callArgs).not.toHaveProperty("max_tokens")
+expect(callArgs).not.toHaveProperty("max_completion_tokens")
 })

 it("should handle completePrompt with Azure AI Inference Service", async () => {
@@ -510,7 +510,7 @@ describe("OpenAiHandler", () => {

 // Verify max_tokens is NOT included when includeMaxTokens is not set
 const callArgs = mockCreate.mock.calls[0][0]
-expect(callArgs).not.toHaveProperty("max_tokens")
+expect(callArgs).not.toHaveProperty("max_completion_tokens")
 })
 })

@@ -566,7 +566,7 @@ describe("OpenAiHandler", () => {
 },
 }

-it("should handle O3 model with streaming and include max_tokens when includeMaxTokens is true", async () => {
+it("should handle O3 model with streaming and NOT include max_tokens even when includeMaxTokens is true", async () => {
 const o3Handler = new OpenAiHandler({
 ...o3Options,
 includeMaxTokens: true,
@@ -601,7 +601,7 @@ describe("OpenAiHandler", () => {
 stream_options: { include_usage: true },
 reasoning_effort: "medium",
 temperature: 0.5,
-max_tokens: 32000,
+// O3 models do not support max_tokens
 }),
 {},
 )
@@ -647,10 +647,10 @@ describe("OpenAiHandler", () => {

 // Verify max_tokens is NOT included
 const callArgs = mockCreate.mock.calls[0][0]
-expect(callArgs).not.toHaveProperty("max_tokens")
+expect(callArgs).not.toHaveProperty("max_completion_tokens")
 })

-it("should handle O3 model non-streaming with max_tokens and reasoning_effort", async () => {
+it("should handle O3 model non-streaming with reasoning_effort but NO max_tokens", async () => {
 const o3Handler = new OpenAiHandler({
 ...o3Options,
 openAiStreamingEnabled: false,
@@ -683,7 +683,7 @@ describe("OpenAiHandler", () => {
 ],
 reasoning_effort: "medium",
 temperature: 0.3,
-max_tokens: 65536, // Falls back to model default
+// O3 models do not support max_tokens
 }),
 {},
 )
@@ -743,10 +743,10 @@ describe("OpenAiHandler", () => {

 // Verify max_tokens is NOT included when includeMaxTokens is false
 const callArgs = mockCreate.mock.calls[0][0]
-expect(callArgs).not.toHaveProperty("max_tokens")
+expect(callArgs).not.toHaveProperty("max_completion_tokens")
 })

-it("should include max_tokens for O3 model with Azure AI Inference Service when includeMaxTokens is true", async () => {
+it("should NOT include max_tokens for O3 model with Azure AI Inference Service even when includeMaxTokens is true", async () => {
 const o3AzureHandler = new OpenAiHandler({
 ...o3Options,
 openAiBaseUrl: "https://test.services.ai.azure.com",
@@ -766,7 +766,7 @@ describe("OpenAiHandler", () => {
 expect(mockCreate).toHaveBeenCalledWith(
 expect.objectContaining({
 model: "o3-mini",
-max_tokens: 65536, // Included when includeMaxTokens is true
+// O3 models do not support max_tokens
 }),
 { path: "/models/chat/completions" },
 )

src/api/providers/openai.ts

Lines changed: 11 additions & 11 deletions
@@ -159,7 +159,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 }

 // Add max_tokens if needed
-this.addMaxTokensIfNeeded(requestOptions, modelInfo, isAzureAiInference)
+this.addMaxTokensIfNeeded(requestOptions, modelInfo)

 const stream = await this.client.chat.completions.create(
 requestOptions,
@@ -221,7 +221,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 }

 // Add max_tokens if needed
-this.addMaxTokensIfNeeded(requestOptions, modelInfo, isAzureAiInference)
+this.addMaxTokensIfNeeded(requestOptions, modelInfo)

 const response = await this.client.chat.completions.create(
 requestOptions,
@@ -266,7 +266,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 }

 // Add max_tokens if needed
-this.addMaxTokensIfNeeded(requestOptions, modelInfo, isAzureAiInference)
+this.addMaxTokensIfNeeded(requestOptions, modelInfo)

 const response = await this.client.chat.completions.create(
 requestOptions,
@@ -309,8 +309,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 temperature: this.options.modelTemperature ?? 0,
 }

-// Add max_tokens if needed
-this.addMaxTokensIfNeeded(requestOptions, modelInfo, methodIsAzureAiInference)
+// O3 family models do not support max_tokens parameter

 const stream = await this.client.chat.completions.create(
 requestOptions,
@@ -332,8 +331,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 temperature: this.options.modelTemperature ?? 0,
 }

-// Add max_tokens if needed
-this.addMaxTokensIfNeeded(requestOptions, modelInfo, methodIsAzureAiInference)
+// O3 family models do not support max_tokens parameter

 const response = await this.client.chat.completions.create(
 requestOptions,
@@ -387,19 +385,21 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 }

 /**
-* Adds max_tokens to the request body if needed based on provider configuration
+* Adds max_completion_tokens to the request body if needed based on provider configuration
+* Note: max_tokens is deprecated in favor of max_completion_tokens as per OpenAI documentation
+* O3 family models handle max_tokens separately in handleO3FamilyMessage
 */
 private addMaxTokensIfNeeded(
 requestOptions:
 | OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming
 | OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming,
 modelInfo: ModelInfo,
-isAzureAiInference: boolean,
 ): void {
-// Only add max_tokens if includeMaxTokens is true
+// Only add max_completion_tokens if includeMaxTokens is true
 if (this.options.includeMaxTokens === true) {
 // Use user-configured modelMaxTokens if available, otherwise fall back to model's default maxTokens
-requestOptions.max_tokens = this.options.modelMaxTokens || modelInfo.maxTokens
+// Using max_completion_tokens as max_tokens is deprecated
+;(requestOptions as any).max_completion_tokens = this.options.modelMaxTokens || modelInfo.maxTokens
 }
 }
 }
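
Taken together, the hunks above leave addMaxTokensIfNeeded roughly as sketched below. This is a self-contained approximation assembled from the diff, not the project's real declarations: ModelInfoLike and HandlerOptionsLike are simplified stand-ins for ModelInfo and the handler options, and the method is shown as a free function rather than a class member.

// Sketch assembled from the diff above; simplified types for illustration only.
interface ModelInfoLike {
	maxTokens?: number
}

interface HandlerOptionsLike {
	includeMaxTokens?: boolean
	modelMaxTokens?: number
}

function addMaxTokensIfNeeded(
	requestOptions: Record<string, unknown>,
	modelInfo: ModelInfoLike,
	options: HandlerOptionsLike,
): void {
	// Only set max_completion_tokens when includeMaxTokens is explicitly true;
	// the deprecated max_tokens field is never written, and O3-family requests
	// are built in handleO3FamilyMessage without calling this helper at all.
	if (options.includeMaxTokens === true) {
		requestOptions.max_completion_tokens = options.modelMaxTokens || modelInfo.maxTokens
	}
}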

0 commit comments
