|
1 | | -import { beforeAll, describe, expect, expectTypeOf, it, vi } from 'vitest'; |
| 1 | +import { |
| 2 | + beforeAll, |
| 3 | + beforeEach, |
| 4 | + afterEach, |
| 5 | + describe, |
| 6 | + expect, |
| 7 | + expectTypeOf, |
| 8 | + it, |
| 9 | + vi, |
| 10 | +} from 'vitest'; |
2 | 11 | import { z } from 'zod'; |
3 | 12 | import { |
4 | 13 | Agent, |
@@ -32,7 +41,7 @@ import { |
32 | 41 | TEST_MODEL_RESPONSE_BASIC, |
33 | 42 | TEST_TOOL, |
34 | 43 | } from './stubs'; |
35 | | -import { Model, ModelRequest } from '../src/model'; |
| 44 | +import { Model, ModelProvider, ModelRequest } from '../src/model'; |
36 | 45 |
|
37 | 46 | describe('Runner.run', () => { |
38 | 47 | beforeAll(() => { |
@@ -148,7 +157,11 @@ describe('Runner.run', () => { |
148 | 157 |
|
149 | 158 | // Track agent_end events on both the agent and runner |
150 | 159 | const agentEndEvents: Array<{ context: any; output: string }> = []; |
151 | | - const runnerEndEvents: Array<{ context: any; agent: any; output: string }> = []; |
| 160 | + const runnerEndEvents: Array<{ |
| 161 | + context: any; |
| 162 | + agent: any; |
| 163 | + output: string; |
| 164 | + }> = []; |
152 | 165 |
|
153 | 166 | agent.on('agent_end', (context, output) => { |
154 | 167 | agentEndEvents.push({ context, output }); |
@@ -407,7 +420,7 @@ describe('Runner.run', () => { |
407 | 420 | usage: new Usage(), |
408 | 421 | }; |
409 | 422 | class SimpleStreamingModel implements Model { |
410 | | - constructor(private resps: ModelResponse[]) { } |
| 423 | + constructor(private resps: ModelResponse[]) {} |
411 | 424 | async getResponse(_req: ModelRequest): Promise<ModelResponse> { |
412 | 425 | const r = this.resps.shift(); |
413 | 426 | if (!r) { |
@@ -525,6 +538,124 @@ describe('Runner.run', () => { |
525 | 538 | }); |
526 | 539 | }); |
527 | 540 |
|
| 541 | + describe('gpt-5 default model adjustments', () => { |
/**
 * FakeModel variant that remembers the most recent request it was asked to
 * serve, so a test can inspect the settings that actually reached the model.
 */
class InspectableModel extends FakeModel {
  /** Latest request passed to `getResponse`; stays `undefined` until the first call. */
  lastRequest: ModelRequest | undefined;

  /**
   * @param response - The single canned response forwarded to the base class.
   */
  constructor(response: ModelResponse) {
    super([response]);
  }

  /**
   * Stores the incoming request on `lastRequest`, then defers to the base
   * implementation for the actual response.
   *
   * @param request - The request issued for this model call.
   * @returns Whatever the base `getResponse` resolves to.
   */
  override async getResponse(request: ModelRequest): Promise<ModelResponse> {
    this.lastRequest = request;
    const response = await super.getResponse(request);
    return response;
  }
}
| 556 | + |
/**
 * ModelProvider stub that ignores the requested model name and always
 * resolves to the single model instance it was constructed with.
 */
class InspectableModelProvider implements ModelProvider {
  private readonly model: Model;

  /**
   * @param model - The model returned for every lookup.
   */
  constructor(model: Model) {
    this.model = model;
  }

  /**
   * @param _name - Requested model name; not consulted.
   * @returns The model supplied at construction time.
   */
  async getModel(_name: string): Promise<Model> {
    const { model } = this;
    return model;
  }
}
| 564 | + |
// Value OPENAI_DEFAULT_MODEL held before the current test, kept so it can be put back afterwards.
let originalDefaultModel: string | undefined;

// Remember the pre-existing value, then force the default model name for the test.
beforeEach(() => {
  const { OPENAI_DEFAULT_MODEL: previousValue } = process.env;
  originalDefaultModel = previousValue;
  process.env.OPENAI_DEFAULT_MODEL = 'gpt-5o';
});

// Put the environment back exactly as it was before the test ran.
afterEach(() => {
  if (originalDefaultModel !== undefined) {
    process.env.OPENAI_DEFAULT_MODEL = originalDefaultModel;
    return;
  }
  // The variable was unset beforehand: remove the key rather than assigning
  // undefined, which Node would coerce to the string "undefined".
  delete process.env.OPENAI_DEFAULT_MODEL;
});
| 579 | + |
/**
 * Builds a fresh model-settings literal for the tests in this suite.
 *
 * The result carries a plain `temperature`, a `providerData` bag holding
 * `reasoning`, `text`, `reasoning_effort` and an unrelated `keep` entry, and
 * top-level `reasoning` and `text` settings. A new object graph is created on
 * every call, so callers never share state.
 *
 * @returns The settings object described above.
 */
function createGpt5ModelSettings() {
  const providerData = {
    reasoning: { effort: 'high' },
    text: { verbosity: 'high' },
    reasoning_effort: 'medium',
    keep: 'value',
  };
  const reasoning = { effort: 'high', summary: 'detailed' };
  const text = { verbosity: 'medium' };

  return { temperature: 0.42, providerData, reasoning, text };
}
| 593 | + |
// The agent is handed a Model instance (not a model name); the request that
// reaches the model must keep the generic settings and carry none of the
// reasoning / text-verbosity fields.
it('strips GPT-5-only settings when the runner model is not a GPT-5 string', async () => {
  const inspectableModel = new InspectableModel({
    output: [fakeModelMessage('Hello non GPT-5')],
    usage: new Usage(),
  });
  const agent = new Agent({
    name: 'NonGpt5Runner',
    model: inspectableModel,
    modelSettings: createGpt5ModelSettings(),
  });

  const result = await new Runner().run(agent, 'hello');

  expect(result.finalOutput).toBe('Hello non GPT-5');
  expect(inspectableModel.lastRequest).toBeDefined();

  const { modelSettings: requestSettings } = inspectableModel.lastRequest!;
  const { providerData } = requestSettings;

  // Settings that are expected to pass through untouched.
  expect(requestSettings.temperature).toBe(0.42);
  expect(providerData?.keep).toBe('value');

  // Provider-level reasoning / verbosity entries must be gone.
  expect(providerData?.reasoning).toBeUndefined();
  expect(providerData?.text?.verbosity).toBeUndefined();
  expect((providerData as any)?.reasoning_effort).toBeUndefined();

  // Top-level reasoning / verbosity settings must be gone as well.
  expect(requestSettings.reasoning?.effort).toBeUndefined();
  expect(requestSettings.reasoning?.summary).toBeUndefined();
  expect(requestSettings.text?.verbosity).toBeUndefined();
});
| 624 | + |
// The agent declares no model of its own and the runner resolves one through
// the stub provider; the reasoning / text-verbosity fields must reach the
// model unchanged.
it('keeps GPT-5-only settings when the agent relies on the default model', async () => {
  const inspectableModel = new InspectableModel({
    output: [fakeModelMessage('Hello default GPT-5')],
    usage: new Usage(),
  });
  const modelProvider = new InspectableModelProvider(inspectableModel);
  const runner = new Runner({ modelProvider });

  const agent = new Agent({
    name: 'DefaultModelAgent',
    modelSettings: createGpt5ModelSettings(),
  });

  const result = await runner.run(agent, 'hello');

  expect(result.finalOutput).toBe('Hello default GPT-5');
  expect(inspectableModel.lastRequest).toBeDefined();

  const { modelSettings: requestSettings } = inspectableModel.lastRequest!;
  const { providerData } = requestSettings;

  // Provider-level entries survive as supplied.
  expect(providerData?.reasoning).toEqual({ effort: 'high' });
  expect(providerData?.text?.verbosity).toBe('high');
  expect((providerData as any)?.reasoning_effort).toBe('medium');

  // Top-level reasoning / verbosity settings survive as supplied.
  expect(requestSettings.reasoning?.effort).toBe('high');
  expect(requestSettings.reasoning?.summary).toBe('detailed');
  expect(requestSettings.text?.verbosity).toBe('medium');
});
| 657 | + }); |
| 658 | + |
528 | 659 | describe('selectModel', () => { |
529 | 660 | const MODEL_A = 'gpt-4o'; |
530 | 661 | const MODEL_B = 'gpt-4.1-mini'; |
|
0 commit comments