
Commit c4f95d4

fix E2E test
1 parent 95694b6 commit c4f95d4

4 files changed: +544 −142 lines

examples/tool-calls-beta-zod.ts

Lines changed: 1 addition & 0 deletions
@@ -100,6 +100,7 @@ async function main() {
       console.log();
     }
   }
+  console.log(JSON.stringify(runner.params.messages, null, 2));
 }

 main();

tests/lib/tools/ToolRunner.test.ts

Lines changed: 5 additions & 5 deletions
@@ -5,7 +5,7 @@ import OpenAI from 'openai';
 // import { Fetch } from 'openai/sdk/internal/builtin-types';
 import { mockFetch } from '../../utils/mock-fetch';
 import { BetaRunnableTool } from 'openai/lib/beta/BetaRunnableTool';
-import { ChatCompletion, ChatCompletionChunk, ChatCompletionCreateParams, ChatCompletionMessage } from 'openai/resources';
+import { ChatCompletion, ChatCompletionChunk, ChatCompletionMessage, ChatCompletionTool, ChatCompletionToolMessageParam } from 'openai/resources';
 import { Fetch } from 'openai/internal/builtin-types';
 import { ChatCompletionStream } from 'openai/lib/ChatCompletionStream';

@@ -48,12 +48,12 @@ const calculatorTool: BetaRunnableTool<{ a: number; b: number; operation: string
 };

 // Helper functions to create content blocks
-function getWeatherToolUse(location: string, id: string = 'tool_1'): BetaContentBlock {
+function getWeatherToolUse(location: string, id: string = 'tool_1'): ChatCompletionFunctionTool {
   return { type: 'tool_use', id, name: 'getWeather', input: { location } };
 }

-function getWeatherToolResult(location: string, id: string = 'tool_1'): BetaToolResultBlockParam {
-  return { type: 'tool_result', tool_use_id: id, content: `Sunny in ${location}` };
+function getWeatherToolResult(location: string, id: string = 'tool_1'): ChatCompletionToolMessageParam {
+  return { role: 'tool', tool_use_id: id, content: `Sunny in ${location}` };
 }

 function getCalculatorToolUse(
@@ -294,7 +294,7 @@ function setupTest(params: Partial<ToolRunnerParams> = {}): SetupTestResult<bool
   };

     handleStreamEvents(betaMessageToStreamEvents(message));
-    return new ChatCompletionStream(message as ChatCompletionCreateParams);
+    return new ChatCompletionStream(message);
   };

   const client = new OpenAI({ apiKey: 'test-key', fetch: fetch, maxRetries: 0 });

tests/lib/tools/ToolRunnerE2E.test.ts

Lines changed: 64 additions & 137 deletions
@@ -3,7 +3,7 @@ import { betaZodTool } from '../../../src/helpers/beta/zod';
 import * as z from 'zod';
 import nock from 'nock';
 import { gunzipSync } from 'zlib';
-import { RequestInfo } from '@openai/sdk/internal/builtin-types';
+import { RequestInfo } from 'openai/internal/builtin-types';

 describe('toolRunner integration tests', () => {
   let client: OpenAI;
@@ -78,7 +78,7 @@ describe('toolRunner integration tests', () => {
           'Global fetch is not available. Ensure you are using Node.js 18+ or have undici available.',
         );
       }
-      return globalFetch(input, init);
+      return await globalFetch(input, init);
     };

     client = new OpenAI({
@@ -123,12 +123,16 @@ describe('toolRunner integration tests', () => {
   it('should answer tools and run until completion', async () => {
     const tool = createTestTool();

-    const runner = client.beta.messages.toolRunner({
+    const runner = client.beta.chat.completions.toolRunner({
       model: 'gpt-4o',
       max_tokens: 1000,
       max_iterations: 5, // High limit, should stop before reaching it
       messages: [
-        { role: 'user', content: 'Use the test_tool with value "test", then provide a final response' },
+        {
+          role: 'user',
+          content:
+            'Use the test_tool with value "test", then provide a final response that includes the word \'foo\'.',
+        },
       ],
       tools: [tool],
     });
@@ -142,34 +146,34 @@ describe('toolRunner integration tests', () => {
     expect(messages).toHaveLength(2);

     // First message should contain one tool use
-    const firstMessage = messages[0]!;
-    expect(firstMessage.role).toBe('assistant');
-    expect(firstMessage.content).toHaveLength(2); // text + tool_use
-
-    const toolUseBlocks = firstMessage.content.filter((block) => block.type === 'tool_use');
-    expect(toolUseBlocks).toHaveLength(1);
-    expect(toolUseBlocks[0]!.name).toBe('test_tool');
-    expect(toolUseBlocks[0]!.input).toEqual({ value: 'test' });
-    expect(firstMessage.stop_reason).toBe('tool_use');
-
-    // Second message should be final response
-    const secondMessage = messages[1]!;
-    expect(secondMessage.role).toBe('assistant');
-    expect(secondMessage.content).toHaveLength(1);
-    expect(secondMessage.content[0]!.type).toBe('text');
-    expect(secondMessage.stop_reason).toBe('end_turn');
+    const firstMessage = messages[0]!.choices[0]!;
+    expect(firstMessage.message.role).toBe('assistant');
+    expect(firstMessage.message.content).toBeNull(); // openai only responds with tool use and null content
+    expect(firstMessage.message.tool_calls).toHaveLength(1); // the tool call should be present
+    expect(firstMessage.finish_reason).toBe('tool_calls');
+
+    // Second message should be final response with text
+    expect(messages[1]!.choices).toHaveLength(1);
+    const secondMessage = messages[1]!.choices[0]!;
+    expect(secondMessage.message.role).toBe('assistant');
+    expect(secondMessage.message.content).toContain('foo');
+    expect(secondMessage.finish_reason).toBe('stop');
   });

   describe('max_iterations', () => {
     it('should respect max_iterations limit', async () => {
       const tool = createCounterTool();

-      const runner = client.beta.messages.toolRunner({
+      const runner = client.beta.chat.completions.toolRunner({
         model: 'gpt-4o',
         max_tokens: 1000,
         max_iterations: 2,
         messages: [
-          { role: 'user', content: 'Use the test_tool with count 1, then use it again with count 2' },
+          {
+            role: 'user',
+            content:
+              "Use the test_tool with count 1, then use it again with count 2, then say '231' in the final message",
+          },
         ],
         tools: [tool],
       });
@@ -183,35 +187,53 @@ describe('toolRunner integration tests', () => {
       expect(messages).toHaveLength(2);

       // First message should contain tool uses
-      const firstMessage = messages[0]!;
-      expect(firstMessage.role).toBe('assistant');
-      expect(firstMessage.content).toHaveLength(3); // text + 2 tool_use blocks
+      const firstMessage = messages[0]!.choices[0]!;
+      expect(firstMessage.message.role).toBe('assistant');
+      expect(firstMessage.message.content).toBeNull();
+      expect(firstMessage.message.tool_calls).toHaveLength(2);

-      const toolUseBlocks = firstMessage.content.filter((block) => block.type === 'tool_use');
+      const { tool_calls: toolUseBlocks } = firstMessage.message;
+      expect(toolUseBlocks).toBeDefined();
       expect(toolUseBlocks).toHaveLength(2);
-      expect(toolUseBlocks[0]!.name).toBe('test_tool');
-      expect(toolUseBlocks[0]!.input).toEqual({ count: 1 });
-      expect(toolUseBlocks[1]!.name).toBe('test_tool');
-      expect(toolUseBlocks[1]!.input).toEqual({ count: 2 });
-
-      // Second message should be final response
-      const secondMessage = messages[1]!;
-      expect(secondMessage.role).toBe('assistant');
-      expect(secondMessage.content).toHaveLength(1);
-      expect(secondMessage.content[0]!.type).toBe('text');
-      expect(secondMessage.stop_reason).toBe('end_turn');
+
+      if (toolUseBlocks && toolUseBlocks[0] && toolUseBlocks[0].type === 'function') {
+        expect(toolUseBlocks[0].function).toBeDefined();
+        expect(toolUseBlocks[0].function.name).toBe('test_tool');
+        expect(JSON.parse(toolUseBlocks[0].function.arguments)).toEqual({ count: 1 });
+      } else {
+        // Doing it with an if else to get nice type inference
+        throw new Error('Expected tool call at index 0 to be a function');
+      }
+
+      if (toolUseBlocks && toolUseBlocks[1] && toolUseBlocks[1].type === 'function') {
+        expect(toolUseBlocks[1].function).toBeDefined();
+        expect(toolUseBlocks[1].function.name).toBe('test_tool');
+        expect(JSON.parse(toolUseBlocks[1].function.arguments)).toEqual({ count: 2 });
+      } else {
+        throw new Error('Expected tool call at index 1 to be a function');
+      }
+
+      // Second message should be final response (not a tool call)
+      const secondMessage = messages[1]!.choices[0]!;
+      expect(secondMessage.message.role).toBe('assistant');
+      expect(secondMessage.message.content).toContain('231');
+      expect(secondMessage.finish_reason).toBe('stop');
     });
   });

   describe('done()', () => {
     it('should consume the iterator and return final message', async () => {
       const tool = createTestTool({ inputSchema: z.object({ input: z.string() }) });

-      const runner = client.beta.messages.toolRunner({
+      const runner = client.beta.chat.completions.toolRunner({
         model: 'gpt-4o',
         max_tokens: 1000,
         messages: [
-          { role: 'user', content: 'Use the test_tool with input "test", then provide a final response' },
+          {
+            role: 'user',
+            content:
+              'Use the test_tool with input "test", then provide a final response with the word \'231\'',
+          },
         ],
         tools: [tool],
       });
@@ -220,17 +242,16 @@ describe('toolRunner integration tests', () => {

       // Final message should be the last text-only response
       expect(finalMessage.role).toBe('assistant');
-      expect(finalMessage.content).toHaveLength(1);
-      expect(finalMessage.content[0]).toHaveProperty('type', 'text');
-      expect(finalMessage.stop_reason).toBe('end_turn');
+      expect(finalMessage.tool_calls).toBeUndefined();
+      expect(finalMessage.content).toContain('231');
     });
   });

   describe('setMessagesParams()', () => {
     it('should update parameters using direct assignment', async () => {
       const tool = createTestTool();

-      const runner = client.beta.messages.toolRunner({
+      const runner = client.beta.chat.completions.toolRunner({
         model: 'gpt-4o',
         max_tokens: 1000,
         messages: [{ role: 'user', content: 'Hello' }],
@@ -251,98 +272,4 @@ describe('toolRunner integration tests', () => {
       expect(params.messages).toEqual([{ role: 'user', content: 'Updated message' }]);
     });
   });
-
-  describe('compaction', () => {
-    it('should compact messages when token threshold is exceeded', async () => {
-      const tool = {
-        name: 'submit_analysis',
-        description: 'Call this LAST with your final analysis.',
-        input_schema: {
-          type: 'object' as const,
-          properties: {
-            summary: {
-              type: 'string' as const,
-            },
-          },
-          required: ['summary'],
-        },
-        run: async (input: { summary: string }) => {
-          return 'Analysis submitted';
-        },
-      };
-
-      const runner = client.beta.messages.toolRunner({
-        model: 'gpt-4o',
-        max_tokens: 4000,
-        messages: [
-          {
-            role: 'user',
-            content:
-              'Write a detailed 500-word essay about dogs, cats, and birds. ' +
-              'Call the tool \`submit_analysis\` with the information about all three animals ',
-          },
-        ],
-        tools: [tool],
-        compactionControl: {
-          enabled: true,
-          contextTokenThreshold: 500, // Low threshold to trigger compaction
-        },
-        max_iterations: 1,
-      });
-
-      await runner.runUntilDone();
-      expect(runner.params.messages[0]).toMatchInlineSnapshot(`
-        {
-          "content": [
-            {
-              "text": "<summary>
-## Task Overview
-The user requested:
-1. Write a detailed 500-word essay about dogs, cats, and birds
-2. Call a tool named \`submit_analysis\` with information about all three animals
-
-Success criteria:
-- Essay must be approximately 500 words
-- Must cover all three animals (dogs, cats, and birds)
-- Must be detailed
-- Must call the \`submit_analysis\` tool with the relevant information
-
-## Current State
-**Completed:** Nothing has been completed yet.
-
-**Status:** The task has just been assigned. No essay has been written, and no tool has been called.
-
-## Important Discoveries
-**Key Issue Identified:** The tool \`submit_analysis\` does not exist in my available tool set. I need to:
-1. Either inform the user that this tool is not available, OR
-2. Proceed with writing the essay and explain that I cannot call the non-existent tool
-
-**Technical Constraint:** Without knowing the expected parameters/schema for \`submit_analysis\`, even if it were available, I would need clarification on:
-- What format the information should take (structured data, summary points, the full essay text?)
-- What specific fields or parameters the tool expects
-- Whether separate calls are needed for each animal or one combined call
-
-## Next Steps
-1. **Write the 500-word essay** covering dogs, cats, and birds with detailed information about each animal
-2. **Address the tool issue** by either:
-- Informing the user that \`submit_analysis\` is not available in my toolkit
-- Asking for clarification about what tool they actually meant or how they want the analysis submitted
-- Demonstrating what the tool call would look like if it existed
-3. **Deliver the essay** in a clear, organized format regardless of tool availability
-
-## Context to Preserve
-- User expects both written content (essay) AND a tool interaction
-- The essay should be substantive and detailed, not superficial
-- All three animals must receive adequate coverage in the 500-word limit
-- No specific style, tone, or audience was specified for the essay (assume general informative style)
-- No clarification was provided about whether the essay and tool call should contain the same or different information
-</summary>",
-              "type": "text",
-            },
-          ],
-          "role": "user",
-        }
-      `);
-    });
-  });
 });
