
Commit c4f95d4

fix E2E test
1 parent 95694b6 commit c4f95d4

4 files changed: +544 −142 lines

examples/tool-calls-beta-zod.ts

Lines changed: 1 addition & 0 deletions
@@ -100,6 +100,7 @@ async function main() {
       console.log();
     }
   }
+  console.log(JSON.stringify(runner.params.messages, null, 2));
 }

 main();

tests/lib/tools/ToolRunner.test.ts

Lines changed: 5 additions & 5 deletions
@@ -5,7 +5,7 @@ import OpenAI from 'openai';
 // import { Fetch } from 'openai/sdk/internal/builtin-types';
 import { mockFetch } from '../../utils/mock-fetch';
 import { BetaRunnableTool } from 'openai/lib/beta/BetaRunnableTool';
-import { ChatCompletion, ChatCompletionChunk, ChatCompletionCreateParams, ChatCompletionMessage } from 'openai/resources';
+import { ChatCompletion, ChatCompletionChunk, ChatCompletionMessage, ChatCompletionTool, ChatCompletionToolMessageParam } from 'openai/resources';
 import { Fetch } from 'openai/internal/builtin-types';
 import { ChatCompletionStream } from 'openai/lib/ChatCompletionStream';

@@ -48,12 +48,12 @@ const calculatorTool: BetaRunnableTool<{ a: number; b: number; operation: string
 };

 // Helper functions to create content blocks
-function getWeatherToolUse(location: string, id: string = 'tool_1'): BetaContentBlock {
+function getWeatherToolUse(location: string, id: string = 'tool_1'): ChatCompletionFunctionTool {
   return { type: 'tool_use', id, name: 'getWeather', input: { location } };
 }

-function getWeatherToolResult(location: string, id: string = 'tool_1'): BetaToolResultBlockParam {
-  return { type: 'tool_result', tool_use_id: id, content: `Sunny in ${location}` };
+function getWeatherToolResult(location: string, id: string = 'tool_1'): ChatCompletionToolMessageParam {
+  return { role: 'tool', tool_use_id: id, content: `Sunny in ${location}` };
 }

 function getCalculatorToolUse(
@@ -294,7 +294,7 @@ function setupTest(params: Partial<ToolRunnerParams> = {}): SetupTestResult<bool
   };

     handleStreamEvents(betaMessageToStreamEvents(message));
-    return new ChatCompletionStream(message as ChatCompletionCreateParams);
+    return new ChatCompletionStream(message);
   };

   const client = new OpenAI({ apiKey: 'test-key', fetch: fetch, maxRetries: 0 });

tests/lib/tools/ToolRunnerE2E.test.ts

Lines changed: 64 additions & 137 deletions
@@ -3,7 +3,7 @@ import { betaZodTool } from '../../../src/helpers/beta/zod';
 import * as z from 'zod';
 import nock from 'nock';
 import { gunzipSync } from 'zlib';
-import { RequestInfo } from '@openai/sdk/internal/builtin-types';
+import { RequestInfo } from 'openai/internal/builtin-types';

 describe('toolRunner integration tests', () => {
   let client: OpenAI;
@@ -78,7 +78,7 @@ describe('toolRunner integration tests', () => {
           'Global fetch is not available. Ensure you are using Node.js 18+ or have undici available.',
         );
       }
-      return globalFetch(input, init);
+      return await globalFetch(input, init);
     };

     client = new OpenAI({
@@ -123,12 +123,16 @@ describe('toolRunner integration tests', () => {
   it('should answer tools and run until completion', async () => {
     const tool = createTestTool();

-    const runner = client.beta.messages.toolRunner({
+    const runner = client.beta.chat.completions.toolRunner({
       model: 'gpt-4o',
       max_tokens: 1000,
       max_iterations: 5, // High limit, should stop before reaching it
       messages: [
-        { role: 'user', content: 'Use the test_tool with value "test", then provide a final response' },
+        {
+          role: 'user',
+          content:
+            'Use the test_tool with value "test", then provide a final response that includes the word \'foo\'.',
+        },
       ],
       tools: [tool],
     });
@@ -142,34 +146,34 @@ describe('toolRunner integration tests', () => {
     expect(messages).toHaveLength(2);

     // First message should contain one tool use
-    const firstMessage = messages[0]!;
-    expect(firstMessage.role).toBe('assistant');
-    expect(firstMessage.content).toHaveLength(2); // text + tool_use
-
-    const toolUseBlocks = firstMessage.content.filter((block) => block.type === 'tool_use');
-    expect(toolUseBlocks).toHaveLength(1);
-    expect(toolUseBlocks[0]!.name).toBe('test_tool');
-    expect(toolUseBlocks[0]!.input).toEqual({ value: 'test' });
-    expect(firstMessage.stop_reason).toBe('tool_use');
-
-    // Second message should be final response
-    const secondMessage = messages[1]!;
-    expect(secondMessage.role).toBe('assistant');
-    expect(secondMessage.content).toHaveLength(1);
-    expect(secondMessage.content[0]!.type).toBe('text');
-    expect(secondMessage.stop_reason).toBe('end_turn');
+    const firstMessage = messages[0]!.choices[0]!;
+    expect(firstMessage.message.role).toBe('assistant');
+    expect(firstMessage.message.content).toBeNull(); // openai only responds with tool use and null content
+    expect(firstMessage.message.tool_calls).toHaveLength(1); // the tool call should be present
+    expect(firstMessage.finish_reason).toBe('tool_calls');
+
+    // Second message should be final response with text
+    expect(messages[1]!.choices).toHaveLength(1);
+    const secondMessage = messages[1]!.choices[0]!;
+    expect(secondMessage.message.role).toBe('assistant');
+    expect(secondMessage.message.content).toContain('foo');
+    expect(secondMessage.finish_reason).toBe('stop');
   });

   describe('max_iterations', () => {
     it('should respect max_iterations limit', async () => {
       const tool = createCounterTool();

-      const runner = client.beta.messages.toolRunner({
+      const runner = client.beta.chat.completions.toolRunner({
         model: 'gpt-4o',
         max_tokens: 1000,
         max_iterations: 2,
         messages: [
-          { role: 'user', content: 'Use the test_tool with count 1, then use it again with count 2' },
+          {
+            role: 'user',
+            content:
+              "Use the test_tool with count 1, then use it again with count 2, then say '231' in the final message",
+          },
         ],
         tools: [tool],
       });
@@ -183,35 +187,53 @@ describe('toolRunner integration tests', () => {
       expect(messages).toHaveLength(2);

       // First message should contain tool uses
-      const firstMessage = messages[0]!;
-      expect(firstMessage.role).toBe('assistant');
-      expect(firstMessage.content).toHaveLength(3); // text + 2 tool_use blocks
+      const firstMessage = messages[0]!.choices[0]!;
+      expect(firstMessage.message.role).toBe('assistant');
+      expect(firstMessage.message.content).toBeNull();
+      expect(firstMessage.message.tool_calls).toHaveLength(2);

-      const toolUseBlocks = firstMessage.content.filter((block) => block.type === 'tool_use');
+      const { tool_calls: toolUseBlocks } = firstMessage.message;
+      expect(toolUseBlocks).toBeDefined();
       expect(toolUseBlocks).toHaveLength(2);
-      expect(toolUseBlocks[0]!.name).toBe('test_tool');
-      expect(toolUseBlocks[0]!.input).toEqual({ count: 1 });
-      expect(toolUseBlocks[1]!.name).toBe('test_tool');
-      expect(toolUseBlocks[1]!.input).toEqual({ count: 2 });
-
-      // Second message should be final response
-      const secondMessage = messages[1]!;
-      expect(secondMessage.role).toBe('assistant');
-      expect(secondMessage.content).toHaveLength(1);
-      expect(secondMessage.content[0]!.type).toBe('text');
-      expect(secondMessage.stop_reason).toBe('end_turn');
+
+      if (toolUseBlocks && toolUseBlocks[0] && toolUseBlocks[0].type === 'function') {
+        expect(toolUseBlocks[0].function).toBeDefined();
+        expect(toolUseBlocks[0].function.name).toBe('test_tool');
+        expect(JSON.parse(toolUseBlocks[0].function.arguments)).toEqual({ count: 1 });
+      } else {
+        // Doing it with an if else to get nice type inference
+        throw new Error('Expected tool call at index 0 to be a function');
+      }
+
+      if (toolUseBlocks && toolUseBlocks[1] && toolUseBlocks[1].type === 'function') {
+        expect(toolUseBlocks[1].function).toBeDefined();
+        expect(toolUseBlocks[1].function.name).toBe('test_tool');
+        expect(JSON.parse(toolUseBlocks[1].function.arguments)).toEqual({ count: 2 });
+      } else {
+        throw new Error('Expected tool call at index 1 to be a function');
+      }
+
+      // Second message should be final response (not a tool call)
+      const secondMessage = messages[1]!.choices[0]!;
+      expect(secondMessage.message.role).toBe('assistant');
+      expect(secondMessage.message.content).toContain('231');
+      expect(secondMessage.finish_reason).toBe('stop');
     });
   });

   describe('done()', () => {
     it('should consume the iterator and return final message', async () => {
       const tool = createTestTool({ inputSchema: z.object({ input: z.string() }) });

-      const runner = client.beta.messages.toolRunner({
+      const runner = client.beta.chat.completions.toolRunner({
         model: 'gpt-4o',
         max_tokens: 1000,
         messages: [
-          { role: 'user', content: 'Use the test_tool with input "test", then provide a final response' },
+          {
+            role: 'user',
+            content:
+              'Use the test_tool with input "test", then provide a final response with the word \'231\'',
+          },
         ],
         tools: [tool],
       });
@@ -220,17 +242,16 @@ describe('toolRunner integration tests', () => {

       // Final message should be the last text-only response
       expect(finalMessage.role).toBe('assistant');
-      expect(finalMessage.content).toHaveLength(1);
-      expect(finalMessage.content[0]).toHaveProperty('type', 'text');
-      expect(finalMessage.stop_reason).toBe('end_turn');
+      expect(finalMessage.tool_calls).toBeUndefined();
+      expect(finalMessage.content).toContain('231');
     });
   });

   describe('setMessagesParams()', () => {
     it('should update parameters using direct assignment', async () => {
       const tool = createTestTool();

-      const runner = client.beta.messages.toolRunner({
+      const runner = client.beta.chat.completions.toolRunner({
         model: 'gpt-4o',
         max_tokens: 1000,
         messages: [{ role: 'user', content: 'Hello' }],
@@ -251,98 +272,4 @@ describe('toolRunner integration tests', () => {
       expect(params.messages).toEqual([{ role: 'user', content: 'Updated message' }]);
     });
   });
-
-  describe('compaction', () => {
-    it('should compact messages when token threshold is exceeded', async () => {
-      const tool = {
-        name: 'submit_analysis',
-        description: 'Call this LAST with your final analysis.',
-        input_schema: {
-          type: 'object' as const,
-          properties: {
-            summary: {
-              type: 'string' as const,
-            },
-          },
-          required: ['summary'],
-        },
-        run: async (input: { summary: string }) => {
-          return 'Analysis submitted';
-        },
-      };
-
-      const runner = client.beta.messages.toolRunner({
-        model: 'gpt-4o',
-        max_tokens: 4000,
-        messages: [
-          {
-            role: 'user',
-            content:
-              'Write a detailed 500-word essay about dogs, cats, and birds. ' +
-              'Call the tool \`submit_analysis\` with the information about all three animals ',
-          },
-        ],
-        tools: [tool],
-        compactionControl: {
-          enabled: true,
-          contextTokenThreshold: 500, // Low threshold to trigger compaction
-        },
-        max_iterations: 1,
-      });
-
-      await runner.runUntilDone();
-      expect(runner.params.messages[0]).toMatchInlineSnapshot(`
-        {
-          "content": [
-            {
-              "text": "<summary>
-## Task Overview
-The user requested:
-1. Write a detailed 500-word essay about dogs, cats, and birds
-2. Call a tool named \`submit_analysis\` with information about all three animals
-
-Success criteria:
-- Essay must be approximately 500 words
-- Must cover all three animals (dogs, cats, and birds)
-- Must be detailed
-- Must call the \`submit_analysis\` tool with the relevant information
-
-## Current State
-**Completed:** Nothing has been completed yet.
-
-**Status:** The task has just been assigned. No essay has been written, and no tool has been called.
-
-## Important Discoveries
-**Key Issue Identified:** The tool \`submit_analysis\` does not exist in my available tool set. I need to:
-1. Either inform the user that this tool is not available, OR
-2. Proceed with writing the essay and explain that I cannot call the non-existent tool
-
-**Technical Constraint:** Without knowing the expected parameters/schema for \`submit_analysis\`, even if it were available, I would need clarification on:
-- What format the information should take (structured data, summary points, the full essay text?)
-- What specific fields or parameters the tool expects
-- Whether separate calls are needed for each animal or one combined call
-
-## Next Steps
-1. **Write the 500-word essay** covering dogs, cats, and birds with detailed information about each animal
-2. **Address the tool issue** by either:
-- Informing the user that \`submit_analysis\` is not available in my toolkit
-- Asking for clarification about what tool they actually meant or how they want the analysis submitted
-- Demonstrating what the tool call would look like if it existed
-3. **Deliver the essay** in a clear, organized format regardless of tool availability
-
-## Context to Preserve
-- User expects both written content (essay) AND a tool interaction
-- The essay should be substantive and detailed, not superficial
-- All three animals must receive adequate coverage in the 500-word limit
-- No specific style, tone, or audience was specified for the essay (assume general informative style)
-- No clarification was provided about whether the essay and tool call should contain the same or different information
-</summary>",
-              "type": "text",
-            },
-          ],
-          "role": "user",
-        }
-      `);
-    });
-  });
 });
