@@ -3,7 +3,7 @@ import { betaZodTool } from '../../../src/helpers/beta/zod';
33import * as z from 'zod' ;
44import nock from 'nock' ;
55import { gunzipSync } from 'zlib' ;
6- import { RequestInfo } from '@ openai/sdk /internal/builtin-types' ;
6+ import { RequestInfo } from 'openai/internal/builtin-types' ;
77
88describe ( 'toolRunner integration tests' , ( ) => {
99 let client : OpenAI ;
@@ -78,7 +78,7 @@ describe('toolRunner integration tests', () => {
7878 'Global fetch is not available. Ensure you are using Node.js 18+ or have undici available.' ,
7979 ) ;
8080 }
81- return globalFetch ( input , init ) ;
81+ return await globalFetch ( input , init ) ;
8282 } ;
8383
8484 client = new OpenAI ( {
@@ -123,12 +123,16 @@ describe('toolRunner integration tests', () => {
123123 it ( 'should answer tools and run until completion' , async ( ) => {
124124 const tool = createTestTool ( ) ;
125125
126- const runner = client . beta . messages . toolRunner ( {
126+ const runner = client . beta . chat . completions . toolRunner ( {
127127 model : 'gpt-4o' ,
128128 max_tokens : 1000 ,
129129 max_iterations : 5 , // High limit, should stop before reaching it
130130 messages : [
131- { role : 'user' , content : 'Use the test_tool with value "test", then provide a final response' } ,
131+ {
132+ role : 'user' ,
133+ content :
134+ 'Use the test_tool with value "test", then provide a final response that includes the word \'foo\'.' ,
135+ } ,
132136 ] ,
133137 tools : [ tool ] ,
134138 } ) ;
@@ -142,34 +146,34 @@ describe('toolRunner integration tests', () => {
142146 expect ( messages ) . toHaveLength ( 2 ) ;
143147
144148 // First message should contain one tool use
145- const firstMessage = messages [ 0 ] ! ;
146- expect ( firstMessage . role ) . toBe ( 'assistant' ) ;
147- expect ( firstMessage . content ) . toHaveLength ( 2 ) ; // text + tool_use
148-
149- const toolUseBlocks = firstMessage . content . filter ( ( block ) => block . type === 'tool_use' ) ;
150- expect ( toolUseBlocks ) . toHaveLength ( 1 ) ;
151- expect ( toolUseBlocks [ 0 ] ! . name ) . toBe ( 'test_tool' ) ;
152- expect ( toolUseBlocks [ 0 ] ! . input ) . toEqual ( { value : 'test' } ) ;
153- expect ( firstMessage . stop_reason ) . toBe ( 'tool_use' ) ;
154-
155- // Second message should be final response
156- const secondMessage = messages [ 1 ] ! ;
157- expect ( secondMessage . role ) . toBe ( 'assistant' ) ;
158- expect ( secondMessage . content ) . toHaveLength ( 1 ) ;
159- expect ( secondMessage . content [ 0 ] ! . type ) . toBe ( 'text' ) ;
160- expect ( secondMessage . stop_reason ) . toBe ( 'end_turn' ) ;
149+ const firstMessage = messages [ 0 ] ! . choices [ 0 ] ! ;
150+ expect ( firstMessage . message . role ) . toBe ( 'assistant' ) ;
151+ expect ( firstMessage . message . content ) . toBeNull ( ) ; // openai only responds with tool use and null content
152+ expect ( firstMessage . message . tool_calls ) . toHaveLength ( 1 ) ; // the tool call should be present
153+ expect ( firstMessage . finish_reason ) . toBe ( 'tool_calls' ) ;
154+
155+ // Second message should be final response with text
156+ expect ( messages [ 1 ] ! . choices ) . toHaveLength ( 1 ) ;
157+ const secondMessage = messages [ 1 ] ! . choices [ 0 ] ! ;
158+ expect ( secondMessage . message . role ) . toBe ( 'assistant' ) ;
159+ expect ( secondMessage . message . content ) . toContain ( 'foo' ) ;
160+ expect ( secondMessage . finish_reason ) . toBe ( 'stop' ) ;
161161 } ) ;
162162
163163 describe ( 'max_iterations' , ( ) => {
164164 it ( 'should respect max_iterations limit' , async ( ) => {
165165 const tool = createCounterTool ( ) ;
166166
167- const runner = client . beta . messages . toolRunner ( {
167+ const runner = client . beta . chat . completions . toolRunner ( {
168168 model : 'gpt-4o' ,
169169 max_tokens : 1000 ,
170170 max_iterations : 2 ,
171171 messages : [
172- { role : 'user' , content : 'Use the test_tool with count 1, then use it again with count 2' } ,
172+ {
173+ role : 'user' ,
174+ content :
175+ "Use the test_tool with count 1, then use it again with count 2, then say '231' in the final message" ,
176+ } ,
173177 ] ,
174178 tools : [ tool ] ,
175179 } ) ;
@@ -183,35 +187,53 @@ describe('toolRunner integration tests', () => {
183187 expect ( messages ) . toHaveLength ( 2 ) ;
184188
185189 // First message should contain tool uses
186- const firstMessage = messages [ 0 ] ! ;
187- expect ( firstMessage . role ) . toBe ( 'assistant' ) ;
188- expect ( firstMessage . content ) . toHaveLength ( 3 ) ; // text + 2 tool_use blocks
190+ const firstMessage = messages [ 0 ] ! . choices [ 0 ] ! ;
191+ expect ( firstMessage . message . role ) . toBe ( 'assistant' ) ;
192+ expect ( firstMessage . message . content ) . toBeNull ( ) ;
193+ expect ( firstMessage . message . tool_calls ) . toHaveLength ( 2 ) ;
189194
190- const toolUseBlocks = firstMessage . content . filter ( ( block ) => block . type === 'tool_use' ) ;
195+ const { tool_calls : toolUseBlocks } = firstMessage . message ;
196+ expect ( toolUseBlocks ) . toBeDefined ( ) ;
191197 expect ( toolUseBlocks ) . toHaveLength ( 2 ) ;
192- expect ( toolUseBlocks [ 0 ] ! . name ) . toBe ( 'test_tool' ) ;
193- expect ( toolUseBlocks [ 0 ] ! . input ) . toEqual ( { count : 1 } ) ;
194- expect ( toolUseBlocks [ 1 ] ! . name ) . toBe ( 'test_tool' ) ;
195- expect ( toolUseBlocks [ 1 ] ! . input ) . toEqual ( { count : 2 } ) ;
196-
197- // Second message should be final response
198- const secondMessage = messages [ 1 ] ! ;
199- expect ( secondMessage . role ) . toBe ( 'assistant' ) ;
200- expect ( secondMessage . content ) . toHaveLength ( 1 ) ;
201- expect ( secondMessage . content [ 0 ] ! . type ) . toBe ( 'text' ) ;
202- expect ( secondMessage . stop_reason ) . toBe ( 'end_turn' ) ;
198+
199+ if ( toolUseBlocks && toolUseBlocks [ 0 ] && toolUseBlocks [ 0 ] . type === 'function' ) {
200+ expect ( toolUseBlocks [ 0 ] . function ) . toBeDefined ( ) ;
201+ expect ( toolUseBlocks [ 0 ] . function . name ) . toBe ( 'test_tool' ) ;
202+ expect ( JSON . parse ( toolUseBlocks [ 0 ] . function . arguments ) ) . toEqual ( { count : 1 } ) ;
203+ } else {
204+ // Doing it with an if else to get nice type inference
205+ throw new Error ( 'Expected tool call at index 0 to be a function' ) ;
206+ }
207+
208+ if ( toolUseBlocks && toolUseBlocks [ 1 ] && toolUseBlocks [ 1 ] . type === 'function' ) {
209+ expect ( toolUseBlocks [ 1 ] . function ) . toBeDefined ( ) ;
210+ expect ( toolUseBlocks [ 1 ] . function . name ) . toBe ( 'test_tool' ) ;
211+ expect ( JSON . parse ( toolUseBlocks [ 1 ] . function . arguments ) ) . toEqual ( { count : 2 } ) ;
212+ } else {
213+ throw new Error ( 'Expected tool call at index 1 to be a function' ) ;
214+ }
215+
216+ // Second message should be final response (not a tool call)
217+ const secondMessage = messages [ 1 ] ! . choices [ 0 ] ! ;
218+ expect ( secondMessage . message . role ) . toBe ( 'assistant' ) ;
219+ expect ( secondMessage . message . content ) . toContain ( '231' ) ;
220+ expect ( secondMessage . finish_reason ) . toBe ( 'stop' ) ;
203221 } ) ;
204222 } ) ;
205223
206224 describe ( 'done()' , ( ) => {
207225 it ( 'should consume the iterator and return final message' , async ( ) => {
208226 const tool = createTestTool ( { inputSchema : z . object ( { input : z . string ( ) } ) } ) ;
209227
210- const runner = client . beta . messages . toolRunner ( {
228+ const runner = client . beta . chat . completions . toolRunner ( {
211229 model : 'gpt-4o' ,
212230 max_tokens : 1000 ,
213231 messages : [
214- { role : 'user' , content : 'Use the test_tool with input "test", then provide a final response' } ,
232+ {
233+ role : 'user' ,
234+ content :
235+ 'Use the test_tool with input "test", then provide a final response with the word \'231\'' ,
236+ } ,
215237 ] ,
216238 tools : [ tool ] ,
217239 } ) ;
@@ -220,17 +242,16 @@ describe('toolRunner integration tests', () => {
220242
221243 // Final message should be the last text-only response
222244 expect ( finalMessage . role ) . toBe ( 'assistant' ) ;
223- expect ( finalMessage . content ) . toHaveLength ( 1 ) ;
224- expect ( finalMessage . content [ 0 ] ) . toHaveProperty ( 'type' , 'text' ) ;
225- expect ( finalMessage . stop_reason ) . toBe ( 'end_turn' ) ;
245+ expect ( finalMessage . tool_calls ) . toBeUndefined ( ) ;
246+ expect ( finalMessage . content ) . toContain ( '231' ) ;
226247 } ) ;
227248 } ) ;
228249
229250 describe ( 'setMessagesParams()' , ( ) => {
230251 it ( 'should update parameters using direct assignment' , async ( ) => {
231252 const tool = createTestTool ( ) ;
232253
233- const runner = client . beta . messages . toolRunner ( {
254+ const runner = client . beta . chat . completions . toolRunner ( {
234255 model : 'gpt-4o' ,
235256 max_tokens : 1000 ,
236257 messages : [ { role : 'user' , content : 'Hello' } ] ,
@@ -251,98 +272,4 @@ describe('toolRunner integration tests', () => {
251272 expect ( params . messages ) . toEqual ( [ { role : 'user' , content : 'Updated message' } ] ) ;
252273 } ) ;
253274 } ) ;
254-
255- describe ( 'compaction' , ( ) => {
256- it ( 'should compact messages when token threshold is exceeded' , async ( ) => {
257- const tool = {
258- name : 'submit_analysis' ,
259- description : 'Call this LAST with your final analysis.' ,
260- input_schema : {
261- type : 'object' as const ,
262- properties : {
263- summary : {
264- type : 'string' as const ,
265- } ,
266- } ,
267- required : [ 'summary' ] ,
268- } ,
269- run : async ( input : { summary : string } ) => {
270- return 'Analysis submitted' ;
271- } ,
272- } ;
273-
274- const runner = client . beta . messages . toolRunner ( {
275- model : 'gpt-4o' ,
276- max_tokens : 4000 ,
277- messages : [
278- {
279- role : 'user' ,
280- content :
281- 'Write a detailed 500-word essay about dogs, cats, and birds. ' +
282- 'Call the tool `submit_analysis` with the information about all three animals ' ,
283- } ,
284- ] ,
285- tools : [ tool ] ,
286- compactionControl : {
287- enabled : true ,
288- contextTokenThreshold : 500 , // Low threshold to trigger compaction
289- } ,
290- max_iterations : 1 ,
291- } ) ;
292-
293- await runner . runUntilDone ( ) ;
294- expect ( runner . params . messages [ 0 ] ) . toMatchInlineSnapshot ( `
295- {
296- "content": [
297- {
298- "text": "<summary>
299- ## Task Overview
300- The user requested:
301- 1. Write a detailed 500-word essay about dogs, cats, and birds
302- 2. Call a tool named \`submit_analysis\` with information about all three animals
303-
304- Success criteria:
305- - Essay must be approximately 500 words
306- - Must cover all three animals (dogs, cats, and birds)
307- - Must be detailed
308- - Must call the \`submit_analysis\` tool with the relevant information
309-
310- ## Current State
311- **Completed:** Nothing has been completed yet.
312-
313- **Status:** The task has just been assigned. No essay has been written, and no tool has been called.
314-
315- ## Important Discoveries
316- **Key Issue Identified:** The tool \`submit_analysis\` does not exist in my available tool set. I need to:
317- 1. Either inform the user that this tool is not available, OR
318- 2. Proceed with writing the essay and explain that I cannot call the non-existent tool
319-
320- **Technical Constraint:** Without knowing the expected parameters/schema for \`submit_analysis\`, even if it were available, I would need clarification on:
321- - What format the information should take (structured data, summary points, the full essay text?)
322- - What specific fields or parameters the tool expects
323- - Whether separate calls are needed for each animal or one combined call
324-
325- ## Next Steps
326- 1. **Write the 500-word essay** covering dogs, cats, and birds with detailed information about each animal
327- 2. **Address the tool issue** by either:
328- - Informing the user that \`submit_analysis\` is not available in my toolkit
329- - Asking for clarification about what tool they actually meant or how they want the analysis submitted
330- - Demonstrating what the tool call would look like if it existed
331- 3. **Deliver the essay** in a clear, organized format regardless of tool availability
332-
333- ## Context to Preserve
334- - User expects both written content (essay) AND a tool interaction
335- - The essay should be substantive and detailed, not superficial
336- - All three animals must receive adequate coverage in the 500-word limit
337- - No specific style, tone, or audience was specified for the essay (assume general informative style)
338- - No clarification was provided about whether the essay and tool call should contain the same or different information
339- </summary>",
340- "type": "text",
341- },
342- ],
343- "role": "user",
344- }
345- ` ) ;
346- } ) ;
347- } ) ;
348275} ) ;
0 commit comments