@@ -217,7 +217,7 @@ export const AppContextProvider = ({
217217 // prepare params
218218 const params = {
219219 messages,
220- stream : config . streamResponse ,
220+ stream : true ,
221221 cache_prompt : true ,
222222 samplers : config . samplers ,
223223 temperature : config . temperature ,
@@ -266,58 +266,44 @@ export const AppContextProvider = ({
266266 let lastMsgId = pendingMsg . id ;
267267 let shouldContinueChain = false ;
268268
269- if ( params . stream ) {
270- const chunks = getSSEStreamAsync ( fetchResponse ) ;
271- for await ( const chunk of chunks ) {
272- // const stop = chunk.stop;
273- if ( chunk . error ) {
274- throw new Error ( chunk . error ?. message || 'Unknown error' ) ;
275- }
276- const addedContent = chunk . choices [ 0 ] . delta . content ;
277- const lastContent = pendingMsg . content || '' ;
278- if ( addedContent ) {
279- pendingMsg = {
280- ...pendingMsg ,
281- content : lastContent + addedContent ,
282- } ;
283- }
284- const timings = chunk . timings ;
285- if ( timings && config . showTokensPerSecond ) {
286- // only extract what's really needed, to save some space
287- pendingMsg . timings = {
288- prompt_n : timings . prompt_n ,
289- prompt_ms : timings . prompt_ms ,
290- predicted_n : timings . predicted_n ,
291- predicted_ms : timings . predicted_ms ,
292- } ;
293- }
294- setPending ( convId , pendingMsg ) ;
295- onChunk ( ) ; // don't need to switch node for pending message
296- }
297- } else {
298- const responseData = await fetchResponse . json ( ) ;
299- if ( responseData . error ) {
300- throw new Error ( responseData . error ?. message || 'Unknown error' ) ;
269+ const chunks = getSSEStreamAsync ( fetchResponse ) ;
270+ for await ( const chunk of chunks ) {
271+ // const stop = chunk.stop;
272+ if ( chunk . error ) {
273+ throw new Error ( chunk . error ?. message || 'Unknown error' ) ;
301274 }
302275
303- const choice = responseData . choices [ 0 ] ;
304- const messageFromAPI = choice . message ;
305- let newContent = '' ;
306-
307- if ( messageFromAPI . content ) {
308- newContent = messageFromAPI . content ;
276+ const choice = chunk . choices [ 0 ] ;
277+ const addedContent = choice . delta . content ;
278+ const lastContent = pendingMsg . content || '' ;
279+ if ( addedContent ) {
280+ pendingMsg = {
281+ ...pendingMsg ,
282+ content : lastContent + addedContent ,
283+ } ;
309284 }
310285
311- // Process tool calls
312- if ( messageFromAPI . tool_calls && messageFromAPI . tool_calls . length > 0 ) {
313- // Store the raw tool calls in the pendingMsg
286+ const addedToolCalls = choice . delta . tool_calls ;
287+ if ( addedToolCalls ) {
288+ let lastToolCalls = pendingMsg . tool_calls ;
289+ if ( lastToolCalls ) {
290+ for ( let i = 0 ; i < lastToolCalls . length ; ++ i ) {
291+ // Merge previous arguments with new ones
292+ lastToolCalls [ i ] . function . arguments +=
293+ addedToolCalls [ i ] . function . arguments ;
294+ }
295+ } else {
296+ // addedToolCalls contains definitions of tool calls
297+ lastToolCalls = addedToolCalls ;
298+ }
314299 pendingMsg = {
315300 ...pendingMsg ,
316- tool_calls : messageFromAPI . tool_calls as ToolCallRequest [ ] ,
301+ tool_calls : lastToolCalls ,
317302 } ;
318-
319- for ( let i = 0 ; i < messageFromAPI . tool_calls . length ; i ++ ) {
320- const toolCall = messageFromAPI . tool_calls [ i ] as ToolCallRequest ;
303+ } else if ( pendingMsg . tool_calls && pendingMsg . tool_calls . length > 0 ) {
304+ // Finished tool calls, execute them
305+ for ( let i = 0 ; i < pendingMsg . tool_calls . length ; i ++ ) {
306+ const toolCall = pendingMsg . tool_calls [ i ] as ToolCallRequest ;
321307 if ( toolCall ) {
322308 // Set up call id
323309 toolCall . call_id ??= `call_${ i } ` ;
@@ -343,32 +329,22 @@ export const AppContextProvider = ({
343329 lastMsgId += 1 ;
344330 }
345331 }
346- }
347332
348- if ( newContent !== '' ) {
349- pendingMsg = {
350- ...pendingMsg ,
351- content : newContent ,
352- } ;
333+ shouldContinueChain = choice . finish_reason === 'tool_calls' ;
353334 }
354335
355- // Handle timings from the non-streaming response
356- const apiTimings = responseData . timings ;
357- if ( apiTimings && config . showTokensPerSecond ) {
336+ const timings = chunk . timings ;
337+ if ( timings && config . showTokensPerSecond ) {
338+ // only extract what's really needed, to save some space
358339 pendingMsg . timings = {
359- prompt_n : apiTimings . prompt_n ,
360- prompt_ms : apiTimings . prompt_ms ,
361- predicted_n : apiTimings . predicted_n ,
362- predicted_ms : apiTimings . predicted_ms ,
340+ prompt_n : timings . prompt_n ,
341+ prompt_ms : timings . prompt_ms ,
342+ predicted_n : timings . predicted_n ,
343+ predicted_ms : timings . predicted_ms ,
363344 } ;
364345 }
365-
366- for ( const pendMsg of pendingMessages ) {
367- setPending ( convId , pendMsg ) ;
368- onChunk ( pendMsg . id ) ; // Update UI to show the processed message
369- }
370-
371- shouldContinueChain = choice . finish_reason === 'tool_calls' ;
346+ setPending ( convId , pendingMsg ) ;
347+ onChunk ( ) ; // don't need to switch node for pending message
372348 }
373349
374350 pendingMessages . unshift ( pendingMsg ) ;
0 commit comments