@@ -217,7 +217,7 @@ export const AppContextProvider = ({
217217 // prepare params
218218 const params = {
219219 messages,
220- stream : config . streamResponse ,
220+ stream : true ,
221221 cache_prompt : true ,
222222 samplers : config . samplers ,
223223 temperature : config . temperature ,
@@ -266,58 +266,44 @@ export const AppContextProvider = ({
266266 let lastMsgId = pendingMsg . id ;
267267 let shouldContinueChain = false ;
268268
269- if ( params . stream ) {
270- const chunks = getSSEStreamAsync ( fetchResponse ) ;
271- for await ( const chunk of chunks ) {
272- // const stop = chunk.stop;
273- if ( chunk . error ) {
274- throw new Error ( chunk . error ?. message || 'Unknown error' ) ;
275- }
276- const addedContent = chunk . choices [ 0 ] . delta . content ;
277- const lastContent = pendingMsg . content || '' ;
278- if ( addedContent ) {
279- pendingMsg = {
280- ...pendingMsg ,
281- content : lastContent + addedContent ,
282- } ;
283- }
284- const timings = chunk . timings ;
285- if ( timings && config . showTokensPerSecond ) {
286- // only extract what's really needed, to save some space
287- pendingMsg . timings = {
288- prompt_n : timings . prompt_n ,
289- prompt_ms : timings . prompt_ms ,
290- predicted_n : timings . predicted_n ,
291- predicted_ms : timings . predicted_ms ,
292- } ;
293- }
294- setPending ( convId , pendingMsg ) ;
295- onChunk ( ) ; // don't need to switch node for pending message
296- }
297- } else {
298- const responseData = await fetchResponse . json ( ) ;
299- if ( responseData . error ) {
300- throw new Error ( responseData . error ?. message || 'Unknown error' ) ;
269+ const chunks = getSSEStreamAsync ( fetchResponse ) ;
270+ for await ( const chunk of chunks ) {
271+ // const stop = chunk.stop;
272+ if ( chunk . error ) {
273+ throw new Error ( chunk . error ?. message || 'Unknown error' ) ;
301274 }
302275
303- const choice = responseData . choices [ 0 ] ;
304- const messageFromAPI = choice . message ;
305- let newContent = '' ;
306-
307- if ( messageFromAPI . content ) {
308- newContent = messageFromAPI . content ;
276+ const choice = chunk . choices [ 0 ] ;
277+ const addedContent = choice . delta . content ;
278+ const lastContent = pendingMsg . content || '' ;
279+ if ( addedContent ) {
280+ pendingMsg = {
281+ ...pendingMsg ,
282+ content : lastContent + addedContent ,
283+ } ;
309284 }
310285
311- // Process tool calls
312- if ( messageFromAPI . tool_calls && messageFromAPI . tool_calls . length > 0 ) {
313- // Store the raw tool calls in the pendingMsg
286+ const addedToolCalls = choice . delta . tool_calls ;
287+ if ( addedToolCalls ) {
288+ let lastToolCalls = pendingMsg . tool_calls ;
289+ if ( lastToolCalls ) {
290+ for ( let i = 0 ; i < lastToolCalls . length ; ++ i ) {
291+ // Merge previous arguments with new ones
292+ lastToolCalls [ i ] . function . arguments +=
293+ addedToolCalls [ i ] . function . arguments ;
294+ }
295+ } else {
296+ // addedToolCalls contains definitions of tool calls
297+ lastToolCalls = addedToolCalls ;
298+ }
314299 pendingMsg = {
315300 ...pendingMsg ,
316- tool_calls : messageFromAPI . tool_calls as ToolCallRequest [ ] ,
301+ tool_calls : lastToolCalls ,
317302 } ;
318-
319- for ( let i = 0 ; i < messageFromAPI . tool_calls . length ; i ++ ) {
320- const toolCall = messageFromAPI . tool_calls [ i ] as ToolCallRequest ;
303+ } else if ( pendingMsg . tool_calls && pendingMsg . tool_calls . length > 0 ) {
304+ // Finished tool calls, execute them
305+ for ( let i = 0 ; i < pendingMsg . tool_calls . length ; i ++ ) {
306+ const toolCall = pendingMsg . tool_calls [ i ] as ToolCallRequest ;
321307 if ( toolCall ) {
322308 // Set up call id
323309 toolCall . call_id ??= `call_${ i } ` ;
@@ -343,32 +329,22 @@ export const AppContextProvider = ({
343329 lastMsgId += 1 ;
344330 }
345331 }
346- }
347332
348- if ( newContent !== '' ) {
349- pendingMsg = {
350- ...pendingMsg ,
351- content : newContent ,
352- } ;
333+ shouldContinueChain = choice . finish_reason === 'tool_calls' ;
353334 }
354335
355- // Handle timings from the non-streaming response
356- const apiTimings = responseData . timings ;
357- if ( apiTimings && config . showTokensPerSecond ) {
336+ const timings = chunk . timings ;
337+ if ( timings && config . showTokensPerSecond ) {
338+ // only extract what's really needed, to save some space
358339 pendingMsg . timings = {
359- prompt_n : apiTimings . prompt_n ,
360- prompt_ms : apiTimings . prompt_ms ,
361- predicted_n : apiTimings . predicted_n ,
362- predicted_ms : apiTimings . predicted_ms ,
340+ prompt_n : timings . prompt_n ,
341+ prompt_ms : timings . prompt_ms ,
342+ predicted_n : timings . predicted_n ,
343+ predicted_ms : timings . predicted_ms ,
363344 } ;
364345 }
365-
366- for ( const pendMsg of pendingMessages ) {
367- setPending ( convId , pendMsg ) ;
368- onChunk ( pendMsg . id ) ; // Update UI to show the processed message
369- }
370-
371- shouldContinueChain = choice . finish_reason === 'tool_calls' ;
346+ setPending ( convId , pendingMsg ) ;
347+ onChunk ( ) ; // don't need to switch node for pending message
372348 }
373349
374350 pendingMessages . unshift ( pendingMsg ) ;
0 commit comments