Commit 23afc46: cleanup provider code

1 parent: 03c0adc

20 files changed: +767 additions, -525 deletions

apps/sim/executor/handlers/agent/agent-handler.ts

Lines changed: 36 additions & 42 deletions
@@ -68,7 +68,20 @@ export class AgentBlockHandler implements BlockHandler {
       filteredInputs
     )

-    await this.persistResponseToMemory(ctx, filteredInputs, result)
+    if (this.isStreamingExecution(result)) {
+      if (filteredInputs.memoryType && filteredInputs.memoryType !== 'none') {
+        return this.wrapStreamForMemoryPersistence(
+          ctx,
+          filteredInputs,
+          result as StreamingExecution
+        )
+      }
+      return result
+    }
+
+    if (filteredInputs.memoryType && filteredInputs.memoryType !== 'none') {
+      await this.persistResponseToMemory(ctx, filteredInputs, result as BlockOutput)
+    }

     return result
   }
@@ -1035,28 +1048,14 @@ export class AgentBlockHandler implements BlockHandler {
   private async handleStreamingResponse(
     response: Response,
     block: SerializedBlock,
-    ctx?: ExecutionContext,
-    inputs?: AgentInputs
+    _ctx?: ExecutionContext,
+    _inputs?: AgentInputs
   ): Promise<StreamingExecution> {
     const executionDataHeader = response.headers.get('X-Execution-Data')

     if (executionDataHeader) {
       try {
         const executionData = JSON.parse(executionDataHeader)
-
-        if (ctx && inputs && executionData.output?.content) {
-          const assistantMessage: Message = {
-            role: 'assistant',
-            content: executionData.output.content,
-          }
-
-          memoryService
-            .persistMemoryMessage(ctx, inputs, assistantMessage)
-            .catch((error) =>
-              logger.error('Failed to persist streaming response to memory:', error)
-            )
-        }
-
         return {
           stream: response.body!,
           execution: {
@@ -1156,45 +1155,40 @@ export class AgentBlockHandler implements BlockHandler {
     }
   }

+  private wrapStreamForMemoryPersistence(
+    ctx: ExecutionContext,
+    inputs: AgentInputs,
+    streamingExec: StreamingExecution
+  ): StreamingExecution {
+    return {
+      stream: memoryService.wrapStreamForPersistence(
+        streamingExec.stream,
+        ctx,
+        inputs,
+        streamingExec.execution?.output
+      ),
+      execution: streamingExec.execution,
+    }
+  }
+
   private async persistResponseToMemory(
     ctx: ExecutionContext,
     inputs: AgentInputs,
-    result: BlockOutput | StreamingExecution
+    result: BlockOutput
   ): Promise<void> {
-    // Only persist if memoryType is configured
-    if (!inputs.memoryType || inputs.memoryType === 'none') {
+    const content = (result as any)?.content
+    if (!content || typeof content !== 'string') {
       return
     }

     try {
-      // Don't persist streaming responses here - they're handled separately
-      if (this.isStreamingExecution(result)) {
-        return
-      }
-
-      // Extract content from regular response
-      const blockOutput = result as any
-      const content = blockOutput?.content
-
-      if (!content || typeof content !== 'string') {
-        return
-      }
-
-      const assistantMessage: Message = {
-        role: 'assistant',
-        content,
-      }
-
-      await memoryService.persistMemoryMessage(ctx, inputs, assistantMessage)
+      await memoryService.persistMemoryMessage(ctx, inputs, { role: 'assistant', content })
       logger.debug('Persisted assistant response to memory', {
         workflowId: ctx.workflowId,
-        memoryType: inputs.memoryType,
         conversationId: inputs.conversationId,
       })
     } catch (error) {
       logger.error('Failed to persist response to memory:', error)
-      // Don't throw - memory persistence failure shouldn't break workflow execution
     }
   }

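Note: the new dispatch in execute() leans on an isStreamingExecution type guard that is defined elsewhere in agent-handler.ts and does not appear in this diff. A plausible shape for it, assuming StreamingExecution is distinguished by carrying a ReadableStream (a hypothetical sketch, not the repository's actual code):

// Hypothetical sketch of the guard the dispatch above depends on; the real
// implementation lives elsewhere in agent-handler.ts.
private isStreamingExecution(
  result: BlockOutput | StreamingExecution
): result is StreamingExecution {
  return (
    typeof result === 'object' &&
    result !== null &&
    'stream' in result &&
    (result as StreamingExecution).stream instanceof ReadableStream
  )
}
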
apps/sim/executor/handlers/agent/memory.ts

Lines changed: 41 additions & 0 deletions
@@ -652,6 +652,47 @@ export class Memory {
       }
     }
   }
+
+  /**
+   * Wraps a streaming response to persist the assistant message when complete.
+   * Works model-agnostically by accumulating raw text chunks.
+   */
+  wrapStreamForPersistence(
+    stream: ReadableStream<Uint8Array>,
+    ctx: ExecutionContext,
+    inputs: AgentInputs,
+    executionOutput?: { content?: string }
+  ): ReadableStream<Uint8Array> {
+    let accumulatedContent = ''
+    const decoder = new TextDecoder()
+
+    const transformStream = new TransformStream<Uint8Array, Uint8Array>({
+      transform: (chunk, controller) => {
+        controller.enqueue(chunk)
+        accumulatedContent += decoder.decode(chunk, { stream: true })
+      },
+
+      flush: () => {
+        const finalContent = executionOutput?.content || accumulatedContent
+
+        if (finalContent?.trim()) {
+          this.persistMemoryMessage(ctx, inputs, { role: 'assistant', content: finalContent })
+            .then(() => {
+              logger.debug('Persisted streaming response to memory', {
+                workflowId: ctx.workflowId,
+                conversationId: inputs.conversationId,
+                contentLength: finalContent.length,
+              })
+            })
+            .catch((error) => {
+              logger.error('Failed to persist streaming response to memory:', error)
+            })
+        }
+      },
+    })

+    return stream.pipeThrough(transformStream)
+  }
 }

 export const memoryService = new Memory()

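The pattern above is a general way to observe a byte stream without disturbing its consumer: pipe it through a TransformStream that re-enqueues every chunk, accumulate the decoded text on the side, and act in flush() once the source is exhausted. A minimal self-contained sketch of the same technique (standard Web Streams API only; tapStream and onComplete are illustrative names, not part of this commit):

// Tap a ReadableStream<Uint8Array>: chunks pass through unchanged while the
// decoded text accumulates; onComplete fires after the last chunk is read.
function tapStream(
  source: ReadableStream<Uint8Array>,
  onComplete: (text: string) => void
): ReadableStream<Uint8Array> {
  const decoder = new TextDecoder()
  let text = ''
  return source.pipeThrough(
    new TransformStream<Uint8Array, Uint8Array>({
      transform(chunk, controller) {
        controller.enqueue(chunk) // the downstream consumer sees the chunk as-is
        text += decoder.decode(chunk, { stream: true })
      },
      flush() {
        text += decoder.decode() // flush any buffered partial multi-byte sequence
        onComplete(text)
      },
    })
  )
}
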
apps/sim/package.json

Lines changed: 1 addition & 0 deletions
@@ -37,6 +37,7 @@
     "@browserbasehq/stagehand": "^3.0.5",
     "@cerebras/cerebras_cloud_sdk": "^1.23.0",
     "@e2b/code-interpreter": "^2.0.0",
+    "@google/genai": "1.34.0",
     "@hookform/resolvers": "^4.1.3",
     "@opentelemetry/api": "^1.9.0",
     "@opentelemetry/exporter-jaeger": "2.1.0",

apps/sim/providers/anthropic/index.ts

Lines changed: 90 additions & 31 deletions
@@ -14,7 +14,11 @@ import type {
   ProviderResponse,
   TimeSegment,
 } from '@/providers/types'
-import { prepareToolExecution, prepareToolsWithUsageControl } from '@/providers/utils'
+import {
+  calculateCost,
+  prepareToolExecution,
+  prepareToolsWithUsageControl,
+} from '@/providers/utils'
 import { executeTool } from '@/tools'

 const logger = createLogger('AnthropicProvider')
@@ -255,28 +259,49 @@ ${fieldDescriptions}
       const providerStartTime = Date.now()
       const providerStartTimeISO = new Date(providerStartTime).toISOString()

-      // Create a streaming request
       const streamResponse: any = await anthropic.messages.create({
         ...payload,
         stream: true,
       })

-      // Start collecting token usage
-      const tokenUsage = {
-        prompt: 0,
-        completion: 0,
-        total: 0,
-      }
-
-      // Create a StreamingExecution response with a readable stream
       const streamingResult = {
-        stream: createReadableStreamFromAnthropicStream(streamResponse),
+        stream: createReadableStreamFromAnthropicStream(streamResponse, (content, usage) => {
+          streamingResult.execution.output.content = content
+          streamingResult.execution.output.tokens = {
+            prompt: usage.input_tokens,
+            completion: usage.output_tokens,
+            total: usage.input_tokens + usage.output_tokens,
+          }
+
+          const costResult = calculateCost(request.model, usage.input_tokens, usage.output_tokens)
+          streamingResult.execution.output.cost = {
+            input: costResult.input,
+            output: costResult.output,
+            total: costResult.total,
+          }
+
+          const streamEndTime = Date.now()
+          const streamEndTimeISO = new Date(streamEndTime).toISOString()
+
+          if (streamingResult.execution.output.providerTiming) {
+            streamingResult.execution.output.providerTiming.endTime = streamEndTimeISO
+            streamingResult.execution.output.providerTiming.duration =
+              streamEndTime - providerStartTime
+
+            if (streamingResult.execution.output.providerTiming.timeSegments?.[0]) {
+              streamingResult.execution.output.providerTiming.timeSegments[0].endTime =
+                streamEndTime
+              streamingResult.execution.output.providerTiming.timeSegments[0].duration =
+                streamEndTime - providerStartTime
+            }
+          }
+        }),
         execution: {
           success: true,
           output: {
-            content: '', // Will be filled by streaming content in chat component
+            content: '',
             model: request.model,
-            tokens: tokenUsage,
+            tokens: { prompt: 0, completion: 0, total: 0 },
             toolCalls: undefined,
             providerTiming: {
               startTime: providerStartTimeISO,
@@ -292,14 +317,13 @@ ${fieldDescriptions}
             },
           ],
         },
-        // Estimate token cost based on typical Claude pricing
         cost: {
           total: 0.0,
           input: 0.0,
           output: 0.0,
         },
       },
-      logs: [], // No block logs for direct streaming
+      logs: [],
       metadata: {
         startTime: providerStartTimeISO,
         endTime: new Date().toISOString(),
@@ -309,7 +333,6 @@ ${fieldDescriptions}
         },
       }

-      // Return the streaming execution object
       return streamingResult as StreamingExecution
     }

@@ -688,6 +711,17 @@ ${fieldDescriptions}
         (currentResponse.usage?.input_tokens || 0) + (currentResponse.usage?.output_tokens || 0),
     }

+    const initialCost = calculateCost(
+      request.model,
+      currentResponse.usage?.input_tokens || 0,
+      currentResponse.usage?.output_tokens || 0
+    )
+    const cost = {
+      input: initialCost.input,
+      output: initialCost.output,
+      total: initialCost.total,
+    }
+
     const toolCalls = []
     const toolResults = []
     const currentMessages = [...messages]
@@ -899,12 +933,21 @@ ${fieldDescriptions}
         content = textContent
       }

-      // Update token counts
+      // Update token counts and cost
       if (currentResponse.usage) {
         tokens.prompt += currentResponse.usage.input_tokens || 0
         tokens.completion += currentResponse.usage.output_tokens || 0
         tokens.total +=
           (currentResponse.usage.input_tokens || 0) + (currentResponse.usage.output_tokens || 0)
+
+        const iterationCost = calculateCost(
+          request.model,
+          currentResponse.usage.input_tokens || 0,
+          currentResponse.usage.output_tokens || 0
+        )
+        cost.input += iterationCost.input
+        cost.output += iterationCost.output
+        cost.total += iterationCost.total
       }

       iterationCount++
@@ -931,31 +974,47 @@ ${fieldDescriptions}
     const providerEndTimeISO = new Date(providerEndTime).toISOString()
     const totalDuration = providerEndTime - providerStartTime

-    // After all tool processing complete, if streaming was requested, use streaming for the final response
     if (request.stream) {
       logger.info('Using streaming for final Anthropic response after tool processing')

-      // When streaming after tool calls with forced tools, make sure tool_choice is removed
-      // This prevents the API from trying to force tool usage again in the final streaming response
       const streamingPayload = {
         ...payload,
         messages: currentMessages,
-        // For Anthropic, omit tool_choice entirely rather than setting it to 'none'
         stream: true,
+        tool_choice: undefined,
       }

-      // Remove the tool_choice parameter as Anthropic doesn't accept 'none' as a string value
-      streamingPayload.tool_choice = undefined
-
       const streamResponse: any = await anthropic.messages.create(streamingPayload)

-      // Create a StreamingExecution response with all collected data
       const streamingResult = {
-        stream: createReadableStreamFromAnthropicStream(streamResponse),
+        stream: createReadableStreamFromAnthropicStream(streamResponse, (content, usage) => {
+          streamingResult.execution.output.content = content
+          streamingResult.execution.output.tokens = {
+            prompt: tokens.prompt + usage.input_tokens,
+            completion: tokens.completion + usage.output_tokens,
+            total: tokens.total + usage.input_tokens + usage.output_tokens,
+          }
+
+          const streamCost = calculateCost(request.model, usage.input_tokens, usage.output_tokens)
+          streamingResult.execution.output.cost = {
+            input: cost.input + streamCost.input,
+            output: cost.output + streamCost.output,
+            total: cost.total + streamCost.total,
+          }
+
+          const streamEndTime = Date.now()
+          const streamEndTimeISO = new Date(streamEndTime).toISOString()
+
+          if (streamingResult.execution.output.providerTiming) {
+            streamingResult.execution.output.providerTiming.endTime = streamEndTimeISO
+            streamingResult.execution.output.providerTiming.duration =
+              streamEndTime - providerStartTime
+          }
+        }),
         execution: {
           success: true,
           output: {
-            content: '', // Will be filled by the callback
+            content: '',
             model: request.model || 'claude-3-7-sonnet-20250219',
             tokens: {
               prompt: tokens.prompt,
@@ -980,12 +1039,12 @@ ${fieldDescriptions}
               timeSegments: timeSegments,
             },
             cost: {
-              total: (tokens.total || 0) * 0.0001, // Estimate cost based on tokens
-              input: (tokens.prompt || 0) * 0.0001,
-              output: (tokens.completion || 0) * 0.0001,
+              input: cost.input,
+              output: cost.output,
+              total: cost.total,
             },
           },
-          logs: [], // No block logs at provider level
+          logs: [],
           metadata: {
             startTime: providerStartTimeISO,
             endTime: new Date().toISOString(),