
Commit 61d9f37

update other providers

1 parent: 23afc46

9 files changed: +422 −187 lines


apps/sim/providers/cerebras/index.ts

Lines changed: 51 additions & 28 deletions

@@ -12,6 +12,7 @@ import type {
   TimeSegment,
 } from '@/providers/types'
 import {
+  calculateCost,
   prepareToolExecution,
   prepareToolsWithUsageControl,
   trackForcedToolUsage,
@@ -124,30 +125,40 @@ export const cerebrasProvider: ProviderConfig = {
       }
     }
 
-    // EARLY STREAMING: if streaming requested and no tools to execute, stream directly
     if (request.stream && (!tools || tools.length === 0)) {
       logger.info('Using streaming response for Cerebras request (no tools)')
+
       const streamResponse: any = await client.chat.completions.create({
         ...payload,
         stream: true,
       })
 
-      // Start collecting token usage
-      const tokenUsage = {
-        prompt: 0,
-        completion: 0,
-        total: 0,
-      }
-
-      // Create a StreamingExecution response with a readable stream
       const streamingResult = {
-        stream: createReadableStreamFromCerebrasStream(streamResponse),
+        stream: createReadableStreamFromCerebrasStream(streamResponse, (content, usage) => {
+          streamingResult.execution.output.content = content
+          streamingResult.execution.output.tokens = {
+            prompt: usage.prompt_tokens,
+            completion: usage.completion_tokens,
+            total: usage.total_tokens,
+          }
+
+          const costResult = calculateCost(
+            request.model,
+            usage.prompt_tokens,
+            usage.completion_tokens
+          )
+          streamingResult.execution.output.cost = {
+            input: costResult.input,
+            output: costResult.output,
+            total: costResult.total,
+          }
+        }),
         execution: {
           success: true,
           output: {
-            content: '', // Will be filled by streaming content in chat component
+            content: '',
             model: request.model || 'cerebras/llama-3.3-70b',
-            tokens: tokenUsage,
+            tokens: { prompt: 0, completion: 0, total: 0 },
             toolCalls: undefined,
             providerTiming: {
               startTime: providerStartTimeISO,
@@ -163,14 +174,9 @@ export const cerebrasProvider: ProviderConfig = {
               },
             ],
           },
-          // Estimate token cost
-          cost: {
-            total: 0.0,
-            input: 0.0,
-            output: 0.0,
-          },
+          cost: { input: 0, output: 0, total: 0 },
         },
-        logs: [], // No block logs for direct streaming
+        logs: [],
         metadata: {
           startTime: providerStartTimeISO,
           endTime: new Date().toISOString(),
@@ -180,7 +186,6 @@ export const cerebrasProvider: ProviderConfig = {
        },
      }
 
-      // Return the streaming execution object
      return streamingResult as StreamingExecution
    }
 
@@ -473,13 +478,32 @@ export const cerebrasProvider: ProviderConfig = {
 
      const streamResponse: any = await client.chat.completions.create(streamingPayload)
 
-      // Create a StreamingExecution response with all collected data
+      const accumulatedCost = calculateCost(request.model, tokens.prompt, tokens.completion)
+
      const streamingResult = {
-        stream: createReadableStreamFromCerebrasStream(streamResponse),
+        stream: createReadableStreamFromCerebrasStream(streamResponse, (content, usage) => {
+          streamingResult.execution.output.content = content
+          streamingResult.execution.output.tokens = {
+            prompt: tokens.prompt + usage.prompt_tokens,
+            completion: tokens.completion + usage.completion_tokens,
+            total: tokens.total + usage.total_tokens,
+          }
+
+          const streamCost = calculateCost(
+            request.model,
+            usage.prompt_tokens,
+            usage.completion_tokens
+          )
+          streamingResult.execution.output.cost = {
+            input: accumulatedCost.input + streamCost.input,
+            output: accumulatedCost.output + streamCost.output,
+            total: accumulatedCost.total + streamCost.total,
+          }
+        }),
        execution: {
          success: true,
          output: {
-            content: '', // Will be filled by the callback
+            content: '',
            model: request.model || 'cerebras/llama-3.3-70b',
            tokens: {
              prompt: tokens.prompt,
@@ -504,12 +528,12 @@ export const cerebrasProvider: ProviderConfig = {
              timeSegments: timeSegments,
            },
            cost: {
-              total: (tokens.total || 0) * 0.0001,
-              input: (tokens.prompt || 0) * 0.0001,
-              output: (tokens.completion || 0) * 0.0001,
+              input: accumulatedCost.input,
+              output: accumulatedCost.output,
+              total: accumulatedCost.total,
            },
          },
-        logs: [], // No block logs at provider level
+        logs: [],
        metadata: {
          startTime: providerStartTimeISO,
          endTime: new Date().toISOString(),
@@ -519,7 +543,6 @@ export const cerebrasProvider: ProviderConfig = {
        },
      }
 
-      // Return the streaming execution object
      return streamingResult as StreamingExecution
    }
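Note on the pattern above: the onComplete callback assigns into streamingResult from inside streamingResult's own initializer. This is legal TypeScript and safe at runtime, because the callback only fires once the stream has finished, long after the object literal has been constructed. A minimal sketch of the idiom (Usage and Result here are simplified stand-ins, not the real types from '@/providers/types'):

interface Usage {
  prompt_tokens: number
  completion_tokens: number
  total_tokens: number
}

interface Result {
  stream: ReadableStream<Uint8Array>
  execution: { output: { content: string; tokens: Usage | null } }
}

function buildStreamingResult(
  wrap: (onComplete: (content: string, usage: Usage) => void) => ReadableStream<Uint8Array>
): Result {
  // The arrow function closes over `streamingResult` before the const is
  // initialized; that works because the callback runs only after the stream
  // ends, when the object fully exists.
  const streamingResult: Result = {
    stream: wrap((content, usage) => {
      streamingResult.execution.output.content = content
      streamingResult.execution.output.tokens = usage
    }),
    execution: { output: { content: '', tokens: null } },
  }
  return streamingResult
}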

apps/sim/providers/cerebras/utils.ts

Lines changed: 45 additions & 6 deletions

@@ -1,19 +1,58 @@
-/**
- * Helper to convert a Cerebras streaming response (async iterable) into a ReadableStream.
- * Enqueues only the model's text delta chunks as UTF-8 encoded bytes.
- */
+import type { CompletionUsage } from 'openai/resources/completions'
+import { createLogger } from '@/lib/logs/console/logger'
+
+const logger = createLogger('CerebrasUtils')
+
+interface CerebrasChunk {
+  choices?: Array<{
+    delta?: {
+      content?: string
+    }
+  }>
+  usage?: {
+    prompt_tokens?: number
+    completion_tokens?: number
+    total_tokens?: number
+  }
+}
+
 export function createReadableStreamFromCerebrasStream(
-  cerebrasStream: AsyncIterable<any>
-): ReadableStream {
+  cerebrasStream: AsyncIterable<CerebrasChunk>,
+  onComplete?: (content: string, usage: CompletionUsage) => void
+): ReadableStream<Uint8Array> {
+  let fullContent = ''
+  let promptTokens = 0
+  let completionTokens = 0
+  let totalTokens = 0
+
   return new ReadableStream({
     async start(controller) {
       try {
         for await (const chunk of cerebrasStream) {
+          if (chunk.usage) {
+            promptTokens = chunk.usage.prompt_tokens ?? 0
+            completionTokens = chunk.usage.completion_tokens ?? 0
+            totalTokens = chunk.usage.total_tokens ?? 0
+          }
+
           const content = chunk.choices?.[0]?.delta?.content || ''
           if (content) {
+            fullContent += content
             controller.enqueue(new TextEncoder().encode(content))
           }
         }
+
+        if (onComplete) {
+          if (promptTokens === 0 && completionTokens === 0) {
+            logger.warn('Cerebras stream completed without usage data')
+          }
+          onComplete(fullContent, {
+            prompt_tokens: promptTokens,
+            completion_tokens: completionTokens,
+            total_tokens: totalTokens || promptTokens + completionTokens,
+          })
+        }
+
         controller.close()
       } catch (error) {
         controller.error(error)
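For reviewers, a consumption sketch of the new helper signature. The demo wiring below is illustrative only; the chunk shape follows the OpenAI-compatible stream the provider emits, where a final chunk carries the usage totals:

import { createReadableStreamFromCerebrasStream } from '@/providers/cerebras/utils'

async function demo(cerebrasStream: AsyncIterable<any>) {
  const stream = createReadableStreamFromCerebrasStream(cerebrasStream, (content, usage) => {
    // Fires exactly once, after the source iterable is exhausted.
    console.log('chars:', content.length, 'tokens:', usage.total_tokens)
  })

  // Drain the byte stream; each chunk is a UTF-8 encoded text delta.
  const reader = stream.getReader()
  const decoder = new TextDecoder()
  for (;;) {
    const { done, value } = await reader.read()
    if (done) break
    process.stdout.write(decoder.decode(value, { stream: true }))
  }
}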

apps/sim/providers/deepseek/index.ts

Lines changed: 56 additions & 28 deletions

@@ -11,6 +11,7 @@ import type {
   TimeSegment,
 } from '@/providers/types'
 import {
+  calculateCost,
   prepareToolExecution,
   prepareToolsWithUsageControl,
   trackForcedToolUsage,
@@ -118,7 +119,6 @@ export const deepseekProvider: ProviderConfig = {
      }
    }
 
-    // EARLY STREAMING: if streaming requested and no tools to execute, stream directly
    if (request.stream && (!tools || tools.length === 0)) {
      logger.info('Using streaming response for DeepSeek request (no tools)')
 
@@ -127,22 +127,35 @@ export const deepseekProvider: ProviderConfig = {
        stream: true,
      })
 
-      // Start collecting token usage
-      const tokenUsage = {
-        prompt: 0,
-        completion: 0,
-        total: 0,
-      }
-
-      // Create a StreamingExecution response with a readable stream
      const streamingResult = {
-        stream: createReadableStreamFromDeepseekStream(streamResponse),
+        stream: createReadableStreamFromDeepseekStream(
+          streamResponse as any,
+          (content, usage) => {
+            streamingResult.execution.output.content = content
+            streamingResult.execution.output.tokens = {
+              prompt: usage.prompt_tokens,
+              completion: usage.completion_tokens,
+              total: usage.total_tokens,
+            }
+
+            const costResult = calculateCost(
+              request.model,
+              usage.prompt_tokens,
+              usage.completion_tokens
+            )
+            streamingResult.execution.output.cost = {
+              input: costResult.input,
+              output: costResult.output,
+              total: costResult.total,
+            }
+          }
+        ),
        execution: {
          success: true,
          output: {
-            content: '', // Will be filled by streaming content in chat component
+            content: '',
            model: request.model || 'deepseek-chat',
-            tokens: tokenUsage,
+            tokens: { prompt: 0, completion: 0, total: 0 },
            toolCalls: undefined,
            providerTiming: {
              startTime: providerStartTimeISO,
@@ -158,14 +171,9 @@ export const deepseekProvider: ProviderConfig = {
              },
            ],
          },
-          // Estimate token cost
-          cost: {
-            total: 0.0,
-            input: 0.0,
-            output: 0.0,
-          },
+          cost: { input: 0, output: 0, total: 0 },
        },
-        logs: [], // No block logs for direct streaming
+        logs: [],
        metadata: {
          startTime: providerStartTimeISO,
          endTime: new Date().toISOString(),
@@ -175,7 +183,6 @@ export const deepseekProvider: ProviderConfig = {
        },
      }
 
-      // Return the streaming execution object
      return streamingResult as StreamingExecution
    }
 
@@ -450,13 +457,35 @@ export const deepseekProvider: ProviderConfig = {
 
      const streamResponse = await deepseek.chat.completions.create(streamingPayload)
 
-      // Create a StreamingExecution response with all collected data
+      const accumulatedCost = calculateCost(request.model, tokens.prompt, tokens.completion)
+
      const streamingResult = {
-        stream: createReadableStreamFromDeepseekStream(streamResponse),
+        stream: createReadableStreamFromDeepseekStream(
+          streamResponse as any,
+          (content, usage) => {
+            streamingResult.execution.output.content = content
+            streamingResult.execution.output.tokens = {
+              prompt: tokens.prompt + usage.prompt_tokens,
+              completion: tokens.completion + usage.completion_tokens,
+              total: tokens.total + usage.total_tokens,
+            }
+
+            const streamCost = calculateCost(
+              request.model,
+              usage.prompt_tokens,
+              usage.completion_tokens
+            )
+            streamingResult.execution.output.cost = {
+              input: accumulatedCost.input + streamCost.input,
+              output: accumulatedCost.output + streamCost.output,
+              total: accumulatedCost.total + streamCost.total,
+            }
+          }
+        ),
        execution: {
          success: true,
          output: {
-            content: '', // Will be filled by the callback
+            content: '',
            model: request.model || 'deepseek-chat',
            tokens: {
              prompt: tokens.prompt,
@@ -481,12 +510,12 @@ export const deepseekProvider: ProviderConfig = {
            timeSegments: timeSegments,
          },
          cost: {
-            total: (tokens.total || 0) * 0.0001,
-            input: (tokens.prompt || 0) * 0.0001,
-            output: (tokens.completion || 0) * 0.0001,
+            input: accumulatedCost.input,
+            output: accumulatedCost.output,
+            total: accumulatedCost.total,
          },
        },
-        logs: [], // No block logs at provider level
+        logs: [],
        metadata: {
          startTime: providerStartTimeISO,
          endTime: new Date().toISOString(),
@@ -496,7 +525,6 @@ export const deepseekProvider: ProviderConfig = {
        },
      }
 
-      // Return the streaming execution object
      return streamingResult as StreamingExecution
    }
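The DeepSeek changes mirror the Cerebras ones: model-aware pricing via calculateCost replaces the flat (tokens * 0.0001) estimate, and when tool calls ran before the final stream, the pre-stream cost is added field by field to the streamed response's cost. A sketch of that arithmetic, assuming calculateCost returns { input, output, total }; the import path below is a guess, since the diff does not show which module the providers import the helper from:

import { calculateCost } from '@/providers/utils' // assumed path

function combineCost(
  model: string,
  preStream: { prompt: number; completion: number },
  usage: { prompt_tokens: number; completion_tokens: number }
) {
  // Cost of the tool-calling rounds completed before the final stream.
  const accumulated = calculateCost(model, preStream.prompt, preStream.completion)
  // Cost of the streamed final response, priced from the same model table.
  const stream = calculateCost(model, usage.prompt_tokens, usage.completion_tokens)
  return {
    input: accumulated.input + stream.input,
    output: accumulated.output + stream.output,
    total: accumulated.total + stream.total,
  }
}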
