
Commit 61d9f37

update other providers

1 parent: 23afc46

9 files changed: +422 −187 lines


apps/sim/providers/cerebras/index.ts

Lines changed: 51 additions & 28 deletions

@@ -12,6 +12,7 @@ import type {
   TimeSegment,
 } from '@/providers/types'
 import {
+  calculateCost,
   prepareToolExecution,
   prepareToolsWithUsageControl,
   trackForcedToolUsage,
@@ -124,30 +125,40 @@ export const cerebrasProvider: ProviderConfig = {
       }
     }
 
-    // EARLY STREAMING: if streaming requested and no tools to execute, stream directly
     if (request.stream && (!tools || tools.length === 0)) {
       logger.info('Using streaming response for Cerebras request (no tools)')
+
       const streamResponse: any = await client.chat.completions.create({
         ...payload,
         stream: true,
       })
 
-      // Start collecting token usage
-      const tokenUsage = {
-        prompt: 0,
-        completion: 0,
-        total: 0,
-      }
-
-      // Create a StreamingExecution response with a readable stream
       const streamingResult = {
-        stream: createReadableStreamFromCerebrasStream(streamResponse),
+        stream: createReadableStreamFromCerebrasStream(streamResponse, (content, usage) => {
+          streamingResult.execution.output.content = content
+          streamingResult.execution.output.tokens = {
+            prompt: usage.prompt_tokens,
+            completion: usage.completion_tokens,
+            total: usage.total_tokens,
+          }
+
+          const costResult = calculateCost(
+            request.model,
+            usage.prompt_tokens,
+            usage.completion_tokens
+          )
+          streamingResult.execution.output.cost = {
+            input: costResult.input,
+            output: costResult.output,
+            total: costResult.total,
+          }
+        }),
         execution: {
           success: true,
           output: {
-            content: '', // Will be filled by streaming content in chat component
+            content: '',
             model: request.model || 'cerebras/llama-3.3-70b',
-            tokens: tokenUsage,
+            tokens: { prompt: 0, completion: 0, total: 0 },
             toolCalls: undefined,
             providerTiming: {
               startTime: providerStartTimeISO,
@@ -163,14 +174,9 @@ export const cerebrasProvider: ProviderConfig = {
               },
             ],
           },
-          // Estimate token cost
-          cost: {
-            total: 0.0,
-            input: 0.0,
-            output: 0.0,
-          },
+          cost: { input: 0, output: 0, total: 0 },
         },
-        logs: [], // No block logs for direct streaming
+        logs: [],
         metadata: {
           startTime: providerStartTimeISO,
           endTime: new Date().toISOString(),
@@ -180,7 +186,6 @@ export const cerebrasProvider: ProviderConfig = {
        },
      }
 
-      // Return the streaming execution object
      return streamingResult as StreamingExecution
    }
 
@@ -473,13 +478,32 @@ export const cerebrasProvider: ProviderConfig = {
 
      const streamResponse: any = await client.chat.completions.create(streamingPayload)
 
-      // Create a StreamingExecution response with all collected data
+      const accumulatedCost = calculateCost(request.model, tokens.prompt, tokens.completion)
+
      const streamingResult = {
-        stream: createReadableStreamFromCerebrasStream(streamResponse),
+        stream: createReadableStreamFromCerebrasStream(streamResponse, (content, usage) => {
+          streamingResult.execution.output.content = content
+          streamingResult.execution.output.tokens = {
+            prompt: tokens.prompt + usage.prompt_tokens,
+            completion: tokens.completion + usage.completion_tokens,
+            total: tokens.total + usage.total_tokens,
+          }
+
+          const streamCost = calculateCost(
+            request.model,
+            usage.prompt_tokens,
+            usage.completion_tokens
+          )
+          streamingResult.execution.output.cost = {
+            input: accumulatedCost.input + streamCost.input,
+            output: accumulatedCost.output + streamCost.output,
+            total: accumulatedCost.total + streamCost.total,
+          }
+        }),
        execution: {
          success: true,
          output: {
-            content: '', // Will be filled by the callback
+            content: '',
            model: request.model || 'cerebras/llama-3.3-70b',
            tokens: {
              prompt: tokens.prompt,
@@ -504,12 +528,12 @@ export const cerebrasProvider: ProviderConfig = {
              timeSegments: timeSegments,
            },
            cost: {
-              total: (tokens.total || 0) * 0.0001,
-              input: (tokens.prompt || 0) * 0.0001,
-              output: (tokens.completion || 0) * 0.0001,
+              input: accumulatedCost.input,
+              output: accumulatedCost.output,
+              total: accumulatedCost.total,
            },
          },
-        logs: [], // No block logs at provider level
+        logs: [],
        metadata: {
          startTime: providerStartTimeISO,
          endTime: new Date().toISOString(),
@@ -519,7 +543,6 @@ export const cerebrasProvider: ProviderConfig = {
        },
      }
 
-      // Return the streaming execution object
      return streamingResult as StreamingExecution
    }
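Note on the pattern above: the onComplete callback assigns into streamingResult from inside streamingResult's own initializer. This is legal TypeScript and safe at runtime, because the callback only fires once the stream has finished, long after the object literal has been constructed. A minimal sketch of the idiom (Usage and Result here are simplified stand-ins, not the real types from '@/providers/types'):

interface Usage {
  prompt_tokens: number
  completion_tokens: number
  total_tokens: number
}

interface Result {
  stream: ReadableStream<Uint8Array>
  execution: { output: { content: string; tokens: Usage | null } }
}

function buildStreamingResult(
  wrap: (onComplete: (content: string, usage: Usage) => void) => ReadableStream<Uint8Array>
): Result {
  // The arrow function closes over `streamingResult` before the const is
  // initialized; that works because the callback runs only after the stream
  // ends, when the object fully exists.
  const streamingResult: Result = {
    stream: wrap((content, usage) => {
      streamingResult.execution.output.content = content
      streamingResult.execution.output.tokens = usage
    }),
    execution: { output: { content: '', tokens: null } },
  }
  return streamingResult
}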

apps/sim/providers/cerebras/utils.ts

Lines changed: 45 additions & 6 deletions

@@ -1,19 +1,58 @@
-/**
- * Helper to convert a Cerebras streaming response (async iterable) into a ReadableStream.
- * Enqueues only the model's text delta chunks as UTF-8 encoded bytes.
- */
+import type { CompletionUsage } from 'openai/resources/completions'
+import { createLogger } from '@/lib/logs/console/logger'
+
+const logger = createLogger('CerebrasUtils')
+
+interface CerebrasChunk {
+  choices?: Array<{
+    delta?: {
+      content?: string
+    }
+  }>
+  usage?: {
+    prompt_tokens?: number
+    completion_tokens?: number
+    total_tokens?: number
+  }
+}
+
 export function createReadableStreamFromCerebrasStream(
-  cerebrasStream: AsyncIterable<any>
-): ReadableStream {
+  cerebrasStream: AsyncIterable<CerebrasChunk>,
+  onComplete?: (content: string, usage: CompletionUsage) => void
+): ReadableStream<Uint8Array> {
+  let fullContent = ''
+  let promptTokens = 0
+  let completionTokens = 0
+  let totalTokens = 0
+
   return new ReadableStream({
     async start(controller) {
       try {
         for await (const chunk of cerebrasStream) {
+          if (chunk.usage) {
+            promptTokens = chunk.usage.prompt_tokens ?? 0
+            completionTokens = chunk.usage.completion_tokens ?? 0
+            totalTokens = chunk.usage.total_tokens ?? 0
+          }
+
           const content = chunk.choices?.[0]?.delta?.content || ''
           if (content) {
+            fullContent += content
             controller.enqueue(new TextEncoder().encode(content))
           }
         }
+
+        if (onComplete) {
+          if (promptTokens === 0 && completionTokens === 0) {
+            logger.warn('Cerebras stream completed without usage data')
+          }
+          onComplete(fullContent, {
+            prompt_tokens: promptTokens,
+            completion_tokens: completionTokens,
+            total_tokens: totalTokens || promptTokens + completionTokens,
+          })
+        }
+
         controller.close()
       } catch (error) {
         controller.error(error)
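For reviewers, a consumption sketch of the new helper signature. The demo wiring below is illustrative only; the chunk shape follows the OpenAI-compatible stream the provider emits, where a final chunk carries the usage totals:

import { createReadableStreamFromCerebrasStream } from '@/providers/cerebras/utils'

async function demo(cerebrasStream: AsyncIterable<any>) {
  const stream = createReadableStreamFromCerebrasStream(cerebrasStream, (content, usage) => {
    // Fires exactly once, after the source iterable is exhausted.
    console.log('chars:', content.length, 'tokens:', usage.total_tokens)
  })

  // Drain the byte stream; each chunk is a UTF-8 encoded text delta.
  const reader = stream.getReader()
  const decoder = new TextDecoder()
  for (;;) {
    const { done, value } = await reader.read()
    if (done) break
    process.stdout.write(decoder.decode(value, { stream: true }))
  }
}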

apps/sim/providers/deepseek/index.ts

Lines changed: 56 additions & 28 deletions

@@ -11,6 +11,7 @@ import type {
   TimeSegment,
 } from '@/providers/types'
 import {
+  calculateCost,
   prepareToolExecution,
   prepareToolsWithUsageControl,
   trackForcedToolUsage,
@@ -118,7 +119,6 @@ export const deepseekProvider: ProviderConfig = {
      }
    }
 
-    // EARLY STREAMING: if streaming requested and no tools to execute, stream directly
    if (request.stream && (!tools || tools.length === 0)) {
      logger.info('Using streaming response for DeepSeek request (no tools)')
 
@@ -127,22 +127,35 @@ export const deepseekProvider: ProviderConfig = {
        stream: true,
      })
 
-      // Start collecting token usage
-      const tokenUsage = {
-        prompt: 0,
-        completion: 0,
-        total: 0,
-      }
-
-      // Create a StreamingExecution response with a readable stream
      const streamingResult = {
-        stream: createReadableStreamFromDeepseekStream(streamResponse),
+        stream: createReadableStreamFromDeepseekStream(
+          streamResponse as any,
+          (content, usage) => {
+            streamingResult.execution.output.content = content
+            streamingResult.execution.output.tokens = {
+              prompt: usage.prompt_tokens,
+              completion: usage.completion_tokens,
+              total: usage.total_tokens,
+            }
+
+            const costResult = calculateCost(
+              request.model,
+              usage.prompt_tokens,
+              usage.completion_tokens
+            )
+            streamingResult.execution.output.cost = {
+              input: costResult.input,
+              output: costResult.output,
+              total: costResult.total,
+            }
+          }
+        ),
        execution: {
          success: true,
          output: {
-            content: '', // Will be filled by streaming content in chat component
+            content: '',
            model: request.model || 'deepseek-chat',
-            tokens: tokenUsage,
+            tokens: { prompt: 0, completion: 0, total: 0 },
            toolCalls: undefined,
            providerTiming: {
              startTime: providerStartTimeISO,
@@ -158,14 +171,9 @@ export const deepseekProvider: ProviderConfig = {
              },
            ],
          },
-          // Estimate token cost
-          cost: {
-            total: 0.0,
-            input: 0.0,
-            output: 0.0,
-          },
+          cost: { input: 0, output: 0, total: 0 },
        },
-        logs: [], // No block logs for direct streaming
+        logs: [],
        metadata: {
          startTime: providerStartTimeISO,
          endTime: new Date().toISOString(),
@@ -175,7 +183,6 @@ export const deepseekProvider: ProviderConfig = {
        },
      }
 
-      // Return the streaming execution object
      return streamingResult as StreamingExecution
    }
 
@@ -450,13 +457,35 @@ export const deepseekProvider: ProviderConfig = {
 
      const streamResponse = await deepseek.chat.completions.create(streamingPayload)
 
-      // Create a StreamingExecution response with all collected data
+      const accumulatedCost = calculateCost(request.model, tokens.prompt, tokens.completion)
+
      const streamingResult = {
-        stream: createReadableStreamFromDeepseekStream(streamResponse),
+        stream: createReadableStreamFromDeepseekStream(
+          streamResponse as any,
+          (content, usage) => {
+            streamingResult.execution.output.content = content
+            streamingResult.execution.output.tokens = {
+              prompt: tokens.prompt + usage.prompt_tokens,
+              completion: tokens.completion + usage.completion_tokens,
+              total: tokens.total + usage.total_tokens,
+            }
+
+            const streamCost = calculateCost(
+              request.model,
+              usage.prompt_tokens,
+              usage.completion_tokens
+            )
+            streamingResult.execution.output.cost = {
+              input: accumulatedCost.input + streamCost.input,
+              output: accumulatedCost.output + streamCost.output,
+              total: accumulatedCost.total + streamCost.total,
+            }
+          }
+        ),
        execution: {
          success: true,
          output: {
-            content: '', // Will be filled by the callback
+            content: '',
            model: request.model || 'deepseek-chat',
            tokens: {
              prompt: tokens.prompt,
@@ -481,12 +510,12 @@ export const deepseekProvider: ProviderConfig = {
            timeSegments: timeSegments,
          },
          cost: {
-            total: (tokens.total || 0) * 0.0001,
-            input: (tokens.prompt || 0) * 0.0001,
-            output: (tokens.completion || 0) * 0.0001,
+            input: accumulatedCost.input,
+            output: accumulatedCost.output,
+            total: accumulatedCost.total,
          },
        },
-        logs: [], // No block logs at provider level
+        logs: [],
        metadata: {
          startTime: providerStartTimeISO,
          endTime: new Date().toISOString(),
@@ -496,7 +525,6 @@ export const deepseekProvider: ProviderConfig = {
        },
      }
 
-      // Return the streaming execution object
      return streamingResult as StreamingExecution
    }
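The DeepSeek changes mirror the Cerebras ones: model-aware pricing via calculateCost replaces the flat (tokens * 0.0001) estimate, and when tool calls ran before the final stream, the pre-stream cost is added field by field to the streamed response's cost. A sketch of that arithmetic, assuming calculateCost returns { input, output, total }; the import path below is a guess, since the diff does not show which module the providers import the helper from:

import { calculateCost } from '@/providers/utils' // assumed path

function combineCost(
  model: string,
  preStream: { prompt: number; completion: number },
  usage: { prompt_tokens: number; completion_tokens: number }
) {
  // Cost of the tool-calling rounds completed before the final stream.
  const accumulated = calculateCost(model, preStream.prompt, preStream.completion)
  // Cost of the streamed final response, priced from the same model table.
  const stream = calculateCost(model, usage.prompt_tokens, usage.completion_tokens)
  return {
    input: accumulated.input + stream.input,
    output: accumulated.output + stream.output,
    total: accumulated.total + stream.total,
  }
}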
