@@ -12,6 +12,7 @@ import type { ApiHandlerCreateMessageMetadata, SingleCompletionHandler } from ".
1212import { BaseProvider } from "./base-provider"
1313import { DEFAULT_HEADERS } from "./constants"
1414import { t } from "../../i18n"
15+ import { getApiRequestTimeout } from "./utils/timeout-config"
1516
1617const CEREBRAS_BASE_URL = "https://api.cerebras.ai/v1"
1718const CEREBRAS_DEFAULT_TEMPERATURE = 0
@@ -146,128 +147,143 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan
146147 }
147148
148149 try {
149- const response = await fetch ( `${ CEREBRAS_BASE_URL } /chat/completions` , {
150- method : "POST" ,
151- headers : {
152- ...DEFAULT_HEADERS ,
153- "Content-Type" : "application/json" ,
154- Authorization : `Bearer ${ this . apiKey } ` ,
155- } ,
156- body : JSON . stringify ( requestBody ) ,
157- } )
158-
159- if ( ! response . ok ) {
160- const errorText = await response . text ( )
161-
162- let errorMessage = "Unknown error"
163- try {
164- const errorJson = JSON . parse ( errorText )
165- errorMessage = errorJson . error ?. message || errorJson . message || JSON . stringify ( errorJson , null , 2 )
166- } catch {
167- errorMessage = errorText || `HTTP ${ response . status } `
150+ const controller = new AbortController ( )
151+ let timeout = getApiRequestTimeout ( )
152+ let timer : NodeJS . Timeout | undefined
153+ try {
154+ if ( timeout !== 0 ) {
155+ timer = setTimeout ( ( ) => controller . abort ( ) , timeout )
168156 }
157+ const response = await fetch ( `${ CEREBRAS_BASE_URL } /chat/completions` , {
158+ method : "POST" ,
159+ headers : {
160+ ...DEFAULT_HEADERS ,
161+ "Content-Type" : "application/json" ,
162+ Authorization : `Bearer ${ this . apiKey } ` ,
163+ } ,
164+ body : JSON . stringify ( requestBody ) ,
165+ signal : controller . signal ,
166+ } )
167+
168+ if ( ! response . ok ) {
169+ const errorText = await response . text ( )
170+
171+ let errorMessage = "Unknown error"
172+ try {
173+ const errorJson = JSON . parse ( errorText )
174+ errorMessage =
175+ errorJson . error ?. message || errorJson . message || JSON . stringify ( errorJson , null , 2 )
176+ } catch {
177+ errorMessage = errorText || `HTTP ${ response . status } `
178+ }
169179
170- // Provide more actionable error messages
171- if ( response . status === 401 ) {
172- throw new Error ( t ( "common:errors.cerebras.authenticationFailed" ) )
173- } else if ( response . status === 403 ) {
174- throw new Error ( t ( "common:errors.cerebras.accessForbidden" ) )
175- } else if ( response . status === 429 ) {
176- throw new Error ( t ( "common:errors.cerebras.rateLimitExceeded" ) )
177- } else if ( response . status >= 500 ) {
178- throw new Error ( t ( "common:errors.cerebras.serverError" , { status : response . status } ) )
179- } else {
180- throw new Error (
181- t ( "common:errors.cerebras.genericError" , { status : response . status , message : errorMessage } ) ,
182- )
180+ // Provide more actionable error messages
181+ if ( response . status === 401 ) {
182+ throw new Error ( t ( "common:errors.cerebras.authenticationFailed" ) )
183+ } else if ( response . status === 403 ) {
184+ throw new Error ( t ( "common:errors.cerebras.accessForbidden" ) )
185+ } else if ( response . status === 429 ) {
186+ throw new Error ( t ( "common:errors.cerebras.rateLimitExceeded" ) )
187+ } else if ( response . status >= 500 ) {
188+ throw new Error ( t ( "common:errors.cerebras.serverError" , { status : response . status } ) )
189+ } else {
190+ throw new Error (
191+ t ( "common:errors.cerebras.genericError" , {
192+ status : response . status ,
193+ message : errorMessage ,
194+ } ) ,
195+ )
196+ }
183197 }
184- }
185198
186- if ( ! response . body ) {
187- throw new Error ( t ( "common:errors.cerebras.noResponseBody" ) )
188- }
199+ if ( ! response . body ) {
200+ throw new Error ( t ( "common:errors.cerebras.noResponseBody" ) )
201+ }
189202
190- // Initialize XmlMatcher to parse <think>...</think> tags
191- const matcher = new XmlMatcher (
192- "think" ,
193- ( chunk ) =>
194- ( {
195- type : chunk . matched ? "reasoning" : "text" ,
196- text : chunk . data ,
197- } ) as const ,
198- )
199-
200- const reader = response . body . getReader ( )
201- const decoder = new TextDecoder ( )
202- let buffer = ""
203- let inputTokens = 0
204- let outputTokens = 0
203+ // Initialize XmlMatcher to parse <think>...</think> tags
204+ const matcher = new XmlMatcher (
205+ "think" ,
206+ ( chunk ) =>
207+ ( {
208+ type : chunk . matched ? "reasoning" : "text" ,
209+ text : chunk . data ,
210+ } ) as const ,
211+ )
212+
213+ const reader = response . body . getReader ( )
214+ const decoder = new TextDecoder ( )
215+ let buffer = ""
216+ let inputTokens = 0
217+ let outputTokens = 0
205218
206- try {
207- while ( true ) {
208- const { done, value } = await reader . read ( )
209- if ( done ) break
210-
211- buffer += decoder . decode ( value , { stream : true } )
212- const lines = buffer . split ( "\n" )
213- buffer = lines . pop ( ) || "" // Keep the last incomplete line in the buffer
214-
215- for ( const line of lines ) {
216- if ( line . trim ( ) === "" ) continue
217-
218- try {
219- if ( line . startsWith ( "data: " ) ) {
220- const jsonStr = line . slice ( 6 ) . trim ( )
221- if ( jsonStr === "[DONE]" ) {
222- continue
223- }
219+ try {
220+ while ( true ) {
221+ const { done, value } = await reader . read ( )
222+ if ( done ) break
223+
224+ buffer += decoder . decode ( value , { stream : true } )
225+ const lines = buffer . split ( "\n" )
226+ buffer = lines . pop ( ) || "" // Keep the last incomplete line in the buffer
227+
228+ for ( const line of lines ) {
229+ if ( line . trim ( ) === "" ) continue
230+
231+ try {
232+ if ( line . startsWith ( "data: " ) ) {
233+ const jsonStr = line . slice ( 6 ) . trim ( )
234+ if ( jsonStr === "[DONE]" ) {
235+ continue
236+ }
224237
225- const parsed = JSON . parse ( jsonStr )
238+ const parsed = JSON . parse ( jsonStr )
226239
227- // Handle text content - parse for thinking tokens
228- if ( parsed . choices ?. [ 0 ] ?. delta ?. content ) {
229- const content = parsed . choices [ 0 ] . delta . content
240+ // Handle text content - parse for thinking tokens
241+ if ( parsed . choices ?. [ 0 ] ?. delta ?. content ) {
242+ const content = parsed . choices [ 0 ] . delta . content
230243
231- // Use XmlMatcher to parse <think>...</think> tags
232- for ( const chunk of matcher . update ( content ) ) {
233- yield chunk
244+ // Use XmlMatcher to parse <think>...</think> tags
245+ for ( const chunk of matcher . update ( content ) ) {
246+ yield chunk
247+ }
234248 }
235- }
236249
237- // Handle usage information if available
238- if ( parsed . usage ) {
239- inputTokens = parsed . usage . prompt_tokens || 0
240- outputTokens = parsed . usage . completion_tokens || 0
250+ // Handle usage information if available
251+ if ( parsed . usage ) {
252+ inputTokens = parsed . usage . prompt_tokens || 0
253+ outputTokens = parsed . usage . completion_tokens || 0
254+ }
241255 }
256+ } catch ( error ) {
257+ // Silently ignore malformed streaming data lines
242258 }
243- } catch ( error ) {
244- // Silently ignore malformed streaming data lines
245259 }
246260 }
261+ } finally {
262+ reader . releaseLock ( )
247263 }
248- } finally {
249- reader . releaseLock ( )
250- }
251264
252- // Process any remaining content in the matcher
253- for ( const chunk of matcher . final ( ) ) {
254- yield chunk
255- }
265+ // Process any remaining content in the matcher
266+ for ( const chunk of matcher . final ( ) ) {
267+ yield chunk
268+ }
256269
257- // Provide token usage estimate if not available from API
258- if ( inputTokens === 0 || outputTokens === 0 ) {
259- const inputText = systemPrompt + cerebrasMessages . map ( ( m ) => m . content ) . join ( "" )
260- inputTokens = inputTokens || Math . ceil ( inputText . length / 4 ) // Rough estimate: 4 chars per token
261- outputTokens = outputTokens || Math . ceil ( ( max_tokens || 1000 ) / 10 ) // Rough estimate
262- }
270+ // Provide token usage estimate if not available from API
271+ if ( inputTokens === 0 || outputTokens === 0 ) {
272+ const inputText = systemPrompt + cerebrasMessages . map ( ( m ) => m . content ) . join ( "" )
273+ inputTokens = inputTokens || Math . ceil ( inputText . length / 4 ) // Rough estimate: 4 chars per token
274+ outputTokens = outputTokens || Math . ceil ( ( max_tokens || 1000 ) / 10 ) // Rough estimate
275+ }
263276
264- // Store usage for cost calculation
265- this . lastUsage = { inputTokens, outputTokens }
277+ // Store usage for cost calculation
278+ this . lastUsage = { inputTokens, outputTokens }
266279
267- yield {
268- type : "usage" ,
269- inputTokens,
270- outputTokens,
280+ yield {
281+ type : "usage" ,
282+ inputTokens,
283+ outputTokens,
284+ }
285+ } finally {
286+ if ( timer ) clearTimeout ( timer )
271287 }
272288 } catch ( error ) {
273289 if ( error instanceof Error ) {
0 commit comments