Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions src/api/providers/anthropic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import { getModelParams } from "../transform/model-params"
import { BaseProvider } from "./base-provider"
import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
import { calculateApiCostAnthropic } from "../../shared/cost"
import { getApiRequestTimeout } from "./utils/timeout-config"

export class AnthropicHandler extends BaseProvider implements SingleCompletionHandler {
private options: ApiHandlerOptions
Expand All @@ -30,9 +31,21 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
const apiKeyFieldName =
this.options.anthropicBaseUrl && this.options.anthropicUseAuthToken ? "authToken" : "apiKey"

let timeout = getApiRequestTimeout()

// Match behaviour with the other SDKs, where a timeout of 0 means "no timeout" rather than timing out immediately
if (timeout === 0) {
// 2147483647 ms (2^31 - 1) is the largest delay Node.js timers accept:
// setTimeout stores its delay in a 32-bit signed integer, and values above
// this overflow and are clamped to 1 ms (firing almost immediately).
// Using the maximum therefore acts as an effectively infinite timeout.
timeout = 2147483647
}

this.client = new Anthropic({
baseURL: this.options.anthropicBaseUrl || undefined,
[apiKeyFieldName]: this.options.apiKey,
timeout,
})
}

Expand Down
12 changes: 12 additions & 0 deletions src/api/providers/base-openai-compatible-provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import { convertToOpenAiMessages } from "../transform/openai-format"

import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
import { DEFAULT_HEADERS } from "./constants"
import { getApiRequestTimeout } from "./utils/timeout-config"
import { BaseProvider } from "./base-provider"
import { handleOpenAIError } from "./utils/openai-error-handler"

Expand Down Expand Up @@ -56,10 +57,21 @@ export abstract class BaseOpenAiCompatibleProvider<ModelName extends string>
throw new Error("API key is required")
}

let timeout = getApiRequestTimeout()
// Match behaviour with the other SDKs, where a timeout of 0 means "no timeout" rather than timing out immediately
if (timeout === 0) {
// 2147483647 ms (2^31 - 1) is the largest delay Node.js timers accept:
// setTimeout stores its delay in a 32-bit signed integer, and values above
// this overflow and are clamped to 1 ms (firing almost immediately).
// Using the maximum therefore acts as an effectively infinite timeout.
timeout = 2147483647
}

this.client = new OpenAI({
baseURL,
apiKey: this.options.apiKey,
defaultHeaders: DEFAULT_HEADERS,
timeout,
})
}

Expand Down
27 changes: 20 additions & 7 deletions src/api/providers/bedrock.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import { convertToBedrockConverseMessages as sharedConverter } from "../transfor
import { getModelParams } from "../transform/model-params"
import { shouldUseReasoningBudget } from "../../shared/api"
import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
import { getApiRequestTimeout } from "./utils/timeout-config"

/************************************************************************************
*
Expand Down Expand Up @@ -401,17 +402,17 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
...(thinkingEnabled && { anthropic_version: "bedrock-2023-05-31" }),
}

// Create AbortController with 10 minute timeout
// Create AbortController with configured timeout
const controller = new AbortController()
let timeoutId: NodeJS.Timeout | undefined
const timeoutMs = getApiRequestTimeout()

try {
timeoutId = setTimeout(
() => {
if (timeoutMs !== 0) {
timeoutId = setTimeout(() => {
controller.abort()
},
10 * 60 * 1000,
)
}, timeoutMs)
}

const command = new ConverseStreamCommand(payload)
const response = await this.client.send(command, {
Expand Down Expand Up @@ -670,8 +671,18 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
inferenceConfig,
}

const controller = new AbortController()
let timeoutId: NodeJS.Timeout | undefined
const timeoutMs = getApiRequestTimeout()

if (timeoutMs !== 0) {
timeoutId = setTimeout(() => {
controller.abort()
}, timeoutMs)
}

const command = new ConverseCommand(payload)
const response = await this.client.send(command)
const response = await this.client.send(command, { abortSignal: controller.signal })

if (
response?.output?.message?.content &&
Expand All @@ -680,6 +691,7 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
response.output.message.content[0].text.trim().length > 0
) {
try {
if (timeoutId) clearTimeout(timeoutId)
return response.output.message.content[0].text
} catch (parseError) {
logger.error("Failed to parse Bedrock response", {
Expand All @@ -688,6 +700,7 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
})
}
}
if (timeoutId) clearTimeout(timeoutId)
return ""
} catch (error) {
// Use the extracted error handling method for all errors
Expand Down
220 changes: 118 additions & 102 deletions src/api/providers/cerebras.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import type { ApiHandlerCreateMessageMetadata, SingleCompletionHandler } from ".
import { BaseProvider } from "./base-provider"
import { DEFAULT_HEADERS } from "./constants"
import { t } from "../../i18n"
import { getApiRequestTimeout } from "./utils/timeout-config"

const CEREBRAS_BASE_URL = "https://api.cerebras.ai/v1"
const CEREBRAS_DEFAULT_TEMPERATURE = 0
Expand Down Expand Up @@ -146,128 +147,143 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan
}

try {
const response = await fetch(`${CEREBRAS_BASE_URL}/chat/completions`, {
method: "POST",
headers: {
...DEFAULT_HEADERS,
"Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`,
},
body: JSON.stringify(requestBody),
})

if (!response.ok) {
const errorText = await response.text()

let errorMessage = "Unknown error"
try {
const errorJson = JSON.parse(errorText)
errorMessage = errorJson.error?.message || errorJson.message || JSON.stringify(errorJson, null, 2)
} catch {
errorMessage = errorText || `HTTP ${response.status}`
const controller = new AbortController()
let timeout = getApiRequestTimeout()
let timer: NodeJS.Timeout | undefined
try {
if (timeout !== 0) {
timer = setTimeout(() => controller.abort(), timeout)
}
const response = await fetch(`${CEREBRAS_BASE_URL}/chat/completions`, {
method: "POST",
headers: {
...DEFAULT_HEADERS,
"Content-Type": "application/json",
Authorization: `Bearer ${this.apiKey}`,
},
body: JSON.stringify(requestBody),
signal: controller.signal,
})

if (!response.ok) {
const errorText = await response.text()

let errorMessage = "Unknown error"
try {
const errorJson = JSON.parse(errorText)
errorMessage =
errorJson.error?.message || errorJson.message || JSON.stringify(errorJson, null, 2)
} catch {
errorMessage = errorText || `HTTP ${response.status}`
}

// Provide more actionable error messages
if (response.status === 401) {
throw new Error(t("common:errors.cerebras.authenticationFailed"))
} else if (response.status === 403) {
throw new Error(t("common:errors.cerebras.accessForbidden"))
} else if (response.status === 429) {
throw new Error(t("common:errors.cerebras.rateLimitExceeded"))
} else if (response.status >= 500) {
throw new Error(t("common:errors.cerebras.serverError", { status: response.status }))
} else {
throw new Error(
t("common:errors.cerebras.genericError", { status: response.status, message: errorMessage }),
)
// Provide more actionable error messages
if (response.status === 401) {
throw new Error(t("common:errors.cerebras.authenticationFailed"))
} else if (response.status === 403) {
throw new Error(t("common:errors.cerebras.accessForbidden"))
} else if (response.status === 429) {
throw new Error(t("common:errors.cerebras.rateLimitExceeded"))
} else if (response.status >= 500) {
throw new Error(t("common:errors.cerebras.serverError", { status: response.status }))
} else {
throw new Error(
t("common:errors.cerebras.genericError", {
status: response.status,
message: errorMessage,
}),
)
}
}
}

if (!response.body) {
throw new Error(t("common:errors.cerebras.noResponseBody"))
}
if (!response.body) {
throw new Error(t("common:errors.cerebras.noResponseBody"))
}

// Initialize XmlMatcher to parse <think>...</think> tags
const matcher = new XmlMatcher(
"think",
(chunk) =>
({
type: chunk.matched ? "reasoning" : "text",
text: chunk.data,
}) as const,
)

const reader = response.body.getReader()
const decoder = new TextDecoder()
let buffer = ""
let inputTokens = 0
let outputTokens = 0
// Initialize XmlMatcher to parse <think>...</think> tags
const matcher = new XmlMatcher(
"think",
(chunk) =>
({
type: chunk.matched ? "reasoning" : "text",
text: chunk.data,
}) as const,
)

const reader = response.body.getReader()
const decoder = new TextDecoder()
let buffer = ""
let inputTokens = 0
let outputTokens = 0

try {
while (true) {
const { done, value } = await reader.read()
if (done) break

buffer += decoder.decode(value, { stream: true })
const lines = buffer.split("\n")
buffer = lines.pop() || "" // Keep the last incomplete line in the buffer

for (const line of lines) {
if (line.trim() === "") continue

try {
if (line.startsWith("data: ")) {
const jsonStr = line.slice(6).trim()
if (jsonStr === "[DONE]") {
continue
}
try {
while (true) {
const { done, value } = await reader.read()
if (done) break

buffer += decoder.decode(value, { stream: true })
const lines = buffer.split("\n")
buffer = lines.pop() || "" // Keep the last incomplete line in the buffer

for (const line of lines) {
if (line.trim() === "") continue

try {
if (line.startsWith("data: ")) {
const jsonStr = line.slice(6).trim()
if (jsonStr === "[DONE]") {
continue
}

const parsed = JSON.parse(jsonStr)
const parsed = JSON.parse(jsonStr)

// Handle text content - parse for thinking tokens
if (parsed.choices?.[0]?.delta?.content) {
const content = parsed.choices[0].delta.content
// Handle text content - parse for thinking tokens
if (parsed.choices?.[0]?.delta?.content) {
const content = parsed.choices[0].delta.content

// Use XmlMatcher to parse <think>...</think> tags
for (const chunk of matcher.update(content)) {
yield chunk
// Use XmlMatcher to parse <think>...</think> tags
for (const chunk of matcher.update(content)) {
yield chunk
}
}
}

// Handle usage information if available
if (parsed.usage) {
inputTokens = parsed.usage.prompt_tokens || 0
outputTokens = parsed.usage.completion_tokens || 0
// Handle usage information if available
if (parsed.usage) {
inputTokens = parsed.usage.prompt_tokens || 0
outputTokens = parsed.usage.completion_tokens || 0
}
}
} catch (error) {
// Silently ignore malformed streaming data lines
}
} catch (error) {
// Silently ignore malformed streaming data lines
}
}
} finally {
reader.releaseLock()
}
} finally {
reader.releaseLock()
}

// Process any remaining content in the matcher
for (const chunk of matcher.final()) {
yield chunk
}
// Process any remaining content in the matcher
for (const chunk of matcher.final()) {
yield chunk
}

// Provide token usage estimate if not available from API
if (inputTokens === 0 || outputTokens === 0) {
const inputText = systemPrompt + cerebrasMessages.map((m) => m.content).join("")
inputTokens = inputTokens || Math.ceil(inputText.length / 4) // Rough estimate: 4 chars per token
outputTokens = outputTokens || Math.ceil((max_tokens || 1000) / 10) // Rough estimate
}
// Provide token usage estimate if not available from API
if (inputTokens === 0 || outputTokens === 0) {
const inputText = systemPrompt + cerebrasMessages.map((m) => m.content).join("")
inputTokens = inputTokens || Math.ceil(inputText.length / 4) // Rough estimate: 4 chars per token
outputTokens = outputTokens || Math.ceil((max_tokens || 1000) / 10) // Rough estimate
}

// Store usage for cost calculation
this.lastUsage = { inputTokens, outputTokens }
// Store usage for cost calculation
this.lastUsage = { inputTokens, outputTokens }

yield {
type: "usage",
inputTokens,
outputTokens,
yield {
type: "usage",
inputTokens,
outputTokens,
}
} finally {
if (timer) clearTimeout(timer)
}
} catch (error) {
if (error instanceof Error) {
Expand Down
Loading
Loading