Skip to content

Commit 2ec4bf6

Browse files
committed
fix: match timeout behaviour with docs
1 parent 2c8c140 commit 2ec4bf6

File tree

14 files changed

+346
-214
lines changed

14 files changed

+346
-214
lines changed

src/api/providers/anthropic.ts

Lines changed: 9 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -18,6 +18,7 @@ import { getModelParams } from "../transform/model-params"
1818
import { BaseProvider } from "./base-provider"
1919
import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
2020
import { calculateApiCostAnthropic } from "../../shared/cost"
21+
import { getApiRequestTimeout } from "./utils/timeout-config"
2122

2223
export class AnthropicHandler extends BaseProvider implements SingleCompletionHandler {
2324
private options: ApiHandlerOptions
@@ -30,9 +31,17 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
3031
const apiKeyFieldName =
3132
this.options.anthropicBaseUrl && this.options.anthropicUseAuthToken ? "authToken" : "apiKey"
3233

34+
let timeout = getApiRequestTimeout()
35+
36+
// match behaviour with other SDK where 0 means no timeout instead of instantly timing out
37+
if (timeout === 0) {
38+
timeout = Number.MAX_SAFE_INTEGER
39+
}
40+
3341
this.client = new Anthropic({
3442
baseURL: this.options.anthropicBaseUrl || undefined,
3543
[apiKeyFieldName]: this.options.apiKey,
44+
timeout,
3645
})
3746
}
3847

src/api/providers/base-openai-compatible-provider.ts

Lines changed: 8 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -9,6 +9,7 @@ import { convertToOpenAiMessages } from "../transform/openai-format"
99

1010
import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
1111
import { DEFAULT_HEADERS } from "./constants"
12+
import { getApiRequestTimeout } from "./utils/timeout-config"
1213
import { BaseProvider } from "./base-provider"
1314
import { handleOpenAIError } from "./utils/openai-error-handler"
1415

@@ -56,10 +57,17 @@ export abstract class BaseOpenAiCompatibleProvider<ModelName extends string>
5657
throw new Error("API key is required")
5758
}
5859

60+
let timeout = getApiRequestTimeout()
61+
// match behaviour with other SDK where 0 means no timeout instead of instantly timing out
62+
if (timeout === 0) {
63+
timeout = Number.MAX_SAFE_INTEGER
64+
}
65+
5966
this.client = new OpenAI({
6067
baseURL,
6168
apiKey: this.options.apiKey,
6269
defaultHeaders: DEFAULT_HEADERS,
70+
timeout,
6371
})
6472
}
6573

src/api/providers/bedrock.ts

Lines changed: 20 additions & 7 deletions
Original file line number · Diff line number · Diff line change
@@ -33,6 +33,7 @@ import { convertToBedrockConverseMessages as sharedConverter } from "../transfor
3333
import { getModelParams } from "../transform/model-params"
3434
import { shouldUseReasoningBudget } from "../../shared/api"
3535
import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
36+
import { getApiRequestTimeout } from "./utils/timeout-config"
3637

3738
/************************************************************************************
3839
*
@@ -401,17 +402,17 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
401402
...(thinkingEnabled && { anthropic_version: "bedrock-2023-05-31" }),
402403
}
403404

404-
// Create AbortController with 10 minute timeout
405+
// Create AbortController with configured timeout
405406
const controller = new AbortController()
406407
let timeoutId: NodeJS.Timeout | undefined
408+
const timeoutMs = getApiRequestTimeout()
407409

408410
try {
409-
timeoutId = setTimeout(
410-
() => {
411+
if (timeoutMs !== 0) {
412+
timeoutId = setTimeout(() => {
411413
controller.abort()
412-
},
413-
10 * 60 * 1000,
414-
)
414+
}, timeoutMs)
415+
}
415416

416417
const command = new ConverseStreamCommand(payload)
417418
const response = await this.client.send(command, {
@@ -670,8 +671,18 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
670671
inferenceConfig,
671672
}
672673

674+
const controller = new AbortController()
675+
let timeoutId: NodeJS.Timeout | undefined
676+
const timeoutMs = getApiRequestTimeout()
677+
678+
if (timeoutMs !== 0) {
679+
timeoutId = setTimeout(() => {
680+
controller.abort()
681+
}, timeoutMs)
682+
}
683+
673684
const command = new ConverseCommand(payload)
674-
const response = await this.client.send(command)
685+
const response = await this.client.send(command, { abortSignal: controller.signal })
675686

676687
if (
677688
response?.output?.message?.content &&
@@ -680,6 +691,7 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
680691
response.output.message.content[0].text.trim().length > 0
681692
) {
682693
try {
694+
if (timeoutId) clearTimeout(timeoutId)
683695
return response.output.message.content[0].text
684696
} catch (parseError) {
685697
logger.error("Failed to parse Bedrock response", {
@@ -688,6 +700,7 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
688700
})
689701
}
690702
}
703+
if (timeoutId) clearTimeout(timeoutId)
691704
return ""
692705
} catch (error) {
693706
// Use the extracted error handling method for all errors

src/api/providers/cerebras.ts

Lines changed: 118 additions & 102 deletions
Original file line number · Diff line number · Diff line change
@@ -12,6 +12,7 @@ import type { ApiHandlerCreateMessageMetadata, SingleCompletionHandler } from ".
1212
import { BaseProvider } from "./base-provider"
1313
import { DEFAULT_HEADERS } from "./constants"
1414
import { t } from "../../i18n"
15+
import { getApiRequestTimeout } from "./utils/timeout-config"
1516

1617
const CEREBRAS_BASE_URL = "https://api.cerebras.ai/v1"
1718
const CEREBRAS_DEFAULT_TEMPERATURE = 0
@@ -146,128 +147,143 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan
146147
}
147148

148149
try {
149-
const response = await fetch(`${CEREBRAS_BASE_URL}/chat/completions`, {
150-
method: "POST",
151-
headers: {
152-
...DEFAULT_HEADERS,
153-
"Content-Type": "application/json",
154-
Authorization: `Bearer ${this.apiKey}`,
155-
},
156-
body: JSON.stringify(requestBody),
157-
})
158-
159-
if (!response.ok) {
160-
const errorText = await response.text()
161-
162-
let errorMessage = "Unknown error"
163-
try {
164-
const errorJson = JSON.parse(errorText)
165-
errorMessage = errorJson.error?.message || errorJson.message || JSON.stringify(errorJson, null, 2)
166-
} catch {
167-
errorMessage = errorText || `HTTP ${response.status}`
150+
const controller = new AbortController()
151+
let timeout = getApiRequestTimeout()
152+
let timer: NodeJS.Timeout | undefined
153+
try {
154+
if (timeout !== 0) {
155+
timer = setTimeout(() => controller.abort(), timeout)
168156
}
157+
const response = await fetch(`${CEREBRAS_BASE_URL}/chat/completions`, {
158+
method: "POST",
159+
headers: {
160+
...DEFAULT_HEADERS,
161+
"Content-Type": "application/json",
162+
Authorization: `Bearer ${this.apiKey}`,
163+
},
164+
body: JSON.stringify(requestBody),
165+
signal: controller.signal,
166+
})
167+
168+
if (!response.ok) {
169+
const errorText = await response.text()
170+
171+
let errorMessage = "Unknown error"
172+
try {
173+
const errorJson = JSON.parse(errorText)
174+
errorMessage =
175+
errorJson.error?.message || errorJson.message || JSON.stringify(errorJson, null, 2)
176+
} catch {
177+
errorMessage = errorText || `HTTP ${response.status}`
178+
}
169179

170-
// Provide more actionable error messages
171-
if (response.status === 401) {
172-
throw new Error(t("common:errors.cerebras.authenticationFailed"))
173-
} else if (response.status === 403) {
174-
throw new Error(t("common:errors.cerebras.accessForbidden"))
175-
} else if (response.status === 429) {
176-
throw new Error(t("common:errors.cerebras.rateLimitExceeded"))
177-
} else if (response.status >= 500) {
178-
throw new Error(t("common:errors.cerebras.serverError", { status: response.status }))
179-
} else {
180-
throw new Error(
181-
t("common:errors.cerebras.genericError", { status: response.status, message: errorMessage }),
182-
)
180+
// Provide more actionable error messages
181+
if (response.status === 401) {
182+
throw new Error(t("common:errors.cerebras.authenticationFailed"))
183+
} else if (response.status === 403) {
184+
throw new Error(t("common:errors.cerebras.accessForbidden"))
185+
} else if (response.status === 429) {
186+
throw new Error(t("common:errors.cerebras.rateLimitExceeded"))
187+
} else if (response.status >= 500) {
188+
throw new Error(t("common:errors.cerebras.serverError", { status: response.status }))
189+
} else {
190+
throw new Error(
191+
t("common:errors.cerebras.genericError", {
192+
status: response.status,
193+
message: errorMessage,
194+
}),
195+
)
196+
}
183197
}
184-
}
185198

186-
if (!response.body) {
187-
throw new Error(t("common:errors.cerebras.noResponseBody"))
188-
}
199+
if (!response.body) {
200+
throw new Error(t("common:errors.cerebras.noResponseBody"))
201+
}
189202

190-
// Initialize XmlMatcher to parse <think>...</think> tags
191-
const matcher = new XmlMatcher(
192-
"think",
193-
(chunk) =>
194-
({
195-
type: chunk.matched ? "reasoning" : "text",
196-
text: chunk.data,
197-
}) as const,
198-
)
199-
200-
const reader = response.body.getReader()
201-
const decoder = new TextDecoder()
202-
let buffer = ""
203-
let inputTokens = 0
204-
let outputTokens = 0
203+
// Initialize XmlMatcher to parse <think>...</think> tags
204+
const matcher = new XmlMatcher(
205+
"think",
206+
(chunk) =>
207+
({
208+
type: chunk.matched ? "reasoning" : "text",
209+
text: chunk.data,
210+
}) as const,
211+
)
212+
213+
const reader = response.body.getReader()
214+
const decoder = new TextDecoder()
215+
let buffer = ""
216+
let inputTokens = 0
217+
let outputTokens = 0
205218

206-
try {
207-
while (true) {
208-
const { done, value } = await reader.read()
209-
if (done) break
210-
211-
buffer += decoder.decode(value, { stream: true })
212-
const lines = buffer.split("\n")
213-
buffer = lines.pop() || "" // Keep the last incomplete line in the buffer
214-
215-
for (const line of lines) {
216-
if (line.trim() === "") continue
217-
218-
try {
219-
if (line.startsWith("data: ")) {
220-
const jsonStr = line.slice(6).trim()
221-
if (jsonStr === "[DONE]") {
222-
continue
223-
}
219+
try {
220+
while (true) {
221+
const { done, value } = await reader.read()
222+
if (done) break
223+
224+
buffer += decoder.decode(value, { stream: true })
225+
const lines = buffer.split("\n")
226+
buffer = lines.pop() || "" // Keep the last incomplete line in the buffer
227+
228+
for (const line of lines) {
229+
if (line.trim() === "") continue
230+
231+
try {
232+
if (line.startsWith("data: ")) {
233+
const jsonStr = line.slice(6).trim()
234+
if (jsonStr === "[DONE]") {
235+
continue
236+
}
224237

225-
const parsed = JSON.parse(jsonStr)
238+
const parsed = JSON.parse(jsonStr)
226239

227-
// Handle text content - parse for thinking tokens
228-
if (parsed.choices?.[0]?.delta?.content) {
229-
const content = parsed.choices[0].delta.content
240+
// Handle text content - parse for thinking tokens
241+
if (parsed.choices?.[0]?.delta?.content) {
242+
const content = parsed.choices[0].delta.content
230243

231-
// Use XmlMatcher to parse <think>...</think> tags
232-
for (const chunk of matcher.update(content)) {
233-
yield chunk
244+
// Use XmlMatcher to parse <think>...</think> tags
245+
for (const chunk of matcher.update(content)) {
246+
yield chunk
247+
}
234248
}
235-
}
236249

237-
// Handle usage information if available
238-
if (parsed.usage) {
239-
inputTokens = parsed.usage.prompt_tokens || 0
240-
outputTokens = parsed.usage.completion_tokens || 0
250+
// Handle usage information if available
251+
if (parsed.usage) {
252+
inputTokens = parsed.usage.prompt_tokens || 0
253+
outputTokens = parsed.usage.completion_tokens || 0
254+
}
241255
}
256+
} catch (error) {
257+
// Silently ignore malformed streaming data lines
242258
}
243-
} catch (error) {
244-
// Silently ignore malformed streaming data lines
245259
}
246260
}
261+
} finally {
262+
reader.releaseLock()
247263
}
248-
} finally {
249-
reader.releaseLock()
250-
}
251264

252-
// Process any remaining content in the matcher
253-
for (const chunk of matcher.final()) {
254-
yield chunk
255-
}
265+
// Process any remaining content in the matcher
266+
for (const chunk of matcher.final()) {
267+
yield chunk
268+
}
256269

257-
// Provide token usage estimate if not available from API
258-
if (inputTokens === 0 || outputTokens === 0) {
259-
const inputText = systemPrompt + cerebrasMessages.map((m) => m.content).join("")
260-
inputTokens = inputTokens || Math.ceil(inputText.length / 4) // Rough estimate: 4 chars per token
261-
outputTokens = outputTokens || Math.ceil((max_tokens || 1000) / 10) // Rough estimate
262-
}
270+
// Provide token usage estimate if not available from API
271+
if (inputTokens === 0 || outputTokens === 0) {
272+
const inputText = systemPrompt + cerebrasMessages.map((m) => m.content).join("")
273+
inputTokens = inputTokens || Math.ceil(inputText.length / 4) // Rough estimate: 4 chars per token
274+
outputTokens = outputTokens || Math.ceil((max_tokens || 1000) / 10) // Rough estimate
275+
}
263276

264-
// Store usage for cost calculation
265-
this.lastUsage = { inputTokens, outputTokens }
277+
// Store usage for cost calculation
278+
this.lastUsage = { inputTokens, outputTokens }
266279

267-
yield {
268-
type: "usage",
269-
inputTokens,
270-
outputTokens,
280+
yield {
281+
type: "usage",
282+
inputTokens,
283+
outputTokens,
284+
}
285+
} finally {
286+
if (timer) clearTimeout(timer)
271287
}
272288
} catch (error) {
273289
if (error instanceof Error) {

src/api/providers/huggingface.ts

Lines changed: 7 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -6,6 +6,7 @@ import { ApiStream } from "../transform/stream"
66
import { convertToOpenAiMessages } from "../transform/openai-format"
77
import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
88
import { DEFAULT_HEADERS } from "./constants"
9+
import { getApiRequestTimeout } from "./utils/timeout-config"
910
import { BaseProvider } from "./base-provider"
1011
import { getHuggingFaceModels, getCachedHuggingFaceModels } from "./fetchers/huggingface"
1112
import { handleOpenAIError } from "./utils/openai-error-handler"
@@ -24,10 +25,16 @@ export class HuggingFaceHandler extends BaseProvider implements SingleCompletion
2425
throw new Error("Hugging Face API key is required")
2526
}
2627

28+
let timeout = getApiRequestTimeout()
29+
if (timeout === 0) {
30+
timeout = Number.MAX_SAFE_INTEGER
31+
}
32+
2733
this.client = new OpenAI({
2834
baseURL: "https://router.huggingface.co/v1",
2935
apiKey: this.options.huggingFaceApiKey,
3036
defaultHeaders: DEFAULT_HEADERS,
37+
timeout,
3138
})
3239

3340
// Try to get cached models first

0 commit comments

Comments
 (0)