Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 40 additions & 59 deletions src/api/providers/requesty.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@ import { calculateApiCostOpenAI } from "../../shared/cost"

import { convertToOpenAiMessages } from "../transform/openai-format"
import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
import { getModelParams } from "../transform/model-params"
import { AnthropicReasoningParams } from "../transform/reasoning"

import { DEFAULT_HEADERS } from "./constants"
import { getModels } from "./fetchers/modelCache"
import { BaseProvider } from "./base-provider"
import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../"
import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"

// Requesty usage includes an extra field for Anthropic use cases.
// Safely cast the prompt token details section to the appropriate structure.
Expand All @@ -31,10 +33,7 @@ type RequestyChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParams & {
mode?: string
}
}
thinking?: {
type: string
budget_tokens?: number
}
thinking?: AnthropicReasoningParams
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like Requesty is using the Anthropic format for extended thinking.

}

export class RequestyHandler extends BaseProvider implements SingleCompletionHandler {
Expand All @@ -44,25 +43,33 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan

constructor(options: ApiHandlerOptions) {
super()
this.options = options

const apiKey = this.options.requestyApiKey ?? "not-provided"
const baseURL = "https://router.requesty.ai/v1"

const defaultHeaders = DEFAULT_HEADERS
this.options = options

this.client = new OpenAI({ baseURL, apiKey, defaultHeaders })
this.client = new OpenAI({
baseURL: "https://router.requesty.ai/v1",
apiKey: this.options.requestyApiKey ?? "not-provided",
defaultHeaders: DEFAULT_HEADERS,
})
}

public async fetchModel() {
this.models = await getModels({ provider: "requesty" })
return this.getModel()
}

override getModel(): { id: string; info: ModelInfo } {
override getModel() {
const id = this.options.requestyModelId ?? requestyDefaultModelId
const info = this.models[id] ?? requestyDefaultModelInfo
return { id, info }

const params = getModelParams({
format: "anthropic",
modelId: id,
model: info,
settings: this.options,
})

return { id, info, ...params }
}

protected processUsageMetrics(usage: any, modelInfo?: ModelInfo): ApiStreamUsageChunk {
Expand Down Expand Up @@ -90,70 +97,44 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan
messages: Anthropic.Messages.MessageParam[],
metadata?: ApiHandlerCreateMessageMetadata,
): ApiStream {
const model = await this.fetchModel()

let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
const {
id: model,
info,
maxTokens: max_tokens,
temperature,
reasoningEffort: reasoning_effort,
reasoning: thinking,
} = await this.fetchModel()

const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
{ role: "system", content: systemPrompt },
...convertToOpenAiMessages(messages),
]

let maxTokens = undefined
if (this.options.modelMaxTokens) {
maxTokens = this.options.modelMaxTokens
} else if (this.options.includeMaxTokens) {
maxTokens = model.info.maxTokens
}

let reasoningEffort = undefined
if (this.options.reasoningEffort) {
reasoningEffort = this.options.reasoningEffort
}

let thinking = undefined
if (this.options.modelMaxThinkingTokens) {
thinking = {
type: "enabled",
budget_tokens: this.options.modelMaxThinkingTokens,
}
}

const temperature = this.options.modelTemperature

const completionParams: RequestyChatCompletionParams = {
model: model.id,
max_tokens: maxTokens,
messages: openAiMessages,
temperature: temperature,
model,
max_tokens,
temperature,
reasoning_effort,
thinking,
stream: true,
stream_options: { include_usage: true },
reasoning_effort: reasoningEffort,
thinking: thinking,
requesty: {
trace_id: metadata?.taskId,
extra: {
mode: metadata?.mode,
},
},
requesty: { trace_id: metadata?.taskId, extra: { mode: metadata?.mode } },
}

const stream = await this.client.chat.completions.create(completionParams)

let lastUsage: any = undefined

for await (const chunk of stream) {
const delta = chunk.choices[0]?.delta

if (delta?.content) {
yield {
type: "text",
text: delta.content,
}
yield { type: "text", text: delta.content }
}

if (delta && "reasoning_content" in delta && delta.reasoning_content) {
yield {
type: "reasoning",
text: (delta.reasoning_content as string | undefined) || "",
}
yield { type: "reasoning", text: (delta.reasoning_content as string | undefined) || "" }
}

if (chunk.usage) {
Expand All @@ -162,7 +143,7 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan
}

if (lastUsage) {
yield this.processUsageMetrics(lastUsage, model.info)
yield this.processUsageMetrics(lastUsage, info)
}
}

Expand Down
Loading