 import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"
-import { ApiHandlerOptions, ModelInfo, openAiModelInfoSaneDefaults } from "../../shared/api"
+import { ApiHandlerOptions, ModelInfo, requestyDefaultModelId, requestyDefaultModelInfo } from "../../shared/api"
 import { ApiHandler } from "../index"
 import { withRetry } from "../retry"
 import { convertToOpenAiMessages } from "../transform/openai-format"
+import { calculateApiCostOpenAI } from "../../utils/cost"
 import { ApiStream } from "../transform/stream"

 export class RequestyHandler implements ApiHandler {
@@ -24,21 +25,34 @@ export class RequestyHandler implements ApiHandler {
 
 	@withRetry()
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
-		const modelId = this.options.requestyModelId ?? ""
+		const model = this.getModel()
 
 		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
 			{ role: "system", content: systemPrompt },
 			...convertToOpenAiMessages(messages),
 		]
 
+		const reasoningEffort = this.options.o3MiniReasoningEffort || "medium"
+		const reasoning = { reasoning_effort: reasoningEffort }
+		const reasoningArgs = model.id === "openai/o3-mini" ? reasoning : {}
+
+		const thinkingBudget = this.options.thinkingBudgetTokens || 0
+		const thinking =
+			thinkingBudget > 0
+				? { thinking: { type: "enabled", budget_tokens: thinkingBudget } }
+				: { thinking: { type: "disabled" } }
+		const thinkingArgs = model.id.includes("claude-3-7-sonnet") ? thinking : {}
+
 		// @ts-ignore-next-line
 		const stream = await this.client.chat.completions.create({
-			model: modelId,
+			model: model.id,
+			max_tokens: model.info.maxTokens || undefined,
 			messages: openAiMessages,
 			temperature: 0,
 			stream: true,
 			stream_options: { include_usage: true },
-			...(modelId === "openai/o3-mini" ? { reasoning_effort: this.options.o3MiniReasoningEffort || "medium" } : {}),
+			...reasoningArgs,
+			...thinkingArgs,
 		})
 
 		for await (const chunk of stream) {
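The hunk above relies on the conditional-spread pattern: model-specific parameters only enter the request body when the target model supports them, because spreading an empty object is a no-op. A standalone sketch of the same pattern (not part of the diff; model ids and option shapes mirror the code above):

```ts
// Sketch of the conditional-spread pattern used in createMessage.
type ExtraArgs = Record<string, unknown>

function buildModelArgs(modelId: string, thinkingBudget: number): ExtraArgs {
	// reasoning_effort only applies to o3-mini-style models
	const reasoningArgs: ExtraArgs = modelId === "openai/o3-mini" ? { reasoning_effort: "medium" } : {}
	// the thinking block only applies to claude-3-7-sonnet variants
	const thinkingArgs: ExtraArgs = modelId.includes("claude-3-7-sonnet")
		? thinkingBudget > 0
			? { thinking: { type: "enabled", budget_tokens: thinkingBudget } }
			: { thinking: { type: "disabled" } }
		: {}
	// Spreading {} adds nothing, so unsupported params never reach the API.
	return { ...reasoningArgs, ...thinkingArgs }
}

console.log(buildModelArgs("openai/o3-mini", 0)) // { reasoning_effort: "medium" }
console.log(buildModelArgs("anthropic/claude-3-7-sonnet", 8192)) // { thinking: { type: "enabled", budget_tokens: 8192 } }
console.log(buildModelArgs("openai/gpt-4o", 0)) // {}
```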
@@ -73,7 +87,7 @@ export class RequestyHandler implements ApiHandler {
 			const outputTokens = usage.completion_tokens || 0
 			const cacheWriteTokens = usage.prompt_tokens_details?.caching_tokens || undefined
 			const cacheReadTokens = usage.prompt_tokens_details?.cached_tokens || undefined
-			const totalCost = 0 // TODO: Replace with calculateApiCostOpenAI(model.info, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
+			const totalCost = calculateApiCostOpenAI(model.info, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
 
 			yield {
 				type: "usage",
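This hunk wires in calculateApiCostOpenAI from ../../utils/cost but the diff doesn't show its body. A hedged sketch of the arithmetic the call site implies, assuming ModelInfo carries USD-per-million-token prices (the field names inputPrice, outputPrice, cacheWritesPrice, and cacheReadsPrice are assumptions, not confirmed by this diff) and that OpenAI-style prompt_tokens already include the cached tokens:

```ts
// Hedged sketch: NOT the repository's implementation, only the arithmetic the
// call site implies. Prices are assumed to be USD per million tokens, and
// prompt_tokens is assumed to already contain cache-write and cache-read tokens.
interface PricingInfo {
	inputPrice?: number
	outputPrice?: number
	cacheWritesPrice?: number
	cacheReadsPrice?: number
}

function calculateApiCostOpenAISketch(
	info: PricingInfo,
	inputTokens: number,
	outputTokens: number,
	cacheWriteTokens = 0,
	cacheReadTokens = 0,
): number {
	// Bill cached tokens at their own rates and the remainder at the input rate.
	const nonCachedInput = Math.max(0, inputTokens - cacheWriteTokens - cacheReadTokens)
	return (
		((info.inputPrice ?? 0) / 1_000_000) * nonCachedInput +
		((info.outputPrice ?? 0) / 1_000_000) * outputTokens +
		((info.cacheWritesPrice ?? 0) / 1_000_000) * cacheWriteTokens +
		((info.cacheReadsPrice ?? 0) / 1_000_000) * cacheReadTokens
	)
}
```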
@@ -88,9 +102,11 @@ export class RequestyHandler implements ApiHandler {
 	}
 
 	getModel(): { id: string; info: ModelInfo } {
-		return {
-			id: this.options.requestyModelId ?? "",
-			info: openAiModelInfoSaneDefaults,
+		const modelId = this.options.requestyModelId
+		const modelInfo = this.options.requestyModelInfo
+		if (modelId && modelInfo) {
+			return { id: modelId, info: modelInfo }
 		}
+		return { id: requestyDefaultModelId, info: requestyDefaultModelInfo }
 	}
 }
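The reworked getModel() only trusts a caller-supplied model when both the id and its metadata are present; otherwise it falls back to the Requesty defaults exported by ../../shared/api. A condensed sketch of that fallback, with placeholder values standing in for the real exports (their concrete contents are not shown in this diff):

```ts
// Condensed sketch of the fallback logic in getModel(). The default id and
// info below are placeholders, not the repository's actual defaults.
interface ModelInfo {
	maxTokens?: number
}
const requestyDefaultModelId = "example/default-model" // placeholder
const requestyDefaultModelInfo: ModelInfo = { maxTokens: 8192 } // placeholder

function resolveModel(
	requestyModelId?: string,
	requestyModelInfo?: ModelInfo,
): { id: string; info: ModelInfo } {
	// Require both pieces: a bare id paired with default metadata could set the
	// wrong max_tokens or misprice requests downstream.
	if (requestyModelId && requestyModelInfo) {
		return { id: requestyModelId, info: requestyModelInfo }
	}
	return { id: requestyDefaultModelId, info: requestyDefaultModelInfo }
}
```

Requiring id and info together is what lets createMessage use model.info.maxTokens and the cost calculation unconditionally: every path out of getModel() returns a complete pair.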