@@ -17,6 +17,40 @@ export class LiteLlmHandler implements ApiHandler {
 		})
 	}
 
+	async calculateCost(prompt_tokens: number, completion_tokens: number): Promise<number | undefined> {
+		// Reference: https://github.com/BerriAI/litellm/blob/122ee634f434014267af104814022af1d9a0882f/litellm/proxy/spend_tracking/spend_management_endpoints.py#L1473
+		const modelId = this.options.liteLlmModelId || liteLlmDefaultModelId
+		try {
+			const response = await fetch(`${this.client.baseURL}/spend/calculate`, {
+				method: "POST",
+				headers: {
+					"Content-Type": "application/json",
+					Authorization: `Bearer ${this.options.liteLlmApiKey}`,
+				},
+				body: JSON.stringify({
+					completion_response: {
+						model: modelId,
+						usage: {
+							prompt_tokens,
+							completion_tokens,
+						},
+					},
+				}),
+			})
+
+			if (response.ok) {
+				const data: { cost: number } = await response.json()
+				return data.cost
+			} else {
+				console.error("Error calculating spend:", response.statusText)
+				return undefined
+			}
+		} catch (error) {
+			console.error("Error calculating spend:", error)
+			return undefined
+		}
+	}
+
 	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const formattedMessages = convertToOpenAiMessages(messages)
 		const systemMessage: OpenAI.Chat.ChatCompletionSystemMessageParam = {
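For context, `/spend/calculate` is the LiteLLM proxy endpoint the new method relies on: it prices a completion-shaped payload against the proxy's model pricing table and returns the dollar amount. A minimal standalone sketch of the same exchange follows; the proxy URL, API key, and model name are placeholders, while the payload shape and the `{ cost }` response field mirror the handler code above.

```ts
// Hypothetical one-off probe against a local LiteLLM proxy.
// URL, key, and model are placeholders; the request/response shape
// matches what calculateCost() above sends and expects.
const res = await fetch("http://localhost:4000/spend/calculate", {
	method: "POST",
	headers: {
		"Content-Type": "application/json",
		Authorization: "Bearer sk-...", // proxy API key (placeholder)
	},
	body: JSON.stringify({
		completion_response: {
			model: "gpt-4o", // any model the proxy has pricing for (assumption)
			usage: { prompt_tokens: 1000, completion_tokens: 500 },
		},
	}),
})
const { cost } = (await res.json()) as { cost: number } // dollars for this usage
console.log(`Cost: $${cost}`)
```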
@@ -39,6 +73,9 @@ export class LiteLlmHandler implements ApiHandler {
 			stream_options: { include_usage: true },
 		})
 
+		const inputCost = (await this.calculateCost(1e6, 0)) || 0
+		const outputCost = (await this.calculateCost(0, 1e6)) || 0
+
 		for await (const chunk of stream) {
 			const delta = chunk.choices[0]?.delta
 			if (delta?.content) {
@@ -49,10 +86,13 @@ export class LiteLlmHandler implements ApiHandler {
 			}
 
 			if (chunk.usage) {
+				const totalCost =
+					(inputCost * (chunk.usage.prompt_tokens || 0)) / 1e6 + (outputCost * (chunk.usage.completion_tokens || 0)) / 1e6
 				yield {
 					type: "usage",
 					inputTokens: chunk.usage.prompt_tokens || 0,
 					outputTokens: chunk.usage.completion_tokens || 0,
 					totalCost,
 				}
 			}
 		}
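The probe-and-scale arithmetic is worth spelling out: `calculateCost(1e6, 0)` asks the proxy to price one million prompt tokens with no completion tokens, so `inputCost` comes back as a per-million input rate; `calculateCost(0, 1e6)` does the same for the output rate. `totalCost` then scales each rate by the actual token counts from the final usage chunk. A worked sketch with made-up rates:

```ts
// Hypothetical rates, purely illustrative:
// $2.50 per 1M input tokens, $10.00 per 1M output tokens.
const inputCost = 2.5
const outputCost = 10.0

// Usage reported by the stream's final chunk (example values).
const prompt_tokens = 12_000
const completion_tokens = 3_000

// Same formula as the handler above:
const totalCost = (inputCost * prompt_tokens) / 1e6 + (outputCost * completion_tokens) / 1e6
console.log(totalCost) // 0.03 + 0.03 = 0.06 dollars
```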