diff --git a/src/api/providers/__tests__/requesty.test.ts b/src/api/providers/__tests__/requesty.test.ts
index 7867b15ebc5..47921a1c532 100644
--- a/src/api/providers/__tests__/requesty.test.ts
+++ b/src/api/providers/__tests__/requesty.test.ts
@@ -22,8 +22,10 @@ describe("RequestyHandler", () => {
 				contextWindow: 4000,
 				supportsPromptCache: false,
 				supportsImages: true,
-				inputPrice: 0,
-				outputPrice: 0,
+				inputPrice: 1,
+				outputPrice: 10,
+				cacheReadsPrice: 0.1,
+				cacheWritesPrice: 1.5,
 			},
 			openAiStreamingEnabled: true,
 			includeMaxTokens: true, // Add this to match the implementation
@@ -83,8 +85,12 @@ describe("RequestyHandler", () => {
 					yield {
 						choices: [{ delta: { content: " world" } }],
 						usage: {
-							prompt_tokens: 10,
-							completion_tokens: 5,
+							prompt_tokens: 30,
+							completion_tokens: 10,
+							prompt_tokens_details: {
+								cached_tokens: 15,
+								caching_tokens: 5,
+							},
 						},
 					}
 				},
@@ -105,10 +111,11 @@ describe("RequestyHandler", () => {
 				{ type: "text", text: " world" },
 				{
 					type: "usage",
-					inputTokens: 10,
-					outputTokens: 5,
-					cacheWriteTokens: undefined,
-					cacheReadTokens: undefined,
+					inputTokens: 30,
+					outputTokens: 10,
+					cacheWriteTokens: 5,
+					cacheReadTokens: 15,
+					totalCost: 0.000119, // (10 * 1 / 1,000,000) + (5 * 1.5 / 1,000,000) + (15 * 0.1 / 1,000,000) + (10 * 10 / 1,000,000)
 				},
 			])

@@ -182,6 +189,9 @@ describe("RequestyHandler", () => {
 					type: "usage",
 					inputTokens: 10,
 					outputTokens: 5,
+					cacheWriteTokens: 0,
+					cacheReadTokens: 0,
+					totalCost: 0.00006, // (10 * 1 / 1,000,000) + (5 * 10 / 1,000,000)
 				},
 			])

diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts
index 9262f3b75a5..5d85a86a5a7 100644
--- a/src/api/providers/openai.ts
+++ b/src/api/providers/openai.ts
@@ -111,7 +111,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 					}
 				}
 				if (chunk.usage) {
-					yield this.processUsageMetrics(chunk.usage)
+					yield this.processUsageMetrics(chunk.usage, modelInfo)
 				}
 			}
 		} else {
@@ -134,11 +134,11 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 				type: "text",
 				text: response.choices[0]?.message.content || "",
 			}
-			yield this.processUsageMetrics(response.usage)
+			yield this.processUsageMetrics(response.usage, modelInfo)
 		}
 	}

-	protected processUsageMetrics(usage: any): ApiStreamUsageChunk {
+	protected processUsageMetrics(usage: any, modelInfo?: ModelInfo): ApiStreamUsageChunk {
 		return {
 			type: "usage",
 			inputTokens: usage?.prompt_tokens || 0,
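The openai.ts change above only widens the processUsageMetrics hook: the base class now accepts an optional ModelInfo that it ignores, so existing subclasses keep compiling unchanged, while price-aware providers can override the hook and consume the extra argument. A minimal sketch of that pattern, assuming hypothetical class names (they are not from the codebase):

import { ModelInfo } from "../../shared/api"
import { ApiStreamUsageChunk } from "../transform/stream"

class ExampleBaseHandler {
	// Base behavior: report token counts and ignore the pricing info,
	// mirroring OpenAiHandler above.
	protected processUsageMetrics(usage: any, modelInfo?: ModelInfo): ApiStreamUsageChunk {
		return {
			type: "usage",
			inputTokens: usage?.prompt_tokens || 0,
			outputTokens: usage?.completion_tokens || 0,
		}
	}
}

class ExamplePricedHandler extends ExampleBaseHandler {
	// A subclass that knows its prices can enrich the same chunk,
	// as RequestyHandler does in the next file.
	protected override processUsageMetrics(usage: any, modelInfo?: ModelInfo): ApiStreamUsageChunk {
		const chunk = super.processUsageMetrics(usage, modelInfo)
		if (modelInfo?.inputPrice && modelInfo?.outputPrice) {
			chunk.totalCost =
				(modelInfo.inputPrice / 1_000_000) * chunk.inputTokens +
				(modelInfo.outputPrice / 1_000_000) * chunk.outputTokens
		}
		return chunk
	}
}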
diff --git a/src/api/providers/requesty.ts b/src/api/providers/requesty.ts
index 0060bfc5d8a..434d6f43161 100644
--- a/src/api/providers/requesty.ts
+++ b/src/api/providers/requesty.ts
@@ -1,9 +1,20 @@
 import axios from "axios"

 import { ModelInfo, requestyModelInfoSaneDefaults, requestyDefaultModelId } from "../../shared/api"
-import { parseApiPrice } from "../../utils/cost"
+import { calculateApiCostOpenAI, parseApiPrice } from "../../utils/cost"
 import { ApiStreamUsageChunk } from "../transform/stream"
 import { OpenAiHandler, OpenAiHandlerOptions } from "./openai"
+import OpenAI from "openai"
+
+// Requesty usage includes extra fields for Anthropic use cases.
+// Safely cast the prompt token details section to the appropriate structure.
+interface RequestyUsage extends OpenAI.CompletionUsage {
+	prompt_tokens_details?: {
+		caching_tokens?: number
+		cached_tokens?: number
+	}
+	total_cost?: number
+}

 export class RequestyHandler extends OpenAiHandler {
 	constructor(options: OpenAiHandlerOptions) {
@@ -27,13 +38,22 @@ export class RequestyHandler extends OpenAiHandler {
 		}
 	}

-	protected override processUsageMetrics(usage: any): ApiStreamUsageChunk {
+	protected override processUsageMetrics(usage: any, modelInfo?: ModelInfo): ApiStreamUsageChunk {
+		const requestyUsage = usage as RequestyUsage
+		const inputTokens = requestyUsage?.prompt_tokens || 0
+		const outputTokens = requestyUsage?.completion_tokens || 0
+		const cacheWriteTokens = requestyUsage?.prompt_tokens_details?.caching_tokens || 0
+		const cacheReadTokens = requestyUsage?.prompt_tokens_details?.cached_tokens || 0
+		const totalCost = modelInfo
+			? calculateApiCostOpenAI(modelInfo, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
+			: 0
 		return {
 			type: "usage",
-			inputTokens: usage?.prompt_tokens || 0,
-			outputTokens: usage?.completion_tokens || 0,
-			cacheWriteTokens: usage?.cache_creation_input_tokens,
-			cacheReadTokens: usage?.cache_read_input_tokens,
+			inputTokens: inputTokens,
+			outputTokens: outputTokens,
+			cacheWriteTokens: cacheWriteTokens,
+			cacheReadTokens: cacheReadTokens,
+			totalCost: totalCost,
 		}
 	}
 }

diff --git a/src/api/providers/vscode-lm.ts b/src/api/providers/vscode-lm.ts
index bf1215e2388..0ce2a6e26a6 100644
--- a/src/api/providers/vscode-lm.ts
+++ b/src/api/providers/vscode-lm.ts
@@ -2,7 +2,7 @@ import { Anthropic } from "@anthropic-ai/sdk"
 import * as vscode from "vscode"

 import { SingleCompletionHandler } from "../"
-import { calculateApiCost } from "../../utils/cost"
+import { calculateApiCostAnthropic } from "../../utils/cost"
 import { ApiStream } from "../transform/stream"
 import { convertToVsCodeLmMessages } from "../transform/vscode-lm-format"
 import { SELECTOR_SEPARATOR, stringifyVsCodeLmModelSelector } from "../../shared/vsCodeSelectorUtils"
@@ -462,7 +462,7 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
 				type: "usage",
 				inputTokens: totalInputTokens,
 				outputTokens: totalOutputTokens,
-				totalCost: calculateApiCost(this.getModel().info, totalInputTokens, totalOutputTokens),
+				totalCost: calculateApiCostAnthropic(this.getModel().info, totalInputTokens, totalOutputTokens),
 			}
 		} catch (error: unknown) {
 			this.ensureCleanState()

diff --git a/src/core/Cline.ts b/src/core/Cline.ts
index 16f1d4e99d2..7f2b77ec1ec 100644
--- a/src/core/Cline.ts
+++ b/src/core/Cline.ts
@@ -55,7 +55,7 @@ import { ClineAskResponse } from "../shared/WebviewMessage"
 import { GlobalFileNames } from "../shared/globalFileNames"
 import { defaultModeSlug, getModeBySlug, getFullModeDetails } from "../shared/modes"
 import { EXPERIMENT_IDS, experiments as Experiments, ExperimentId } from "../shared/experiments"
-import { calculateApiCost } from "../utils/cost"
+import { calculateApiCostAnthropic } from "../utils/cost"
 import { fileExistsAtPath } from "../utils/fs"
 import { arePathsEqual, getReadablePath } from "../utils/path"
 import { parseMentions } from "./mentions"
@@ -875,7 +875,7 @@ export class Cline {
 		// The way this agentic loop works is that cline will be given a task that he then calls tools to complete. unless there's an attempt_completion call, we keep responding back to him with his tool's responses until he either attempt_completion or does not use anymore tools. If he does not use anymore tools, we ask him to consider if he's completed the task and then call attempt_completion, otherwise proceed with completing the task.
 		// There is a MAX_REQUESTS_PER_TASK limit to prevent infinite requests, but Cline is prompted to finish the task as efficiently as he can.
-		//const totalCost = this.calculateApiCost(totalInputTokens, totalOutputTokens)
+		//const totalCost = this.calculateApiCostAnthropic(totalInputTokens, totalOutputTokens)
 		if (didEndLoop) {
 			// For now a task never 'completes'. This will only happen if the user hits max requests and denies resetting the count.
 			//this.say("task_completed", `Task completed. Total API usage cost: ${totalCost}`)
@@ -3159,7 +3159,7 @@ export class Cline {
 					cacheReads: cacheReadTokens,
 					cost:
 						totalCost ??
-						calculateApiCost(
+						calculateApiCostAnthropic(
 							this.api.getModel().info,
 							inputTokens,
 							outputTokens,
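Before the test changes below, it is worth spelling out the arithmetic the new RequestyHandler.processUsageMetrics performs for the streaming fixture in requesty.test.ts above. A standalone sketch, assuming the fixture's prices (maxTokens here is illustrative, since the fixture's value is not shown in the diff):

import { ModelInfo } from "../../shared/api"
import { calculateApiCostOpenAI } from "../../utils/cost"

// Prices from the test fixture: $1/M input, $10/M output,
// $0.10/M cache reads, $1.50/M cache writes.
const info: ModelInfo = {
	maxTokens: 2048, // illustrative, not from the fixture
	contextWindow: 4000,
	supportsPromptCache: false,
	supportsImages: true,
	inputPrice: 1,
	outputPrice: 10,
	cacheReadsPrice: 0.1,
	cacheWritesPrice: 1.5,
}

// Requesty reports prompt_tokens INCLUSIVE of cache activity: of the 30
// prompt tokens, 15 were read from cache (cached_tokens) and 5 were written
// to it (caching_tokens), leaving 30 - 15 - 5 = 10 billed at the full input price.
const totalCost = calculateApiCostOpenAI(info, 30, 10, 5, 15)

// (10 * 1 + 5 * 1.5 + 15 * 0.1 + 10 * 10) / 1_000_000 = 0.000119,
// matching the totalCost the streaming test expects (modulo float rounding).
console.log(totalCost)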
diff --git a/src/utils/__tests__/cost.test.ts b/src/utils/__tests__/cost.test.ts
index e390c4af7f6..4501f86b880 100644
--- a/src/utils/__tests__/cost.test.ts
+++ b/src/utils/__tests__/cost.test.ts
@@ -1,8 +1,8 @@
-import { calculateApiCost } from "../cost"
+import { calculateApiCostAnthropic, calculateApiCostOpenAI } from "../cost"
 import { ModelInfo } from "../../shared/api"

 describe("Cost Utility", () => {
-	describe("calculateApiCost", () => {
+	describe("calculateApiCostAnthropic", () => {
 		const mockModelInfo: ModelInfo = {
 			maxTokens: 8192,
 			contextWindow: 200_000,
@@ -14,7 +14,7 @@ describe("Cost Utility", () => {
 		}

 		it("should calculate basic input/output costs correctly", () => {
-			const cost = calculateApiCost(mockModelInfo, 1000, 500)
+			const cost = calculateApiCostAnthropic(mockModelInfo, 1000, 500)

 			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
@@ -23,7 +23,7 @@ describe("Cost Utility", () => {
 		})

 		it("should handle cache writes cost", () => {
-			const cost = calculateApiCost(mockModelInfo, 1000, 500, 2000)
+			const cost = calculateApiCostAnthropic(mockModelInfo, 1000, 500, 2000)

 			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
@@ -33,7 +33,7 @@ describe("Cost Utility", () => {
 		})

 		it("should handle cache reads cost", () => {
-			const cost = calculateApiCost(mockModelInfo, 1000, 500, undefined, 3000)
+			const cost = calculateApiCostAnthropic(mockModelInfo, 1000, 500, undefined, 3000)

 			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
@@ -43,7 +43,7 @@ describe("Cost Utility", () => {
 		})

 		it("should handle all cost components together", () => {
-			const cost = calculateApiCost(mockModelInfo, 1000, 500, 2000, 3000)
+			const cost = calculateApiCostAnthropic(mockModelInfo, 1000, 500, 2000, 3000)

 			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
@@ -60,17 +60,17 @@ describe("Cost Utility", () => {
 				supportsPromptCache: true,
 			}

-			const cost = calculateApiCost(modelWithoutPrices, 1000, 500, 2000, 3000)
+			const cost = calculateApiCostAnthropic(modelWithoutPrices, 1000, 500, 2000, 3000)
 			expect(cost).toBe(0)
 		})

 		it("should handle zero tokens", () => {
-			const cost = calculateApiCost(mockModelInfo, 0, 0, 0, 0)
+			const cost = calculateApiCostAnthropic(mockModelInfo, 0, 0, 0, 0)
 			expect(cost).toBe(0)
 		})

 		it("should handle undefined cache values", () => {
-			const cost = calculateApiCost(mockModelInfo, 1000, 500)
+			const cost = calculateApiCostAnthropic(mockModelInfo, 1000, 500)

 			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
 			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
@@ -85,7 +85,7 @@ describe("Cost Utility", () => {
 				cacheReadsPrice: undefined,
 			}

-			const cost = calculateApiCost(modelWithoutCachePrices, 1000, 500, 2000, 3000)
+			const cost = calculateApiCostAnthropic(modelWithoutCachePrices, 1000, 500, 2000, 3000)

 			// Should only include input and output costs
 			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
@@ -94,4 +94,97 @@ describe("Cost Utility", () => {
 			expect(cost).toBe(0.0105)
 		})
 	})
+
+	describe("calculateApiCostOpenAI", () => {
+		const mockModelInfo: ModelInfo = {
+			maxTokens: 8192,
+			contextWindow: 200_000,
+			supportsPromptCache: true,
+			inputPrice: 3.0, // $3 per million tokens
+			outputPrice: 15.0, // $15 per million tokens
+			cacheWritesPrice: 3.75, // $3.75 per million tokens
+			cacheReadsPrice: 0.3, // $0.30 per million tokens
+		}
+
+		it("should calculate basic input/output costs correctly", () => {
+			const cost = calculateApiCostOpenAI(mockModelInfo, 1000, 500)
+
+			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
+			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
+			// Total: 0.003 + 0.0075 = 0.0105
+			expect(cost).toBe(0.0105)
+		})
+
+		it("should handle cache writes cost", () => {
+			const cost = calculateApiCostOpenAI(mockModelInfo, 3000, 500, 2000)
+
+			// Input cost: (3.0 / 1_000_000) * (3000 - 2000) = 0.003
+			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
+			// Cache writes: (3.75 / 1_000_000) * 2000 = 0.0075
+			// Total: 0.003 + 0.0075 + 0.0075 = 0.018
+			expect(cost).toBeCloseTo(0.018, 6)
+		})
+
+		it("should handle cache reads cost", () => {
+			const cost = calculateApiCostOpenAI(mockModelInfo, 4000, 500, undefined, 3000)
+
+			// Input cost: (3.0 / 1_000_000) * (4000 - 3000) = 0.003
+			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
+			// Cache reads: (0.3 / 1_000_000) * 3000 = 0.0009
+			// Total: 0.003 + 0.0075 + 0.0009 = 0.0114
+			expect(cost).toBe(0.0114)
+		})
+
+		it("should handle all cost components together", () => {
+			const cost = calculateApiCostOpenAI(mockModelInfo, 6000, 500, 2000, 3000)
+
+			// Input cost: (3.0 / 1_000_000) * (6000 - 2000 - 3000) = 0.003
+			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
+			// Cache writes: (3.75 / 1_000_000) * 2000 = 0.0075
+			// Cache reads: (0.3 / 1_000_000) * 3000 = 0.0009
+			// Total: 0.003 + 0.0075 + 0.0075 + 0.0009 = 0.0189
+			expect(cost).toBe(0.0189)
+		})
+
+		it("should handle missing prices gracefully", () => {
+			const modelWithoutPrices: ModelInfo = {
+				maxTokens: 8192,
+				contextWindow: 200_000,
+				supportsPromptCache: true,
+			}
+
+			const cost = calculateApiCostOpenAI(modelWithoutPrices, 1000, 500, 2000, 3000)
+			expect(cost).toBe(0)
+		})
+
+		it("should handle zero tokens", () => {
+			const cost = calculateApiCostOpenAI(mockModelInfo, 0, 0, 0, 0)
+			expect(cost).toBe(0)
+		})
+
+		it("should handle undefined cache values", () => {
+			const cost = calculateApiCostOpenAI(mockModelInfo, 1000, 500)
+
+			// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
+			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
+			// Total: 0.003 + 0.0075 = 0.0105
+			expect(cost).toBe(0.0105)
+		})
+
+		it("should handle missing cache prices", () => {
+			const modelWithoutCachePrices: ModelInfo = {
+				...mockModelInfo,
+				cacheWritesPrice: undefined,
+				cacheReadsPrice: undefined,
+			}
+
+			const cost = calculateApiCostOpenAI(modelWithoutCachePrices, 6000, 500, 2000, 3000)
+
+			// Should only include input and output costs
+			// Input cost: (3.0 / 1_000_000) * (6000 - 2000 - 3000) = 0.003
+			// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
+			// Total: 0.003 + 0.0075 = 0.0105
+			expect(cost).toBe(0.0105)
+		})
+	})
 })
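The two entry points exercised above differ only in what they take the input token count to mean, which is the whole reason cost.ts below splits the old calculateApiCost in two. A short sketch of the same request priced both ways, using the mock prices from these tests (the variable names are illustrative):

import { ModelInfo } from "../shared/api"
import { calculateApiCostAnthropic, calculateApiCostOpenAI } from "./cost"

const info: ModelInfo = {
	maxTokens: 8192,
	contextWindow: 200_000,
	supportsPromptCache: true,
	inputPrice: 3.0,
	outputPrice: 15.0,
	cacheWritesPrice: 3.75,
	cacheReadsPrice: 0.3,
}

// One request: 1000 uncached input tokens, 2000 cache writes,
// 3000 cache reads, 500 output tokens.

// Anthropic-style usage reports input tokens EXCLUSIVE of cached tokens...
const viaAnthropic = calculateApiCostAnthropic(info, 1000, 500, 2000, 3000)

// ...while OpenAI-style usage reports prompt_tokens INCLUSIVE of them, so the
// same request arrives as 1000 + 2000 + 3000 = 6000 input tokens.
const viaOpenAI = calculateApiCostOpenAI(info, 6000, 500, 2000, 3000)

// Both price the identical request: 0.003 + 0.0075 + 0.0075 + 0.0009 = 0.0189
console.log(viaAnthropic, viaOpenAI)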
diff --git a/src/utils/cost.ts b/src/utils/cost.ts
index adc2ded0a87..48108b63480 100644
--- a/src/utils/cost.ts
+++ b/src/utils/cost.ts
@@ -1,26 +1,57 @@
 import { ModelInfo } from "../shared/api"

-export function calculateApiCost(
+function calculateApiCostInternal(
 	modelInfo: ModelInfo,
 	inputTokens: number,
 	outputTokens: number,
-	cacheCreationInputTokens?: number,
-	cacheReadInputTokens?: number,
+	cacheCreationInputTokens: number,
+	cacheReadInputTokens: number,
 ): number {
-	const modelCacheWritesPrice = modelInfo.cacheWritesPrice
-	let cacheWritesCost = 0
-	if (cacheCreationInputTokens && modelCacheWritesPrice) {
-		cacheWritesCost = (modelCacheWritesPrice / 1_000_000) * cacheCreationInputTokens
-	}
-	const modelCacheReadsPrice = modelInfo.cacheReadsPrice
-	let cacheReadsCost = 0
-	if (cacheReadInputTokens && modelCacheReadsPrice) {
-		cacheReadsCost = (modelCacheReadsPrice / 1_000_000) * cacheReadInputTokens
-	}
+	const cacheWritesCost = ((modelInfo.cacheWritesPrice || 0) / 1_000_000) * cacheCreationInputTokens
+	const cacheReadsCost = ((modelInfo.cacheReadsPrice || 0) / 1_000_000) * cacheReadInputTokens
 	const baseInputCost = ((modelInfo.inputPrice || 0) / 1_000_000) * inputTokens
 	const outputCost = ((modelInfo.outputPrice || 0) / 1_000_000) * outputTokens
 	const totalCost = cacheWritesCost + cacheReadsCost + baseInputCost + outputCost
 	return totalCost
 }

+// For Anthropic-compliant usage, the input token count does NOT include the cached tokens.
+export function calculateApiCostAnthropic(
+	modelInfo: ModelInfo,
+	inputTokens: number,
+	outputTokens: number,
+	cacheCreationInputTokens?: number,
+	cacheReadInputTokens?: number,
+): number {
+	const cacheCreationInputTokensNum = cacheCreationInputTokens || 0
+	const cacheReadInputTokensNum = cacheReadInputTokens || 0
+	return calculateApiCostInternal(
+		modelInfo,
+		inputTokens,
+		outputTokens,
+		cacheCreationInputTokensNum,
+		cacheReadInputTokensNum,
+	)
+}
+
+// For OpenAI-compliant usage, the input token count INCLUDES the cached tokens.
+export function calculateApiCostOpenAI(
+	modelInfo: ModelInfo,
+	inputTokens: number,
+	outputTokens: number,
+	cacheCreationInputTokens?: number,
+	cacheReadInputTokens?: number,
+): number {
+	const cacheCreationInputTokensNum = cacheCreationInputTokens || 0
+	const cacheReadInputTokensNum = cacheReadInputTokens || 0
+	const nonCachedInputTokens = Math.max(0, inputTokens - cacheCreationInputTokensNum - cacheReadInputTokensNum)
+	return calculateApiCostInternal(
+		modelInfo,
+		nonCachedInputTokens,
+		outputTokens,
+		cacheCreationInputTokensNum,
+		cacheReadInputTokensNum,
+	)
+}
+
 export const parseApiPrice = (price: any) => (price ? parseFloat(price) * 1_000_000 : undefined)
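One last note on the unchanged parseApiPrice helper at the bottom of cost.ts: it normalizes a per-token price into the per-million-token figure that ModelInfo stores and the calculators above consume. A tiny sketch (the sample value is illustrative, not taken from any provider's price list):

import { parseApiPrice } from "./cost"

// A per-token price of $0.000003 becomes $3 per million tokens
// (modulo floating-point rounding).
parseApiPrice("0.000003") // => 3

// Falsy input means "no price available", not "free".
parseApiPrice(undefined) // => undefined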