26 changes: 18 additions & 8 deletions src/api/providers/__tests__/requesty.test.ts
@@ -22,8 +22,10 @@ describe("RequestyHandler", () => {
contextWindow: 4000,
supportsPromptCache: false,
supportsImages: true,
inputPrice: 0,
outputPrice: 0,
inputPrice: 1,
outputPrice: 10,
cacheReadsPrice: 0.1,
cacheWritesPrice: 1.5,
},
openAiStreamingEnabled: true,
includeMaxTokens: true, // Add this to match the implementation
@@ -83,8 +85,12 @@ describe("RequestyHandler", () => {
yield {
choices: [{ delta: { content: " world" } }],
usage: {
prompt_tokens: 10,
completion_tokens: 5,
prompt_tokens: 30,
completion_tokens: 10,
prompt_tokens_details: {
cached_tokens: 15,
caching_tokens: 5,
},
},
}
},
@@ -105,10 +111,11 @@ describe("RequestyHandler", () => {
{ type: "text", text: " world" },
{
type: "usage",
inputTokens: 10,
outputTokens: 5,
cacheWriteTokens: undefined,
cacheReadTokens: undefined,
inputTokens: 30,
outputTokens: 10,
cacheWriteTokens: 5,
cacheReadTokens: 15,
totalCost: 0.000119, // (10 * 1 / 1,000,000) + (5 * 1.5 / 1,000,000) + (15 * 0.1 / 1,000,000) + (10 * 10 / 1,000,000)
},
])

@@ -182,6 +189,9 @@ describe("RequestyHandler", () => {
type: "usage",
inputTokens: 10,
outputTokens: 5,
cacheWriteTokens: 0,
cacheReadTokens: 0,
totalCost: 0.00006, // (10 * 1 / 1,000,000) + (5 * 10 / 1,000,000)
},
])

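As a quick sanity check on the expected usage chunk above, the 0.000119 figure can be reproduced with plain arithmetic. This is a standalone sketch, not code from the PR; it assumes the OpenAI-style accounting (cached and cache-creation tokens counted inside prompt_tokens) that the cost tests further down verify:

// Hypothetical standalone check; all numbers come from the test fixture above.
const inputPrice = 1 // $ per million tokens
const outputPrice = 10
const cacheWritesPrice = 1.5
const cacheReadsPrice = 0.1

const promptTokens = 30 // usage.prompt_tokens
const completionTokens = 10 // usage.completion_tokens
const cacheWriteTokens = 5 // usage.prompt_tokens_details.caching_tokens
const cacheReadTokens = 15 // usage.prompt_tokens_details.cached_tokens

// prompt_tokens already includes the cached tokens, so only the remainder is billed at the base input price.
const uncachedInputTokens = promptTokens - cacheWriteTokens - cacheReadTokens // 10

const totalCost =
	(inputPrice * uncachedInputTokens +
		outputPrice * completionTokens +
		cacheWritesPrice * cacheWriteTokens +
		cacheReadsPrice * cacheReadTokens) /
	1_000_000 // 0.000119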
6 changes: 3 additions & 3 deletions src/api/providers/openai.ts
@@ -111,7 +111,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
}
}
if (chunk.usage) {
yield this.processUsageMetrics(chunk.usage)
yield this.processUsageMetrics(chunk.usage, modelInfo)
}
}
} else {
@@ -134,11 +134,11 @@ export class OpenAiHandl
type: "text",
text: response.choices[0]?.message.content || "",
}
yield this.processUsageMetrics(response.usage)
yield this.processUsageMetrics(response.usage, modelInfo)
}
}

protected processUsageMetrics(usage: any): ApiStreamUsageChunk {
protected processUsageMetrics(usage: any, modelInfo?: ModelInfo): ApiStreamUsageChunk {
return {
type: "usage",
inputTokens: usage?.prompt_tokens || 0,
32 changes: 26 additions & 6 deletions src/api/providers/requesty.ts
@@ -1,9 +1,20 @@
import axios from "axios"

import { ModelInfo, requestyModelInfoSaneDefaults, requestyDefaultModelId } from "../../shared/api"
import { parseApiPrice } from "../../utils/cost"
import { calculateApiCostOpenAI, parseApiPrice } from "../../utils/cost"
import { ApiStreamUsageChunk } from "../transform/stream"
import { OpenAiHandler, OpenAiHandlerOptions } from "./openai"
import OpenAI from "openai"

// Requesty usage includes an extra field for Anthropic use cases.
// Safely cast the prompt token details section to the appropriate structure.
interface RequestyUsage extends OpenAI.CompletionUsage {
prompt_tokens_details?: {
caching_tokens?: number
cached_tokens?: number
}
total_cost?: number
}

export class RequestyHandler extends OpenAiHandler {
constructor(options: OpenAiHandlerOptions) {
@@ -27,13 +38,22 @@ export class RequestyHandler extends OpenAiHandler {
}
}

protected override processUsageMetrics(usage: any): ApiStreamUsageChunk {
protected override processUsageMetrics(usage: any, modelInfo?: ModelInfo): ApiStreamUsageChunk {
const requestyUsage = usage as RequestyUsage
const inputTokens = requestyUsage?.prompt_tokens || 0
const outputTokens = requestyUsage?.completion_tokens || 0
const cacheWriteTokens = requestyUsage?.prompt_tokens_details?.caching_tokens || 0
const cacheReadTokens = requestyUsage?.prompt_tokens_details?.cached_tokens || 0
const totalCost = modelInfo
? calculateApiCostOpenAI(modelInfo, inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens)
: 0
return {
type: "usage",
inputTokens: usage?.prompt_tokens || 0,
outputTokens: usage?.completion_tokens || 0,
cacheWriteTokens: usage?.cache_creation_input_tokens,
cacheReadTokens: usage?.cache_read_input_tokens,
inputTokens: inputTokens,
outputTokens: outputTokens,
cacheWriteTokens: cacheWriteTokens,
cacheReadTokens: cacheReadTokens,
totalCost: totalCost,
}
}
}
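The override above depends on calculateApiCostOpenAI interpreting token counts the OpenAI way, where prompt_tokens already contains cached and cache-creation tokens, unlike calculateApiCostAnthropic, which bills inputTokens as-is. Below is a minimal sketch of that presumed behavior, inferred from the cost tests later in this diff rather than copied from the real helper in src/utils/cost:

// Hypothetical sketch; the subtraction and pricing terms are inferred from the expectations in cost.test.ts.
type PricingInfo = {
	inputPrice?: number
	outputPrice?: number
	cacheWritesPrice?: number
	cacheReadsPrice?: number
}

function calculateApiCostOpenAISketch(
	info: PricingInfo,
	inputTokens: number,
	outputTokens: number,
	cacheCreationInputTokens = 0,
	cacheReadInputTokens = 0,
): number {
	// Bill only the uncached share of prompt_tokens at the base input price;
	// cache writes and reads get their own per-million rates.
	const uncachedInputTokens = inputTokens - cacheCreationInputTokens - cacheReadInputTokens
	return (
		((info.inputPrice ?? 0) * uncachedInputTokens +
			(info.outputPrice ?? 0) * outputTokens +
			(info.cacheWritesPrice ?? 0) * cacheCreationInputTokens +
			(info.cacheReadsPrice ?? 0) * cacheReadInputTokens) /
		1_000_000
	)
}

// e.g. calculateApiCostOpenAISketch(mockModelInfo, 6000, 500, 2000, 3000) ≈ 0.0189, in line with the tests below.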
4 changes: 2 additions & 2 deletions src/api/providers/vscode-lm.ts
@@ -2,7 +2,7 @@ import { Anthropic } from "@anthropic-ai/sdk"
import * as vscode from "vscode"

import { SingleCompletionHandler } from "../"
import { calculateApiCost } from "../../utils/cost"
import { calculateApiCostAnthropic } from "../../utils/cost"
import { ApiStream } from "../transform/stream"
import { convertToVsCodeLmMessages } from "../transform/vscode-lm-format"
import { SELECTOR_SEPARATOR, stringifyVsCodeLmModelSelector } from "../../shared/vsCodeSelectorUtils"
@@ -462,7 +462,7 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
type: "usage",
inputTokens: totalInputTokens,
outputTokens: totalOutputTokens,
totalCost: calculateApiCost(this.getModel().info, totalInputTokens, totalOutputTokens),
totalCost: calculateApiCostAnthropic(this.getModel().info, totalInputTokens, totalOutputTokens),
}
} catch (error: unknown) {
this.ensureCleanState()
6 changes: 3 additions & 3 deletions src/core/Cline.ts
@@ -55,7 +55,7 @@ import { ClineAskResponse } from "../shared/WebviewMessage"
import { GlobalFileNames } from "../shared/globalFileNames"
import { defaultModeSlug, getModeBySlug, getFullModeDetails } from "../shared/modes"
import { EXPERIMENT_IDS, experiments as Experiments, ExperimentId } from "../shared/experiments"
import { calculateApiCost } from "../utils/cost"
import { calculateApiCostAnthropic } from "../utils/cost"
import { fileExistsAtPath } from "../utils/fs"
import { arePathsEqual, getReadablePath } from "../utils/path"
import { parseMentions } from "./mentions"
@@ -875,7 +875,7 @@ export class Cline {
// The way this agentic loop works is that cline will be given a task that he then calls tools to complete. unless there's an attempt_completion call, we keep responding back to him with his tool's responses until he either attempt_completion or does not use anymore tools. If he does not use anymore tools, we ask him to consider if he's completed the task and then call attempt_completion, otherwise proceed with completing the task.
// There is a MAX_REQUESTS_PER_TASK limit to prevent infinite requests, but Cline is prompted to finish the task as efficiently as he can.

//const totalCost = this.calculateApiCost(totalInputTokens, totalOutputTokens)
//const totalCost = this.calculateApiCostAnthropic(totalInputTokens, totalOutputTokens)
if (didEndLoop) {
// For now a task never 'completes'. This will only happen if the user hits max requests and denies resetting the count.
//this.say("task_completed", `Task completed. Total API usage cost: ${totalCost}`)
@@ -3159,7 +3159,7 @@ export class Cline {
cacheReads: cacheReadTokens,
cost:
totalCost ??
calculateApiCost(
calculateApiCostAnthropic(
this.api.getModel().info,
inputTokens,
outputTokens,
113 changes: 103 additions & 10 deletions src/utils/__tests__/cost.test.ts
@@ -1,8 +1,8 @@
import { calculateApiCost } from "../cost"
import { calculateApiCostAnthropic, calculateApiCostOpenAI } from "../cost"
import { ModelInfo } from "../../shared/api"

describe("Cost Utility", () => {
describe("calculateApiCost", () => {
describe("calculateApiCostAnthropic", () => {
const mockModelInfo: ModelInfo = {
maxTokens: 8192,
contextWindow: 200_000,
@@ -14,7 +14,7 @@ describe("Cost Utility", () => {
}

it("should calculate basic input/output costs correctly", () => {
const cost = calculateApiCost(mockModelInfo, 1000, 500)
const cost = calculateApiCostAnthropic(mockModelInfo, 1000, 500)

// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
@@ -23,7 +23,7 @@ describe("Cost Utility", () => {
})

it("should handle cache writes cost", () => {
const cost = calculateApiCost(mockModelInfo, 1000, 500, 2000)
const cost = calculateApiCostAnthropic(mockModelInfo, 1000, 500, 2000)

// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
@@ -33,7 +33,7 @@ describe("Cost Utility", () => {
})

it("should handle cache reads cost", () => {
const cost = calculateApiCost(mockModelInfo, 1000, 500, undefined, 3000)
const cost = calculateApiCostAnthropic(mockModelInfo, 1000, 500, undefined, 3000)

// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
@@ -43,7 +43,7 @@ describe("Cost Utility", () => {
})

it("should handle all cost components together", () => {
const cost = calculateApiCost(mockModelInfo, 1000, 500, 2000, 3000)
const cost = calculateApiCostAnthropic(mockModelInfo, 1000, 500, 2000, 3000)

// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
@@ -60,17 +60,17 @@ describe("Cost Utility", () => {
supportsPromptCache: true,
}

const cost = calculateApiCost(modelWithoutPrices, 1000, 500, 2000, 3000)
const cost = calculateApiCostAnthropic(modelWithoutPrices, 1000, 500, 2000, 3000)
expect(cost).toBe(0)
})

it("should handle zero tokens", () => {
const cost = calculateApiCost(mockModelInfo, 0, 0, 0, 0)
const cost = calculateApiCostAnthropic(mockModelInfo, 0, 0, 0, 0)
expect(cost).toBe(0)
})

it("should handle undefined cache values", () => {
const cost = calculateApiCost(mockModelInfo, 1000, 500)
const cost = calculateApiCostAnthropic(mockModelInfo, 1000, 500)

// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
@@ -85,7 +85,7 @@ describe("Cost Utility", () => {
cacheReadsPrice: undefined,
}

const cost = calculateApiCost(modelWithoutCachePrices, 1000, 500, 2000, 3000)
const cost = calculateApiCostAnthropic(modelWithoutCachePrices, 1000, 500, 2000, 3000)

// Should only include input and output costs
// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
@@ -94,4 +94,97 @@ expect(cost).toBe(0.0105)
expect(cost).toBe(0.0105)
})
})

describe("calculateApiCostOpenAI", () => {
const mockModelInfo: ModelInfo = {
maxTokens: 8192,
contextWindow: 200_000,
supportsPromptCache: true,
inputPrice: 3.0, // $3 per million tokens
outputPrice: 15.0, // $15 per million tokens
cacheWritesPrice: 3.75, // $3.75 per million tokens
cacheReadsPrice: 0.3, // $0.30 per million tokens
}

it("should calculate basic input/output costs correctly", () => {
const cost = calculateApiCostOpenAI(mockModelInfo, 1000, 500)

// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
// Total: 0.003 + 0.0075 = 0.0105
expect(cost).toBe(0.0105)
})

it("should handle cache writes cost", () => {
const cost = calculateApiCostOpenAI(mockModelInfo, 3000, 500, 2000)

// Input cost: (3.0 / 1_000_000) * (3000 - 2000) = 0.003
// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
// Cache writes: (3.75 / 1_000_000) * 2000 = 0.0075
// Total: 0.003 + 0.0075 + 0.0075 = 0.018
expect(cost).toBeCloseTo(0.018, 6)
})

it("should handle cache reads cost", () => {
const cost = calculateApiCostOpenAI(mockModelInfo, 4000, 500, undefined, 3000)

// Input cost: (3.0 / 1_000_000) * (4000 - 3000) = 0.003
// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
// Cache reads: (0.3 / 1_000_000) * 3000 = 0.0009
// Total: 0.003 + 0.0075 + 0.0009 = 0.0114
expect(cost).toBe(0.0114)
})

it("should handle all cost components together", () => {
const cost = calculateApiCostOpenAI(mockModelInfo, 6000, 500, 2000, 3000)

// Input cost: (3.0 / 1_000_000) * (6000 - 2000 - 3000) = 0.003
// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
// Cache writes: (3.75 / 1_000_000) * 2000 = 0.0075
// Cache reads: (0.3 / 1_000_000) * 3000 = 0.0009
// Total: 0.003 + 0.0075 + 0.0075 + 0.0009 = 0.0189
expect(cost).toBe(0.0189)
})

it("should handle missing prices gracefully", () => {
const modelWithoutPrices: ModelInfo = {
maxTokens: 8192,
contextWindow: 200_000,
supportsPromptCache: true,
}

const cost = calculateApiCostOpenAI(modelWithoutPrices, 1000, 500, 2000, 3000)
expect(cost).toBe(0)
})

it("should handle zero tokens", () => {
const cost = calculateApiCostOpenAI(mockModelInfo, 0, 0, 0, 0)
expect(cost).toBe(0)
})

it("should handle undefined cache values", () => {
const cost = calculateApiCostOpenAI(mockModelInfo, 1000, 500)

// Input cost: (3.0 / 1_000_000) * 1000 = 0.003
// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
// Total: 0.003 + 0.0075 = 0.0105
expect(cost).toBe(0.0105)
})

it("should handle missing cache prices", () => {
const modelWithoutCachePrices: ModelInfo = {
...mockModelInfo,
cacheWritesPrice: undefined,
cacheReadsPrice: undefined,
}

const cost = calculateApiCostOpenAI(modelWithoutCachePrices, 6000, 500, 2000, 3000)

// Should only include input and output costs
// Input cost: (3.0 / 1_000_000) * (6000 - 2000 - 3000) = 0.003
// Output cost: (15.0 / 1_000_000) * 500 = 0.0075
// Total: 0.003 + 0.0075 = 0.0105
expect(cost).toBe(0.0105)
})
})
})