
Commit 51b5743

Author: Sannidhya Sah (committed)
Implement enhanced token counting with provider-specific optimizations
1 parent 302dc2d commit 51b5743

File tree

13 files changed: +942 -52 lines changed


src/api/providers/base-provider.ts

Lines changed: 49 additions & 8 deletions
@@ -1,30 +1,71 @@
-import { Anthropic } from "@anthropic-ai/sdk"
-
-import { ModelInfo } from "../../shared/api"
-
 import { ApiHandler } from "../index"
 import { ApiStream } from "../transform/stream"
 import { countTokens } from "../../utils/countTokens"
+import { formatTokenInfo, createTokenTooltip } from "../../utils/tokenDisplay"
+import type { ModelInfo, TokenUsageInfo } from "../../shared/api"
+
+// Use any to bypass strict type checking for compatibility
+type ContentBlockParam = any
+type MessageParam = any
 
 /**
  * Base class for API providers that implements common functionality.
  */
 export abstract class BaseProvider implements ApiHandler {
-	abstract createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream
+	abstract createMessage(systemPrompt: string, messages: MessageParam[]): ApiStream
 	abstract getModel(): { id: string; info: ModelInfo }
 
 	/**
-	 * Default token counting implementation using tiktoken.
+	 * Gets the last token usage information
+	 */
+	lastTokenUsage?: TokenUsageInfo
+
+	/**
+	 * Default token counting implementation using enhanced tiktoken.
 	 * Providers can override this to use their native token counting endpoints.
 	 *
 	 * @param content The content to count tokens for
 	 * @returns A promise resolving to the token count
 	 */
-	async countTokens(content: Anthropic.Messages.ContentBlockParam[]): Promise<number> {
+	async countTokens(content: ContentBlockParam[]): Promise<number> {
 		if (content.length === 0) {
 			return 0
 		}
 
-		return countTokens(content, { useWorker: true })
+		// Get the provider ID from the model info
+		const { id: providerId } = this.getModel()
+
+		// Use the provider ID to get provider-specific token counting with enhanced accuracy
+		return countTokens(content, {
+			useWorker: true,
+			provider: providerId,
+			useEnhanced: true, // Use enhanced tiktoken implementation by default
+		})
+	}
+
+	/**
+	 * Formats token information for display in the UI
+	 * @returns Formatted token usage string
+	 */
+	formatTokenDisplay(): string {
+		if (!this.lastTokenUsage) {
+			return "No token usage information available"
+		}
+
+		const { id: providerId } = this.getModel()
+		return formatTokenInfo(this.lastTokenUsage, providerId)
+	}
+
+	/**
+	 * Creates a detailed tooltip for token usage
+	 * @returns Tooltip text with detailed token usage
+	 */
+	createTokenTooltip(): string {
+		if (!this.lastTokenUsage) {
+			return "No token usage information available"
+		}
+
+		const { id: providerId } = this.getModel()
+		return createTokenTooltip(this.lastTokenUsage, providerId)
 	}
 }
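
A minimal usage sketch of the base class above. The subclass name, model id, and thrown error are hypothetical; countTokens, lastTokenUsage, and formatTokenDisplay are the members introduced in this diff.

// Sketch: a hypothetical provider relying on the default (enhanced tiktoken) countTokens.
import { BaseProvider } from "./base-provider"
import { ApiStream } from "../transform/stream"
import type { ModelInfo } from "../../shared/api"

class ExampleProvider extends BaseProvider {
	// The id returned here is what the base countTokens forwards as the `provider` option.
	override getModel(): { id: string; info: ModelInfo } {
		return { id: "example-model", info: {} as ModelInfo }
	}

	override createMessage(systemPrompt: string, messages: any[]): ApiStream {
		throw new Error("not implemented in this sketch")
	}
}

// The default implementation counts locally in a tiktoken worker; no network call is made.
// formatTokenDisplay() only reports real usage once a provider populates lastTokenUsage.
const handler = new ExampleProvider()
const count = await handler.countTokens([{ type: "text", text: "Hello world" }])
console.log(count, handler.formatTokenDisplay())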

src/api/providers/openrouter.ts

Lines changed: 78 additions & 13 deletions
@@ -41,23 +41,23 @@ type OpenRouterChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParams & {
 // See `OpenAI.Chat.Completions.ChatCompletionChunk["usage"]`
 // `CompletionsAPI.CompletionUsage`
 // See also: https://openrouter.ai/docs/use-cases/usage-accounting
-interface CompletionUsage {
-	completion_tokens?: number
-	completion_tokens_details?: {
-		reasoning_tokens?: number
-	}
-	prompt_tokens?: number
+interface CompletionUsage extends OpenAI.CompletionUsage {
+	// Proprietary OpenRouter properties
 	prompt_tokens_details?: {
 		cached_tokens?: number
 	}
-	total_tokens?: number
 	cost?: number
+	// Additional OpenRouter properties that may be present
+	system_tokens?: number
+	cached_tokens?: number
 }
 
 export class OpenRouterHandler extends BaseProvider implements SingleCompletionHandler {
 	protected options: ApiHandlerOptions
 	private client: OpenAI
 	protected models: ModelRecord = {}
+	// Token usage cache for the last API call
+	// Use base class property for token usage information
 
 	constructor(options: ApiHandlerOptions) {
 		super()
@@ -155,15 +155,24 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionHandler {
 		}
 
 		if (lastUsage) {
-			yield {
-				type: "usage",
+			// Save token usage for future reference
+			this.lastTokenUsage = {
 				inputTokens: lastUsage.prompt_tokens || 0,
 				outputTokens: lastUsage.completion_tokens || 0,
-				// Waiting on OpenRouter to figure out what this represents in the Gemini case
-				// and how to best support it.
-				// cacheReadTokens: lastUsage.prompt_tokens_details?.cached_tokens,
-				reasoningTokens: lastUsage.completion_tokens_details?.reasoning_tokens,
+				cachedTokens: lastUsage.prompt_tokens_details?.cached_tokens || 0,
+				reasoningTokens: lastUsage.completion_tokens_details?.reasoning_tokens || 0,
 				totalCost: lastUsage.cost || 0,
+				provider: "openrouter",
+				estimationMethod: "api",
+			}
+
+			yield {
+				type: "usage",
+				inputTokens: this.lastTokenUsage.inputTokens,
+				outputTokens: this.lastTokenUsage.outputTokens,
+				cacheReadTokens: this.lastTokenUsage.cachedTokens,
+				reasoningTokens: this.lastTokenUsage.reasoningTokens,
+				totalCost: this.lastTokenUsage.totalCost,
 			}
 		}
 	}
@@ -196,6 +205,62 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionHandler {
 		}
 	}
 
+	/**
+	 * OpenRouter-specific token counting implementation
+	 * @param content Content to count tokens for
+	 * @returns Estimated token count from OpenRouter API
+	 */
+	override async countTokens(content: Array<Anthropic.Messages.ContentBlockParam>): Promise<number> {
+		try {
+			const { id: modelId, info: modelInfo } = this.getModel()
+			// Convert content blocks to a simple text message for token counting
+			let textContent = ""
+			for (const block of content) {
+				if (block.type === "text") {
+					textContent += block.text || ""
+				} else if (block.type === "image") {
+					textContent += "[IMAGE]"
+				}
+			}
+			const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [{ role: "user", content: textContent }]
+			const response = await this.client.chat.completions.create({
+				model: modelId,
+				messages,
+				stream: false,
+				max_tokens: 0, // Don't generate any tokens, just count them
+			})
+			if (response.usage) {
+				const usage = response.usage as CompletionUsage
+				const inputTokens = usage.prompt_tokens || 0
+				const reasoningTokens = usage.system_tokens || 0
+				const cachedTokens = usage.cached_tokens || 0
+
+				// Calculate cost based on token usage and model rates
+				const inputRate = modelInfo.inputPrice || 0 // Price per 1K tokens
+				const totalCost = (inputTokens * inputRate) / 1000
+
+				// Store token usage for UI display
+				this.lastTokenUsage = {
+					inputTokens,
+					outputTokens: 0, // No output tokens for counting-only request
+					reasoningTokens,
+					cachedTokens,
+					totalCost,
+					provider: "openrouter",
+					estimationMethod: "api",
+				}
+				return inputTokens // Ensure we return a number, not undefined
+			}
+
+			// Fallback to base implementation if the response doesn't include usage info
+			console.warn("OpenRouter token counting didn't return usage info, using fallback")
+			return super.countTokens(content)
+		} catch (error) {
+			console.warn("OpenRouter token counting failed, using fallback", error)
+			return super.countTokens(content)
+		}
+	}
+
 	async completePrompt(prompt: string) {
 		let { id: modelId, maxTokens, thinking, temperature } = await this.fetchModel()
 
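
A rough usage sketch for the override above. The constructor option name and import locations are assumptions; countTokens, lastTokenUsage, and the API-versus-fallback behaviour are as defined in the diff.

// Sketch: counting prompt tokens through the OpenRouter API and reading the cached usage.
import { OpenRouterHandler } from "./openrouter" // path assumed
import type { ApiHandlerOptions } from "../../shared/api" // import location assumed

const handler = new OpenRouterHandler({ openRouterApiKey: "sk-or-..." } as ApiHandlerOptions) // option name assumed

const inputTokens = await handler.countTokens([
	{ type: "text", text: "Summarize the following document ..." },
	{ type: "image", source: { type: "base64", media_type: "image/png", data: "<base64>" } }, // counted as "[IMAGE]"
])

// On success the handler issues a zero-output completion request and stores the
// API-reported usage; on any failure it falls back to the local tiktoken estimate.
console.log(inputTokens, handler.lastTokenUsage?.estimationMethod) // "api" when OpenRouter returned usage info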
src/api/providers/requesty.ts

Lines changed: 69 additions & 0 deletions
@@ -31,6 +31,8 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHandler {
 	protected options: ApiHandlerOptions
 	protected models: ModelRecord = {}
 	private client: OpenAI
+	// Token usage cache for the last API call
+	// Use base class property for token usage information
 
 	constructor(options: ApiHandlerOptions) {
 		super()
@@ -123,6 +125,73 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHandler {
 		}
 	}
 
+	/**
+	 * Requesty-specific token counting implementation
+	 * @param content Content to count tokens for
+	 * @returns Estimated token count from Requesty API
+	 */
+	override async countTokens(content: Array<Anthropic.Messages.ContentBlockParam>): Promise<number> {
+		try {
+			// Get the current model
+			const { id: modelId, info: modelInfo } = this.getModel()
+
+			// Convert content blocks to a simple text message for token counting
+			let textContent = ""
+
+			// Extract text content from Anthropic content blocks
+			for (const block of content) {
+				if (block.type === "text") {
+					textContent += block.text || ""
+				} else if (block.type === "image") {
+					// For images, add a placeholder text to account for some tokens
+					textContent += "[IMAGE]"
+				}
+			}
+
+			// Create a simple message with the text content
+			const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [{ role: "user", content: textContent }]
+
+			// Request token count from Requesty API
+			const response = await this.client.chat.completions.create({
+				model: modelId,
+				messages,
+				stream: false,
+				max_tokens: 0, // Don't generate any tokens, just count them
+			})
+
+			// Extract token count from response
+			if (response.usage) {
+				// Store token usage for future reference
+				const requestyUsage = response.usage as RequestyUsage
+				const inputTokens = requestyUsage.prompt_tokens || 0
+				const cacheWriteTokens = requestyUsage.prompt_tokens_details?.caching_tokens || 0
+				const cacheReadTokens = requestyUsage.prompt_tokens_details?.cached_tokens || 0
+				const totalCost = modelInfo
+					? calculateApiCostOpenAI(modelInfo, inputTokens, 0, cacheWriteTokens, cacheReadTokens)
+					: 0
+
+				this.lastTokenUsage = {
+					inputTokens: inputTokens,
+					outputTokens: 0, // No output since max_tokens is 0
+					cacheWriteTokens: cacheWriteTokens,
+					cacheReadTokens: cacheReadTokens,
+					totalCost: totalCost,
+					provider: "requesty",
+					estimationMethod: "api",
+				}
+
+				return inputTokens
+			}
+
+			// Fallback to base implementation if the response doesn't include usage info
+			console.warn("Requesty token counting didn't return usage info, using fallback")
+			return super.countTokens(content)
+		} catch (error) {
+			console.warn("Requesty token counting failed, using fallback", error)
+			return super.countTokens(content)
+		}
+	}
+
 	async completePrompt(prompt: string): Promise<string> {
 		const model = await this.fetchModel()
 
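
The Requesty override mirrors the OpenRouter one; the sketch below focuses on telling the two estimation paths apart. The helper function and import paths are hypothetical, only countTokens and lastTokenUsage come from the diff.

// Sketch: distinguishing an API-backed count from the local fallback estimate.
import { Anthropic } from "@anthropic-ai/sdk"
import { RequestyHandler } from "./requesty" // path assumed

async function describeCount(handler: RequestyHandler, blocks: Anthropic.Messages.ContentBlockParam[]) {
	const tokens = await handler.countTokens(blocks)
	// If the Requesty request fails or returns no usage, super.countTokens() runs instead
	// and lastTokenUsage is left untouched by this call.
	const method = handler.lastTokenUsage?.estimationMethod === "api" ? "reported by Requesty" : "local tiktoken estimate"
	return `${tokens} tokens (${method})`
}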
Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
+import { Anthropic } from "@anthropic-ai/sdk"
+import OpenAI from "openai"
+
+/**
+ * Convert Anthropic content blocks to OpenAI format
+ * @param content Array of Anthropic content blocks
+ * @returns A formatted content string or array usable with OpenAI
+ */
+export function convertAnthropicContentToOpenAI(
+	content: Anthropic.Messages.ContentBlockParam[],
+): OpenAI.Chat.ChatCompletionContentPart[] | string {
+	if (content.length === 0) {
+		return ""
+	}
+
+	const result: OpenAI.Chat.ChatCompletionContentPart[] = []
+
+	for (const block of content) {
+		if (block.type === "text") {
+			result.push({
+				type: "text",
+				text: block.text || "",
+			})
+		} else if (block.type === "image" && block.source) {
+			// Handle base64 images
+			if (typeof block.source === "object" && "data" in block.source) {
+				result.push({
+					type: "image_url",
+					image_url: {
+						url: `data:image/jpeg;base64,${block.source.data}`,
+					},
+				})
+			}
+			// Handle URL-based images
+			else if (typeof block.source === "object" && "url" in block.source) {
+				// Use type assertion to tell TypeScript that url exists
+				const sourceWithUrl = block.source as { url: string }
+				result.push({
+					type: "image_url",
+					image_url: {
+						url: sourceWithUrl.url,
+					},
+				})
+			}
+		}
+	}
+
+	// If there's only one text block, just return the text string for simpler messages
+	if (result.length === 1 && result[0].type === "text") {
+		return result[0].text
+	}
+
+	return result
+}
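
A short usage sketch for the new converter; the import path is assumed since this view does not show the file name. The return-type behaviour (string for a single text block, array otherwise) is taken from the code above.

// Sketch: text-only content collapses to a plain string, mixed content stays an array.
import { convertAnthropicContentToOpenAI } from "./anthropic-content" // path assumed

const textOnly = convertAnthropicContentToOpenAI([{ type: "text", text: "Hello" }])
// => "Hello"

const mixed = convertAnthropicContentToOpenAI([
	{ type: "text", text: "Describe this image" },
	{ type: "image", source: { type: "base64", media_type: "image/png", data: "iVBORw0..." } },
])
// => [{ type: "text", text: "Describe this image" },
//     { type: "image_url", image_url: { url: "data:image/jpeg;base64,iVBORw0..." } }]
// Note that the converter always emits a data:image/jpeg prefix, regardless of the source media_type.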

src/shared/api.ts

Lines changed: 24 additions & 0 deletions
@@ -1759,3 +1759,27 @@ export function toRouterName(value?: string): RouterName {
 export type ModelRecord = Record<string, ModelInfo>
 
 export type RouterModels = Record<RouterName, ModelRecord>
+
+/**
+ * Interface for token usage information returned by providers
+ */
+export interface TokenUsageInfo {
+	/** Number of input/prompt tokens */
+	inputTokens: number
+	/** Number of output/completion tokens */
+	outputTokens: number
+	/** Number of tokens read from cache (if applicable) */
+	cachedTokens?: number
+	/** Number of reasoning tokens (if applicable) */
+	reasoningTokens?: number
+	/** Number of tokens written to cache (if applicable) */
+	cacheWriteTokens?: number
+	/** Number of tokens read from cache (if applicable) */
+	cacheReadTokens?: number
+	/** Total cost of the request in provider's currency units */
+	totalCost: number
+	/** Provider name */
+	provider: string
+	/** Method used to estimate tokens ('api' or 'estimated') */
+	estimationMethod?: "api" | "estimated"
+}
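
For illustration, a value a provider might store in lastTokenUsage after a counting-only request; the numbers are made up.

// Sketch: a TokenUsageInfo populated the way the OpenRouter handler does in countTokens().
import type { TokenUsageInfo } from "../../shared/api" // path as imported from src/api/providers

const usage: TokenUsageInfo = {
	inputTokens: 1250,
	outputTokens: 0, // counting-only request, nothing generated
	cachedTokens: 400,
	reasoningTokens: 0,
	totalCost: 0.0031, // in the provider's currency units
	provider: "openrouter",
	estimationMethod: "api", // "estimated" when the local tiktoken path was used
}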
