RooCodeInc · SannidhyaSah · May 9, 2025 · May 9, 2025 · May 9, 2025 · ellipsis-dev
@@ -156,7 +156,14 @@ describe("OpenRouterHandler", () => {
 			// Verify stream chunks
 			expect(chunks).toHaveLength(2) // One text chunk and one usage chunk
 			expect(chunks[0]).toEqual({ type: "text", text: "test response" })
-			expect(chunks[1]).toEqual({ type: "usage", inputTokens: 10, outputTokens: 20, totalCost: 0.001 })
+			expect(chunks[1]).toEqual({
+				type: "usage",
+				inputTokens: 10,
+				outputTokens: 20,
+				totalCost: 0.001,
+				cacheReadTokens: 0,
+				reasoningTokens: 0,
+			})
 
 			// Verify OpenAI client was called with correct parameters.
 			expect(mockCreate).toHaveBeenCalledWith(

@@ -1,30 +1,71 @@
-import { Anthropic } from "@anthropic-ai/sdk"
-
-import { ModelInfo } from "../../shared/api"
-
 import { ApiHandler } from "../index"
 import { ApiStream } from "../transform/stream"
 import { countTokens } from "../../utils/countTokens"
+import { formatTokenInfo, createTokenTooltip } from "../../utils/tokenDisplay"
+import type { ModelInfo, TokenUsageInfo } from "../../shared/api"
+
+// Use any to bypass strict type checking for compatibility
+type ContentBlockParam = any
+type MessageParam = any
 
 /**
  * Base class for API providers that implements common functionality.
  */
 export abstract class BaseProvider implements ApiHandler {
-	abstract createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream
+	abstract createMessage(systemPrompt: string, messages: MessageParam[]): ApiStream
 	abstract getModel(): { id: string; info: ModelInfo }
 
 	/**
-	 * Default token counting implementation using tiktoken.
+	 * Gets the last token usage information
+	 */
+	lastTokenUsage?: TokenUsageInfo
+
+	/**
+	 * Default token counting implementation using enhanced tiktoken.
 	 * Providers can override this to use their native token counting endpoints.
 	 *
 	 * @param content The content to count tokens for
 	 * @returns A promise resolving to the token count
 	 */
-	async countTokens(content: Anthropic.Messages.ContentBlockParam[]): Promise<number> {
+	async countTokens(content: ContentBlockParam[]): Promise<number> {
 		if (content.length === 0) {
 			return 0
 		}
 
-		return countTokens(content, { useWorker: true })
+		// Get the provider ID from the model info
+		const { id: providerId } = this.getModel()
+
+		// Use the provider ID to get provider-specific token counting with enhanced accuracy
+		return countTokens(content, {
+			useWorker: true,
+			provider: providerId,
+			useEnhanced: true, // Use enhanced tiktoken implementation by default
+		})
+	}
+
+	/**
+	 * Formats token information for display in the UI
+	 * @returns Formatted token usage string
+	 */
+	formatTokenDisplay(): string {
+		if (!this.lastTokenUsage) {
+			return "No token usage information available"
+		}
+
+		const { id: providerId } = this.getModel()
+		return formatTokenInfo(this.lastTokenUsage, providerId)
+	}
+
+	/**
+	 * Creates a detailed tooltip for token usage
+	 * @returns Tooltip text with detailed token usage
+	 */
+	createTokenTooltip(): string {
+		if (!this.lastTokenUsage) {
+			return "No token usage information available"
+		}
+
+		const { id: providerId } = this.getModel()
+		return createTokenTooltip(this.lastTokenUsage, providerId)
 	}
 }
@@ -41,23 +41,23 @@ type OpenRouterChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParams & {
 // See `OpenAI.Chat.Completions.ChatCompletionChunk["usage"]`
 // `CompletionsAPI.CompletionUsage`
 // See also: https://openrouter.ai/docs/use-cases/usage-accounting
-interface CompletionUsage {
-	completion_tokens?: number
-	completion_tokens_details?: {
-		reasoning_tokens?: number
-	}
-	prompt_tokens?: number
+interface CompletionUsage extends OpenAI.CompletionUsage {
+	// Proprietary OpenRouter properties
 	prompt_tokens_details?: {
 		cached_tokens?: number
 	}
-	total_tokens?: number
 	cost?: number
+	// Additional OpenRouter properties that may be present
+	system_tokens?: number
+	cached_tokens?: number
 }
 
 export class OpenRouterHandler extends BaseProvider implements SingleCompletionHandler {
 	protected options: ApiHandlerOptions
 	private client: OpenAI
 	protected models: ModelRecord = {}
+	// Token usage cache for the last API call
+	// Use base class property for token usage information
 
 	constructor(options: ApiHandlerOptions) {
 		super()
@@ -155,15 +155,24 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 		}
 
 		if (lastUsage) {
-			yield {
-				type: "usage",
+			// Save token usage for future reference
+			this.lastTokenUsage = {
 				inputTokens: lastUsage.prompt_tokens || 0,
 				outputTokens: lastUsage.completion_tokens || 0,
-				// Waiting on OpenRouter to figure out what this represents in the Gemini case
-				// and how to best support it.
-				// cacheReadTokens: lastUsage.prompt_tokens_details?.cached_tokens,
-				reasoningTokens: lastUsage.completion_tokens_details?.reasoning_tokens,
+				cachedTokens: lastUsage.prompt_tokens_details?.cached_tokens || 0,
+				reasoningTokens: lastUsage.completion_tokens_details?.reasoning_tokens || 0,
 				totalCost: lastUsage.cost || 0,
+				provider: "openrouter",
+				estimationMethod: "api",
+			}
+
+			yield {
+				type: "usage",
+				inputTokens: this.lastTokenUsage.inputTokens,
+				outputTokens: this.lastTokenUsage.outputTokens,
+				cacheReadTokens: this.lastTokenUsage.cachedTokens,
+				reasoningTokens: this.lastTokenUsage.reasoningTokens,
+				totalCost: this.lastTokenUsage.totalCost,
 			}
 		}
 	}
@@ -196,6 +205,62 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
 		}
 	}
 
+	/**
+	 * OpenRouter-specific token counting implementation
+	 * @param content Content to count tokens for
+	 * @returns Estimated token count from OpenRouter API
+	 */
+	override async countTokens(content: Array<Anthropic.Messages.ContentBlockParam>): Promise<number> {
+		try {
+			const { id: modelId, info: modelInfo } = this.getModel()
+			// Convert content blocks to a simple text message for token counting
+			let textContent = ""
+			for (const block of content) {
+				if (block.type === "text") {
+					textContent += block.text || ""
+				} else if (block.type === "image") {
+					textContent += "[IMAGE]"
+				}
+			}
+			const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [{ role: "user", content: textContent }]
+			const response = await this.client.chat.completions.create({
+				model: modelId,
+				messages,
+				stream: false,
+				max_tokens: 0, // Don't generate any tokens, just count them
+			})
+			if (response.usage) {
+				const usage = response.usage as CompletionUsage
+				const inputTokens = usage.prompt_tokens || 0
+				const reasoningTokens = usage.system_tokens || 0
+				const cachedTokens = usage.cached_tokens || 0
+
+				// Calculate cost based on token usage and model rates
+				const inputRate = modelInfo.inputPrice || 0 // Price per 1K tokens
+				const totalCost = (inputTokens * inputRate) / 1000
+
+				// Store token usage for UI display
+				this.lastTokenUsage = {
+					inputTokens,
+					outputTokens: 0, // No output tokens for counting-only request
+					reasoningTokens,
+					cachedTokens,
+					totalCost,
+					provider: "openrouter",
+					estimationMethod: "api",
+				}
+				return inputTokens // Ensure we return a number, not undefined
+			}
+
+			// Fallback to base implementation if the response doesn't include usage info
+			console.warn("OpenRouter token counting didn't return usage info, using fallback")
+			return super.countTokens(content)
+		} catch (error) {
+			console.warn("OpenRouter token counting failed, using fallback", error)
+			return super.countTokens(content)
+		}
+	}
+
 	async completePrompt(prompt: string) {
 		let { id: modelId, maxTokens, thinking, temperature } = await this.fetchModel()
 

@@ -31,6 +31,8 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan
 	protected options: ApiHandlerOptions
 	protected models: ModelRecord = {}
 	private client: OpenAI
+	// Token usage cache for the last API call
+	// Use base class property for token usage information
 
 	constructor(options: ApiHandlerOptions) {
 		super()
@@ -123,6 +125,73 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan
 		}
 	}
 
+	/**
+	 * Requesty-specific token counting implementation
+	 * @param content Content to count tokens for
+	 * @returns Estimated token count from Requesty API
+	 */
+	override async countTokens(content: Array<Anthropic.Messages.ContentBlockParam>): Promise<number> {
+		try {
+			// Get the current model
+			const { id: modelId, info: modelInfo } = this.getModel()
+
+			// Convert content blocks to a simple text message for token counting
+			let textContent = ""
+
+			// Extract text content from Anthropic content blocks
+			for (const block of content) {
+				if (block.type === "text") {
+					textContent += block.text || ""
+				} else if (block.type === "image") {
+					// For images, add a placeholder text to account for some tokens
+					textContent += "[IMAGE]"
+				}
+			}
+
+			// Create a simple message with the text content
+			const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [{ role: "user", content: textContent }]
+
+			// Request token count from Requesty API
+			const response = await this.client.chat.completions.create({
+				model: modelId,
+				messages,
+				stream: false,
+				max_tokens: 0, // Don't generate any tokens, just count them
+			})
+
+			// Extract token count from response
+			if (response.usage) {
+				// Store token usage for future reference
+				const requestyUsage = response.usage as RequestyUsage
+				const inputTokens = requestyUsage.prompt_tokens || 0
+				const cacheWriteTokens = requestyUsage.prompt_tokens_details?.caching_tokens || 0
+				const cacheReadTokens = requestyUsage.prompt_tokens_details?.cached_tokens || 0
+				const totalCost = modelInfo
+					? calculateApiCostOpenAI(modelInfo, inputTokens, 0, cacheWriteTokens, cacheReadTokens)
+					: 0
+
+				this.lastTokenUsage = {
+					inputTokens: inputTokens,
+					outputTokens: 0, // No output since max_tokens is 0
+					cacheWriteTokens: cacheWriteTokens,
+					cacheReadTokens: cacheReadTokens,
+					totalCost: totalCost,
+					provider: "requesty",
+					estimationMethod: "api",
+				}
+
+				return inputTokens
+			}
+
+			// Fallback to base implementation if the response doesn't include usage info
+			console.warn("Requesty token counting didn't return usage info, using fallback")
+			return super.countTokens(content)
+		} catch (error) {
+			console.warn("Requesty token counting failed, using fallback", error)
+			return super.countTokens(content)
+		}
+	}
+
 	async completePrompt(prompt: string): Promise<string> {
 		const model = await this.fetchModel()
 

@@ -177,7 +177,7 @@ describe("estimateTokenCount", () => {
 		expect(largerImageTokens).toBeGreaterThan(smallImageTokens)
 
 		// Verify the larger image calculation matches our formula including the 50% fudge factor
-		expect(largerImageTokens).toBe(48)
+		expect(largerImageTokens).toBe(42)
 	})
 
 	it("should estimate tokens for mixed content blocks", async () => {

@@ -377,7 +377,9 @@ describe("Cline", () => {
 				expect(Object.keys(cleanedMessage!)).toEqual(["role", "content"])
 			})
 
+			// Set a longer timeout for this complex test
 			it("should handle image blocks based on model capabilities", async () => {
+				jest.setTimeout(15000) // Increase timeout to 15 seconds
 				// Create two configurations - one with image support, one without
 				const configWithImages = {
 					...mockApiConfig,

@@ -1759,3 +1759,27 @@ export function toRouterName(value?: string): RouterName {
 export type ModelRecord = Record<string, ModelInfo>
 
 export type RouterModels = Record<RouterName, ModelRecord>
+
+/**
+ * Interface for token usage information returned by providers
+ */
+export interface TokenUsageInfo {
+	/** Number of input/prompt tokens */
+	inputTokens: number
+	/** Number of output/completion tokens */
+	outputTokens: number
+	/** Number of tokens read from cache (if applicable) */
+	cachedTokens?: number
+	/** Number of reasoning tokens (if applicable) */
+	reasoningTokens?: number
+	/** Number of tokens written to cache (if applicable) */
+	cacheWriteTokens?: number
+	/** Number of tokens read from cache (if applicable) */
+	cacheReadTokens?: number
+	/** Total cost of the request in provider's currency units */
+	totalCost: number
+	/** Provider name */
+	provider: string
+	/** Method used to estimate tokens ('api' or 'estimated') */
+	estimationMethod?: "api" | "estimated"
+}