
Commit 58f6915

feat: introducing token-counting strategy using both local and API-based counting
1 parent 5bc144a commit 58f6915

File tree: 9 files changed, +316 −98 lines

src/api/index.ts

Lines changed: 9 additions & 1 deletion
@@ -54,9 +54,17 @@ export interface ApiHandler
      * but they can override this to use their native token counting endpoints
      *
      * @param content The content to count tokens for
+     * @param options Additional options for token counting
      * @returns A promise resolving to the token count
      */
-    countTokens(content: Array<Anthropic.Messages.ContentBlockParam>): Promise<number>
+    countTokens(
+        content: Array<Anthropic.Messages.ContentBlockParam>,
+        options: {
+            maxTokens?: number | null
+            effectiveThreshold?: number
+            totalTokens: number
+        },
+    ): Promise<number>
 }
 
 export function buildApiHandler(configuration: ProviderSettings): ApiHandler {
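
For orientation, a minimal sketch of a call against the new signature. The helper name, the literal numbers, and the import path are illustrative assumptions, not part of the commit:

```ts
import { Anthropic } from "@anthropic-ai/sdk"
import type { ApiHandler } from "../index" // path assumed; adjust to the caller's location

// Hypothetical helper: `totalTokens` (tokens already in the conversation) is the
// only required option; `maxTokens` and `effectiveThreshold` tune the safety net.
async function estimatePromptTokens(handler: ApiHandler): Promise<number> {
	const blocks: Anthropic.Messages.ContentBlockParam[] = [
		{ type: "text", text: "Summarize the design doc." },
	]
	return handler.countTokens(blocks, {
		totalTokens: 42_000, // illustrative running conversation total
		maxTokens: 8_192, // illustrative response reservation
		effectiveThreshold: 80, // illustrative condense threshold (percent)
	})
}
```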

src/api/providers/anthropic.ts

Lines changed: 12 additions & 25 deletions
@@ -271,31 +271,18 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHandler
         return content?.type === "text" ? content.text : ""
     }
 
-    /**
-     * Counts tokens for the given content using Anthropic's API
-     *
-     * @param content The content blocks to count tokens for
-     * @returns A promise resolving to the token count
-     */
-    override async countTokens(content: Array<Anthropic.Messages.ContentBlockParam>): Promise<number> {
-        try {
-            return await super.countTokens(content)
-        } catch (error) {
-            console.warn("Anthropic local token counting failed, falling back to remote API", error)
-            try {
-                const { id: model } = this.getModel()
-                const response = await this.client.messages.countTokens({
-                    model,
-                    messages: [{ role: "user", content }],
-                })
-                if (response.input_tokens !== undefined) {
-                    return response.input_tokens
-                }
-                console.warn("Anthropic remote token counting returned undefined, falling back to 0")
-            } catch (remoteError) {
-                console.warn("Anthropic remote token counting failed, falling back to 0", remoteError)
-            }
-            return 0
+    protected override async apiBasedTokenCount(content: Array<Anthropic.Messages.ContentBlockParam>): Promise<number> {
+        const { id: model } = this.getModel()
+        console.log(`API-BASED COUNTINNNNG`)
+        const response = await this.client.messages.countTokens({
+            model,
+            messages: [{ role: "user", content }],
+        })
+
+        if (response.input_tokens === undefined) {
+            throw new Error("Anthropic remote token counting returned undefined.")
         }
+
+        return response.input_tokens
     }
 }
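
The provider contract is now a template method: return a precise count, or throw so the base class falls back to the local estimate. A hedged sketch of what a new provider would supply; the endpoint URL and `{ tokens }` response shape are invented for illustration:

```ts
import { Anthropic } from "@anthropic-ai/sdk"
import { BaseProvider } from "./base-provider"

// Hypothetical provider: only apiBasedTokenCount is shown; createMessage and
// getModel are inherited as abstract. The contract is: return a precise count,
// or throw so BaseProvider.countTokens falls back to the local tiktoken estimate.
abstract class ExampleHandler extends BaseProvider {
	protected override async apiBasedTokenCount(
		content: Array<Anthropic.Messages.ContentBlockParam>,
	): Promise<number> {
		const res = await fetch("https://api.example.com/count_tokens", {
			method: "POST",
			headers: { "content-type": "application/json" },
			body: JSON.stringify({ content }),
		})
		const body = (await res.json()) as { tokens?: number }
		if (body.tokens === undefined) {
			throw new Error("Example API returned no token count")
		}
		return body.tokens
	}
}
```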

src/api/providers/base-provider.ts

Lines changed: 108 additions & 11 deletions
@@ -2,14 +2,66 @@ import { Anthropic } from "@anthropic-ai/sdk"
 
 import type { ModelInfo } from "@roo-code/types"
 
+import { getAllowedTokens, isSafetyNetTriggered } from "../utils/context-safety"
 import type { ApiHandler, ApiHandlerCreateMessageMetadata } from "../index"
 import { ApiStream } from "../transform/stream"
-import { countTokens } from "../../utils/countTokens"
+import { countTokens as localCountTokens } from "../../utils/countTokens"
 
 /**
- * Base class for API providers that implements common functionality.
+ * A utility class to compare local token estimates with precise API counts
+ * and calculate a factor to improve estimation accuracy.
+ */
+class TokenCountComparator {
+    private static readonly MAX_SAMPLES = 20
+    private static readonly DEFAULT_SAFETY_FACTOR = 1.2
+    private static readonly ADDITIONAL_SAFETY_FACTOR = 1.0
+
+    private samples: Array<{ local: number; api: number }> = []
+    private safetyFactor = TokenCountComparator.DEFAULT_SAFETY_FACTOR
+
+    public addSample(local: number, api: number): void {
+        if (local > 0 && api > 0) {
+            this.samples.push({ local, api })
+            if (this.samples.length > TokenCountComparator.MAX_SAMPLES) {
+                this.samples.shift()
+            }
+            this.recalculateSafetyFactor()
+        }
+    }
+
+    public getSafetyFactor(): number {
+        return this.safetyFactor
+    }
+
+    private recalculateSafetyFactor(): void {
+        if (this.samples.length === 0) {
+            this.safetyFactor = TokenCountComparator.DEFAULT_SAFETY_FACTOR
+            return
+        }
+
+        const totalRatio = this.samples.reduce((sum, sample) => sum + sample.api / sample.local, 0)
+        const averageRatio = totalRatio / this.samples.length
+        this.safetyFactor = Math.max(1, averageRatio) * TokenCountComparator.ADDITIONAL_SAFETY_FACTOR
+    }
+
+    public getSampleCount(): number {
+        return this.samples.length
+    }
+
+    public getAverageRatio(): number {
+        if (this.samples.length === 0) return 1
+        const totalRatio = this.samples.reduce((sum, sample) => sum + sample.api / sample.local, 0)
+        return totalRatio / this.samples.length
+    }
+}
+
+/**
+ * Base class for API providers that implements common functionality
 */
 export abstract class BaseProvider implements ApiHandler {
+    protected isFirstRequest = true
+    protected tokenComparator = new TokenCountComparator()
+
     abstract createMessage(
         systemPrompt: string,
         messages: Anthropic.Messages.MessageParam[],
@@ -18,18 +70,63 @@ export abstract class BaseProvider implements ApiHandler
 
     abstract getModel(): { id: string; info: ModelInfo }
 
-    /**
-     * Default token counting implementation using tiktoken.
-     * Providers can override this to use their native token counting endpoints.
-     *
-     * @param content The content to count tokens for
-     * @returns A promise resolving to the token count
-     */
-    async countTokens(content: Anthropic.Messages.ContentBlockParam[]): Promise<number> {
+    // Override this function for each API provider
+    protected async apiBasedTokenCount(content: Anthropic.Messages.ContentBlockParam[]) {
+        return await localCountTokens(content, { useWorker: true })
+    }
+
+    async countTokens(
+        content: Anthropic.Messages.ContentBlockParam[],
+        options: {
+            maxTokens?: number | null
+            effectiveThreshold?: number
+            totalTokens: number
+        },
+    ): Promise<number> {
         if (content.length === 0) {
             return 0
         }
 
-        return countTokens(content, { useWorker: true })
+        const providerName = this.constructor.name
+
+        if (this.isFirstRequest) {
+            this.isFirstRequest = false
+            try {
+                const apiCount = await this.apiBasedTokenCount(content)
+                const localEstimate = await localCountTokens(content, { useWorker: true })
+                this.tokenComparator.addSample(localEstimate, apiCount)
+
+                return apiCount
+            } catch (error) {
+                const localEstimate = await localCountTokens(content, { useWorker: true })
+                return localEstimate
+            }
+        }
+
+        const localEstimate = await localCountTokens(content, { useWorker: true })
+
+        const { info } = this.getModel()
+        const contextWindow = info.contextWindow
+        const allowedTokens = getAllowedTokens(contextWindow, options.maxTokens)
+        const projectedTokens = options.totalTokens + localEstimate * this.tokenComparator.getSafetyFactor()
+
+        if (
+            isSafetyNetTriggered({
+                projectedTokens,
+                contextWindow,
+                effectiveThreshold: options.effectiveThreshold,
+                allowedTokens,
+            })
+        ) {
+            try {
+                const apiCount = await this.apiBasedTokenCount(content)
+                this.tokenComparator.addSample(localEstimate, apiCount)
+                return apiCount
+            } catch (error) {
+                return Math.ceil(localEstimate * this.tokenComparator.getSafetyFactor())
+            }
+        }
+
+        return localEstimate
     }
 }
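
For intuition, the safety factor is just the mean api/local ratio across recent samples, clamped below at 1 (ADDITIONAL_SAFETY_FACTOR is currently a no-op at 1.0). A standalone re-derivation of that arithmetic; the names below are ours, since TokenCountComparator itself is module-private:

```ts
// Illustrative re-computation of TokenCountComparator's math, not the class itself.
type Sample = { local: number; api: number }

function safetyFactor(samples: Sample[]): number {
	if (samples.length === 0) return 1.2 // DEFAULT_SAFETY_FACTOR
	const avgRatio =
		samples.reduce((sum, { local, api }) => sum + api / local, 0) / samples.length
	return Math.max(1, avgRatio) * 1.0 // ADDITIONAL_SAFETY_FACTOR
}

// If local tiktoken estimates run ~10% low against the API:
console.log(safetyFactor([{ local: 1000, api: 1100 }, { local: 500, api: 545 }])) // ≈ 1.095
// If local estimates overshoot, the factor clamps at 1 rather than shrinking counts:
console.log(safetyFactor([{ local: 1000, api: 900 }])) // 1
```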

src/api/providers/gemini.ts

Lines changed: 11 additions & 20 deletions
@@ -25,7 +25,6 @@ type GeminiHandlerOptions = ApiHandlerOptions & {
 
 export class GeminiHandler extends BaseProvider implements SingleCompletionHandler {
     protected options: ApiHandlerOptions
-
     private client: GoogleGenAI
 
     constructor({ isVertex, ...options }: GeminiHandlerOptions) {
@@ -167,26 +166,18 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandler
         }
     }
 
-    override async countTokens(content: Array<Anthropic.Messages.ContentBlockParam>): Promise<number> {
-        try {
-            return await super.countTokens(content)
-        } catch (error) {
-            console.warn("Gemini local token counting failed, falling back to remote API", error)
-            try {
-                const { id: model } = this.getModel()
-                const response = await this.client.models.countTokens({
-                    model,
-                    contents: convertAnthropicContentToGemini(content),
-                })
-                if (response.totalTokens !== undefined) {
-                    return response.totalTokens
-                }
-                console.warn("Gemini remote token counting returned undefined, falling back to 0")
-            } catch (remoteError) {
-                console.warn("Gemini remote token counting failed, falling back to 0", remoteError)
-            }
-            return 0
+    protected override async apiBasedTokenCount(content: Array<Anthropic.Messages.ContentBlockParam>): Promise<number> {
+        const { id: model } = this.getModel()
+        const response = await this.client.models.countTokens({
+            model,
+            contents: convertAnthropicContentToGemini(content),
+        })
+
+        if (response.totalTokens === undefined) {
+            throw new Error("Gemini API returned undefined token count")
         }
+
+        return response.totalTokens
     }
 
     public calculateCost({

src/api/providers/lm-studio.ts

Lines changed: 10 additions & 2 deletions
@@ -64,7 +64,11 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHandler
 
         let inputTokens = 0
         try {
-            inputTokens = await this.countTokens([{ type: "text", text: systemPrompt }, ...toContentBlocks(messages)])
+            inputTokens = await this.countTokens([{ type: "text", text: systemPrompt }, ...toContentBlocks(messages)], {
+                totalTokens: 0,
+                maxTokens: null,
+                effectiveThreshold: undefined,
+            })
         } catch (err) {
             console.error("[LmStudio] Failed to count input tokens:", err)
             inputTokens = 0
@@ -112,7 +116,11 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHandler
 
         let outputTokens = 0
         try {
-            outputTokens = await this.countTokens([{ type: "text", text: assistantText }])
+            outputTokens = await this.countTokens([{ type: "text", text: assistantText }], {
+                totalTokens: 0,
+                maxTokens: null,
+                effectiveThreshold: undefined,
+            })
         } catch (err) {
             console.error("[LmStudio] Failed to count output tokens:", err)
             outputTokens = 0
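
These call sites count a single payload in isolation, so they pass neutral options (with `totalTokens: 0` the projection covers only this one payload). A hypothetical shared constant could make that intent explicit; the name is ours, not the commit's:

```ts
// Hypothetical convenience constant (not in the commit) for call sites that
// count one payload with no running conversation total.
const NEUTRAL_COUNT_OPTIONS = {
	totalTokens: 0,
	maxTokens: null,
	effectiveThreshold: undefined,
} as const

// e.g. inside LmStudioHandler:
// outputTokens = await this.countTokens([{ type: "text", text: assistantText }], NEUTRAL_COUNT_OPTIONS)
```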

src/api/utils/context-safety.ts

Lines changed: 49 additions & 0 deletions
@@ -0,0 +1,49 @@
+import { TOKEN_BUFFER_PERCENTAGE } from "../../core/sliding-window"
+
+type SafetyNetOptions = {
+    projectedTokens: number
+    contextWindow: number
+    effectiveThreshold?: number
+    allowedTokens: number
+}
+
+/**
+ * Calculates the allowed token limit for a given context window, reserving
+ * space for the response and a safety buffer.
+ *
+ * @param contextWindow The total context window size of the model.
+ * @param maxTokens The maximum number of tokens reserved for the response.
+ * @returns The number of tokens allowed for the prompt context.
+ */
+export function getAllowedTokens(contextWindow: number, maxTokens?: number | null) {
+    // Calculate the maximum tokens reserved for response
+    const reservedTokens = maxTokens ?? contextWindow * 0.2
+
+    // Calculate available tokens for conversation history
+    // Truncate if we're within TOKEN_BUFFER_PERCENTAGE of the context window
+    return contextWindow * (1 - TOKEN_BUFFER_PERCENTAGE) - reservedTokens
+}
+
+/**
+ * Determines if the token counting safety net should be triggered.
+ *
+ * The safety net is triggered if the projected token count exceeds either:
+ * 1. The effective condensation threshold (as a percentage of the context window).
+ * 2. The absolute allowed token limit.
+ *
+ * @param options The options for the safety net check.
+ * @returns True if the safety net should be triggered, false otherwise.
+ */
+export function isSafetyNetTriggered({
+    projectedTokens,
+    contextWindow,
+    effectiveThreshold,
+    allowedTokens,
+}: SafetyNetOptions): boolean {
+    // Ensure a valid threshold, defaulting to a high value if not provided,
+    // which effectively relies on the allowedTokens check.
+    const threshold = effectiveThreshold ?? 100
+    const contextPercent = (100 * projectedTokens) / contextWindow
+
+    return contextPercent >= threshold || projectedTokens > allowedTokens
+}
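
A worked numeric example of the two helpers. The value TOKEN_BUFFER_PERCENTAGE = 0.1 is an assumption for illustration, as is the import path:

```ts
import { getAllowedTokens, isSafetyNetTriggered } from "./context-safety" // path assumed

// A 200k-token model reserving 8,192 tokens for the response:
const contextWindow = 200_000
const allowedTokens = getAllowedTokens(contextWindow, 8_192)
// = 200_000 * (1 - 0.1) - 8_192 = 171_808

// 150k projected tokens is 75% of the window: under an 80% threshold and under
// allowedTokens, so no precise recount is triggered.
isSafetyNetTriggered({ projectedTokens: 150_000, contextWindow, effectiveThreshold: 80, allowedTokens }) // false

// 175k is 87.5% of the window and above allowedTokens: trigger the API recount.
isSafetyNetTriggered({ projectedTokens: 175_000, contextWindow, effectiveThreshold: 80, allowedTokens }) // true
```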

src/core/condense/index.ts

Lines changed: 7 additions & 1 deletion
@@ -198,7 +198,13 @@ export async function summarizeConversation(
         typeof message.content === "string" ? [{ text: message.content, type: "text" as const }] : message.content,
     )
 
-    const newContextTokens = outputTokens + (await apiHandler.countTokens(contextBlocks))
+    const newContextTokens =
+        outputTokens +
+        (await apiHandler.countTokens(contextBlocks, {
+            totalTokens: 0,
+            maxTokens: null,
+            effectiveThreshold: undefined,
+        }))
     if (newContextTokens >= prevContextTokens) {
         const error = t("common:errors.condense_context_grew")
         return { ...response, cost, error }
