Skip to content

Commit a29689d

Browse files
committed
Use OpenRouter's new usage flag for more reliable pricing responses
1 parent 2ca4738 commit a29689d

File tree

3 files changed

+15
-68
lines changed

3 files changed

+15
-68
lines changed

src/api/providers/cline.ts

Lines changed: 1 addition & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ export class ClineHandler implements ApiHandler {
2020

2121
async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
2222
const model = this.getModel()
23-
const genId = yield* streamOpenRouterFormatRequest(
23+
yield* streamOpenRouterFormatRequest(
2424
this.client,
2525
systemPrompt,
2626
messages,
@@ -29,27 +29,6 @@ export class ClineHandler implements ApiHandler {
2929
this.options.thinkingBudgetTokens,
3030
this.options.openRouterProviderSorting,
3131
)
32-
33-
try {
34-
const response = await axios.get(`https://api.cline.bot/v1/generation?id=${genId}`, {
35-
headers: {
36-
Authorization: `Bearer ${this.options.clineApiKey}`,
37-
},
38-
timeout: 5_000, // this request hangs sometimes
39-
})
40-
41-
const generation = response.data
42-
console.log("cline generation details:", generation)
43-
yield {
44-
type: "usage",
45-
inputTokens: generation?.native_tokens_prompt || 0,
46-
outputTokens: generation?.native_tokens_completion || 0,
47-
totalCost: generation?.total_cost || 0,
48-
}
49-
} catch (error) {
50-
// ignore if fails
51-
console.error("Error fetching cline generation details:", error)
52-
}
5332
}
5433

5534
getModel(): { id: string; info: ModelInfo } {

src/api/providers/openrouter.ts

Lines changed: 1 addition & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ export class OpenRouterHandler implements ApiHandler {
2929
@withRetry()
3030
async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
3131
const model = this.getModel()
32-
const genId = yield* streamOpenRouterFormatRequest(
32+
yield* streamOpenRouterFormatRequest(
3333
this.client,
3434
systemPrompt,
3535
messages,
@@ -38,45 +38,6 @@ export class OpenRouterHandler implements ApiHandler {
3838
this.options.thinkingBudgetTokens,
3939
this.options.openRouterProviderSorting,
4040
)
41-
42-
if (genId) {
43-
await delay(500) // FIXME: necessary delay to ensure generation endpoint is ready
44-
try {
45-
const generationIterator = this.fetchGenerationDetails(genId)
46-
const generation = (await generationIterator.next()).value
47-
// console.log("OpenRouter generation details:", generation)
48-
yield {
49-
type: "usage",
50-
// cacheWriteTokens: 0,
51-
// cacheReadTokens: 0,
52-
// openrouter generation endpoint fails often
53-
inputTokens: generation?.native_tokens_prompt || 0,
54-
outputTokens: generation?.native_tokens_completion || 0,
55-
totalCost: generation?.total_cost || 0,
56-
}
57-
} catch (error) {
58-
// ignore if fails
59-
console.error("Error fetching OpenRouter generation details:", error)
60-
}
61-
}
62-
}
63-
64-
@withRetry({ maxRetries: 4, baseDelay: 250, maxDelay: 1000, retryAllErrors: true })
65-
async *fetchGenerationDetails(genId: string) {
66-
// console.log("Fetching generation details for:", genId)
67-
try {
68-
const response = await axios.get(`https://openrouter.ai/api/v1/generation?id=${genId}`, {
69-
headers: {
70-
Authorization: `Bearer ${this.options.openRouterApiKey}`,
71-
},
72-
timeout: 5_000, // this request hangs sometimes
73-
})
74-
yield response.data?.data
75-
} catch (error) {
76-
// ignore if fails
77-
console.error("Error fetching OpenRouter generation details:", error)
78-
throw error
79-
}
8041
}
8142

8243
getModel(): { id: string; info: ModelInfo } {

src/api/transform/openrouter-stream.ts

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ export async function* streamOpenRouterFormatRequest(
1414
o3MiniReasoningEffort?: string,
1515
thinkingBudgetTokens?: number,
1616
openRouterProviderSorting?: string,
17-
): AsyncGenerator<ApiStreamChunk, string | undefined, unknown> {
17+
): AsyncGenerator<ApiStreamChunk, undefined, unknown> {
1818
// Convert Anthropic messages to OpenAI format
1919
let openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
2020
{ role: "system", content: systemPrompt },
@@ -144,12 +144,14 @@ export async function* streamOpenRouterFormatRequest(
144144
stream: true,
145145
transforms: shouldApplyMiddleOutTransform ? ["middle-out"] : undefined,
146146
include_reasoning: true,
147+
stream_options: { include_usage: true },
147148
...(model.id === "openai/o3-mini" ? { reasoning_effort: o3MiniReasoningEffort || "medium" } : {}),
148149
...(reasoning ? { reasoning } : {}),
149150
...(openRouterProviderSorting ? { provider: { sort: openRouterProviderSorting } } : {}),
150151
})
151152

152-
let genId: string | undefined
153+
// let genId: string | undefined
154+
let didOutputUsage: boolean = false
153155

154156
for await (const chunk of stream) {
155157
// openrouter returns an error object instead of the openai sdk throwing an error
@@ -161,8 +163,15 @@ export async function* streamOpenRouterFormatRequest(
161163
throw new Error(`OpenRouter API Error ${error.code}: ${error.message}${metadataStr}`)
162164
}
163165

164-
if (!genId && chunk.id) {
165-
genId = chunk.id
166+
if (chunk.usage && !didOutputUsage) {
167+
yield {
168+
type: "usage",
169+
inputTokens: chunk.usage.prompt_tokens || 0,
170+
outputTokens: chunk.usage.completion_tokens || 0,
171+
// @ts-ignore-next-line
172+
totalCost: chunk.usage.cost || 0,
173+
}
174+
didOutputUsage = true
166175
}
167176

168177
const delta = chunk.choices[0]?.delta
@@ -182,6 +191,4 @@ export async function* streamOpenRouterFormatRequest(
182191
}
183192
}
184193
}
185-
186-
return genId
187194
}

0 commit comments

Comments (0)