
Commit 6f81b77

Revert "Fix token usage / cost often being underreported" (#7039)
1 parent: 140310f

File tree

2 files changed (+25, -173 lines)


packages/types/src/global-settings.ts

Lines changed: 0 additions & 7 deletions
```diff
@@ -29,13 +29,6 @@ export const DEFAULT_WRITE_DELAY_MS = 1000
  */
 export const DEFAULT_TERMINAL_OUTPUT_CHARACTER_LIMIT = 50_000
 
-/**
- * Default timeout for background usage collection in milliseconds.
- * This timeout prevents the background task from running indefinitely
- * when collecting usage data from streaming API responses.
- */
-export const DEFAULT_USAGE_COLLECTION_TIMEOUT_MS = 30_000
-
 /**
  * GlobalSettings
  */
```
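
The reverted feature bounded its background usage collection with this 30-second constant. As a point of reference, here is a minimal sketch of that kind of timeout guard using only standard Promise APIs; `withTimeout` and `USAGE_COLLECTION_TIMEOUT_MS` are illustrative names, not code from this repository:

```ts
// Illustrative sketch only: bound a background task with a fixed timeout,
// in the spirit of the removed DEFAULT_USAGE_COLLECTION_TIMEOUT_MS constant.
const USAGE_COLLECTION_TIMEOUT_MS = 30_000

async function withTimeout<T>(task: Promise<T>, ms: number): Promise<T | undefined> {
	let timer: ReturnType<typeof setTimeout> | undefined
	const timeout = new Promise<undefined>((resolve) => {
		timer = setTimeout(() => resolve(undefined), ms)
	})
	try {
		// Whichever settles first wins; `undefined` signals a timeout.
		return await Promise.race([task, timeout])
	} finally {
		clearTimeout(timer)
	}
}
```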

src/core/task/Task.ts

Lines changed: 25 additions & 166 deletions
```diff
@@ -29,7 +29,6 @@ import {
 	getApiProtocol,
 	getModelId,
 	DEFAULT_CONSECUTIVE_MISTAKE_LIMIT,
-	DEFAULT_USAGE_COLLECTION_TIMEOUT_MS,
 	isBlockingAsk,
 } from "@roo-code/types"
 import { TelemetryService } from "@roo-code/telemetry"
@@ -1566,10 +1565,6 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 		// of prices in tasks from history (it's worth removing a few months
 		// from now).
 		const updateApiReqMsg = (cancelReason?: ClineApiReqCancelReason, streamingFailedMessage?: string) => {
-			if (lastApiReqIndex < 0 || !this.clineMessages[lastApiReqIndex]) {
-				return
-			}
-
 			const existingData = JSON.parse(this.clineMessages[lastApiReqIndex].text || "{}")
 			this.clineMessages[lastApiReqIndex].text = JSON.stringify({
 				...existingData,
```
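
The second hunk also drops the defensive bounds check on `lastApiReqIndex`, restoring the original assumption that it always points at a valid API request message. For context, `updateApiReqMsg` persists usage by merging new fields into the JSON already stored in the message's `text` field. A minimal sketch of that read-merge-stringify pattern, with a simplified message shape:

```ts
// Sketch of the read-merge-stringify pattern behind updateApiReqMsg;
// the message shape here is simplified to just the field that matters.
interface StoredMessage {
	text?: string
}

function mergeIntoMessageText(msg: StoredMessage, patch: Record<string, unknown>): void {
	const existingData = JSON.parse(msg.text || "{}")
	// Spread order means the new usage numbers overwrite any stale ones.
	msg.text = JSON.stringify({ ...existingData, ...patch })
}

const msg: StoredMessage = { text: '{"cost": 0}' }
mergeIntoMessageText(msg, { cost: 0.0123, tokensIn: 1200 })
console.log(msg.text) // {"cost":0.0123,"tokensIn":1200}
```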
```diff
@@ -1660,11 +1655,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 		this.isStreaming = true
 
 		try {
-			const iterator = stream[Symbol.asyncIterator]()
-			let item = await iterator.next()
-			while (!item.done) {
-				const chunk = item.value
-				item = await iterator.next()
+			for await (const chunk of stream) {
 				if (!chunk) {
 					// Sometimes chunk is undefined, no idea that can cause
 					// it, but this workaround seems to fix it.
```
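
The manual `Symbol.asyncIterator` protocol existed only so the iterator could outlive the main loop and be handed to the background drainer; the revert returns to the plain `for await` form. A small sketch of the equivalence, using a toy stream rather than the real API stream:

```ts
// Sketch: both loops consume an AsyncIterable identically; only the manual
// form leaves the iterator in scope for another consumer to resume.
async function* demoStream(): AsyncGenerator<string> {
	yield "a"
	yield "b"
}

async function consumeWithForAwait(): Promise<void> {
	for await (const chunk of demoStream()) {
		console.log(chunk)
	}
}

async function consumeManually(): Promise<void> {
	const iterator = demoStream()[Symbol.asyncIterator]()
	let item = await iterator.next()
	while (!item.done) {
		const chunk = item.value
		item = await iterator.next()
		console.log(chunk)
	}
}
```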
```diff
@@ -1732,165 +1723,16 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 					break
 				}
 
+				// PREV: We need to let the request finish for openrouter to
+				// get generation details.
+				// UPDATE: It's better UX to interrupt the request at the
+				// cost of the API cost not being retrieved.
 				if (this.didAlreadyUseTool) {
 					assistantMessage +=
 						"\n\n[Response interrupted by a tool use result. Only one tool may be used at a time and should be placed at the end of the message.]"
 					break
 				}
 			}
-
-			// Create a copy of current token values to avoid race conditions
-			const currentTokens = {
-				input: inputTokens,
-				output: outputTokens,
-				cacheWrite: cacheWriteTokens,
-				cacheRead: cacheReadTokens,
-				total: totalCost,
-			}
-
-			const drainStreamInBackgroundToFindAllUsage = async (apiReqIndex: number) => {
-				const timeoutMs = DEFAULT_USAGE_COLLECTION_TIMEOUT_MS
-				const startTime = Date.now()
-				const modelId = getModelId(this.apiConfiguration)
-
-				// Local variables to accumulate usage data without affecting the main flow
-				let bgInputTokens = currentTokens.input
-				let bgOutputTokens = currentTokens.output
-				let bgCacheWriteTokens = currentTokens.cacheWrite
-				let bgCacheReadTokens = currentTokens.cacheRead
-				let bgTotalCost = currentTokens.total
-
-				// Helper function to capture telemetry and update messages
-				const captureUsageData = async (
-					tokens: {
-						input: number
-						output: number
-						cacheWrite: number
-						cacheRead: number
-						total?: number
-					},
-					messageIndex: number = apiReqIndex,
-				) => {
-					if (tokens.input > 0 || tokens.output > 0 || tokens.cacheWrite > 0 || tokens.cacheRead > 0) {
-						// Update the shared variables atomically
-						inputTokens = tokens.input
-						outputTokens = tokens.output
-						cacheWriteTokens = tokens.cacheWrite
-						cacheReadTokens = tokens.cacheRead
-						totalCost = tokens.total
-
-						// Update the API request message with the latest usage data
-						updateApiReqMsg()
-						await this.saveClineMessages()
-
-						// Update the specific message in the webview
-						const apiReqMessage = this.clineMessages[messageIndex]
-						if (apiReqMessage) {
-							await this.updateClineMessage(apiReqMessage)
-						}
-
-						// Capture telemetry
-						TelemetryService.instance.captureLlmCompletion(this.taskId, {
-							inputTokens: tokens.input,
-							outputTokens: tokens.output,
-							cacheWriteTokens: tokens.cacheWrite,
-							cacheReadTokens: tokens.cacheRead,
-							cost:
-								tokens.total ??
-								calculateApiCostAnthropic(
-									this.api.getModel().info,
-									tokens.input,
-									tokens.output,
-									tokens.cacheWrite,
-									tokens.cacheRead,
-								),
-						})
-					}
-				}
-
-				try {
-					// Continue processing the original stream from where the main loop left off
-					let usageFound = false
-					let chunkCount = 0
-
-					// Use the same iterator that the main loop was using
-					while (!item.done) {
-						// Check for timeout
-						if (Date.now() - startTime > timeoutMs) {
-							console.warn(
-								`[Background Usage Collection] Timed out after ${timeoutMs}ms for model: ${modelId}, processed ${chunkCount} chunks`,
-							)
-							// Clean up the iterator before breaking
-							if (iterator.return) {
-								await iterator.return(undefined)
-							}
-							break
-						}
-
-						const chunk = item.value
-						item = await iterator.next()
-						chunkCount++
-
-						if (chunk && chunk.type === "usage") {
-							usageFound = true
-							bgInputTokens += chunk.inputTokens
-							bgOutputTokens += chunk.outputTokens
-							bgCacheWriteTokens += chunk.cacheWriteTokens ?? 0
-							bgCacheReadTokens += chunk.cacheReadTokens ?? 0
-							bgTotalCost = chunk.totalCost
-						}
-					}
-
-					if (
-						usageFound ||
-						bgInputTokens > 0 ||
-						bgOutputTokens > 0 ||
-						bgCacheWriteTokens > 0 ||
-						bgCacheReadTokens > 0
-					) {
-						// We have usage data either from a usage chunk or accumulated tokens
-						await captureUsageData(
-							{
-								input: bgInputTokens,
-								output: bgOutputTokens,
-								cacheWrite: bgCacheWriteTokens,
-								cacheRead: bgCacheReadTokens,
-								total: bgTotalCost,
-							},
-							lastApiReqIndex,
-						)
-					} else {
-						console.warn(
-							`[Background Usage Collection] Suspicious: request ${apiReqIndex} is complete, but no usage info was found. Model: ${modelId}`,
-						)
-					}
-				} catch (error) {
-					console.error("Error draining stream for usage data:", error)
-					// Still try to capture whatever usage data we have collected so far
-					if (
-						bgInputTokens > 0 ||
-						bgOutputTokens > 0 ||
-						bgCacheWriteTokens > 0 ||
-						bgCacheReadTokens > 0
-					) {
-						await captureUsageData(
-							{
-								input: bgInputTokens,
-								output: bgOutputTokens,
-								cacheWrite: bgCacheWriteTokens,
-								cacheRead: bgCacheReadTokens,
-								total: bgTotalCost,
-							},
-							lastApiReqIndex,
-						)
-					}
-				}
-			}
-
-			// Start the background task and handle any errors
-			drainStreamInBackgroundToFindAllUsage(lastApiReqIndex).catch((error) => {
-				console.error("Background usage collection failed:", error)
-			})
 		} catch (error) {
 			// Abandoned happens when extension is no longer waiting for the
 			// Cline instance to finish aborting (error is thrown here when
```
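
The deleted block implemented a drain-in-the-background pattern: after the UI loop breaks early (for example on tool use), a detached async task keeps pulling chunks until the trailing usage chunk arrives or a timeout fires. A condensed sketch of that pattern, with simplified chunk types standing in for the real stream union:

```ts
// Condensed sketch of the reverted pattern: keep pulling from a shared
// iterator after the UI loop has stopped, looking for trailing usage chunks.
// The chunk types are simplified stand-ins for the real stream union.
type UsageChunk = { type: "usage"; inputTokens: number; outputTokens: number }
type Chunk = UsageChunk | { type: "text"; text: string }

async function drainForUsage(
	iterator: AsyncIterator<Chunk>,
	timeoutMs: number,
): Promise<UsageChunk | undefined> {
	const startTime = Date.now()
	let item = await iterator.next()
	while (!item.done) {
		if (Date.now() - startTime > timeoutMs) {
			// Release the underlying stream instead of spinning forever.
			await iterator.return?.(undefined)
			return undefined
		}
		if (item.value.type === "usage") {
			return item.value
		}
		item = await iterator.next()
	}
	return undefined
}
```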
```diff
@@ -1924,6 +1766,24 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 			this.isStreaming = false
 		}
 
+		if (inputTokens > 0 || outputTokens > 0 || cacheWriteTokens > 0 || cacheReadTokens > 0) {
+			TelemetryService.instance.captureLlmCompletion(this.taskId, {
+				inputTokens,
+				outputTokens,
+				cacheWriteTokens,
+				cacheReadTokens,
+				cost:
+					totalCost ??
+					calculateApiCostAnthropic(
+						this.api.getModel().info,
+						inputTokens,
+						outputTokens,
+						cacheWriteTokens,
+						cacheReadTokens,
+					),
+			})
+		}
+
 		// Need to call here in case the stream was aborted.
 		if (this.abort || this.abandoned) {
 			throw new Error(`[RooCode#recursivelyMakeRooRequests] task ${this.taskId}.${this.instanceId} aborted`)
```
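
With the revert, telemetry is captured inline after streaming ends, and the cost falls back to a locally computed value only when the stream never reported one. The `??` operator (rather than `||`) matters here because a reported cost of zero is legitimate and should not trigger the fallback; a small illustration:

```ts
// `??` falls back only on null/undefined, so a genuinely zero reported
// cost is kept rather than being recomputed.
const reportedCost: number | undefined = 0
const fallbackCost = 0.0042 // stand-in for calculateApiCostAnthropic(...)

console.log(reportedCost ?? fallbackCost) // 0
console.log(reportedCost || fallbackCost) // 0.0042 (what `||` would wrongly do)
```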
```diff
@@ -1960,10 +1820,9 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 			presentAssistantMessage(this)
 		}
 
-		// Note: updateApiReqMsg() is now called from within drainStreamInBackgroundToFindAllUsage
-		// to ensure usage data is captured even when the stream is interrupted. The background task
-		// uses local variables to accumulate usage data before atomically updating the shared state.
 		await this.persistGpt5Metadata(reasoningMessage)
+
+		updateApiReqMsg()
 		await this.saveClineMessages()
 		await this.providerRef.deref()?.postStateToWebview()
 
```