Skip to content

Commit 03909fc

Browse files
committed
feat(rate-limit): better feedback in chat
1 parent 97f9686 commit 03909fc

File tree

24 files changed

+560
-25
lines changed

24 files changed

+560
-25
lines changed

packages/types/src/message.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,21 @@ export const contextCondenseSchema = z.object({
196196

197197
/** Static type inferred from `contextCondenseSchema`. */
export type ContextCondense = z.infer<typeof contextCondenseSchema>
198198

199+
/**
200+
* RateLimitRetryMetadata
*
* Zod schema for the structured rate-limit countdown state attached to a
* message under the `rateLimitRetry` metadata key. Only `type` and `status`
* are required; every other field is optional.
201+
*/
202+
export const rateLimitRetrySchema = z.object({
203+
// Literal discriminator; always "rate_limit_retry".
type: z.literal("rate_limit_retry"),
204+
// Countdown phase: still waiting, about to retry, or cancelled.
status: z.enum(["waiting", "retrying", "cancelled"]),
205+
// Seconds left in the countdown, when known.
remainingSeconds: z.number().optional(),
206+
// Attempt number the wait belongs to, when known.
attempt: z.number().optional(),
207+
// Upper bound on attempts, when known.
maxAttempts: z.number().optional(),
208+
// Whether the wait precedes a first request or a retry of a failed one.
origin: z.enum(["pre_request", "retry_attempt"]).optional(),
209+
// Optional free-form text describing why the wait is happening.
detail: z.string().optional(),
210+
})
211+
212+
/** Static type inferred from `rateLimitRetrySchema`. */
export type RateLimitRetryMetadata = z.infer<typeof rateLimitRetrySchema>
213+
199214
/**
200215
* ClineMessage
201216
*/
@@ -225,6 +240,7 @@ export const clineMessageSchema = z.object({
225240
reasoning_summary: z.string().optional(),
226241
})
227242
.optional(),
243+
rateLimitRetry: rateLimitRetrySchema.optional(),
228244
})
229245
.optional(),
230246
})

src/core/task/Task.ts

Lines changed: 176 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,16 @@ const DEFAULT_USAGE_COLLECTION_TIMEOUT_MS = 5000 // 5 seconds
120120
const FORCED_CONTEXT_REDUCTION_PERCENT = 75 // Keep 75% of context (remove 25%) on context window errors
121121
const MAX_CONTEXT_WINDOW_RETRIES = 3 // Maximum retries for context window errors
122122

123+
/**
 * Shape of the `rateLimitRetry` metadata this file attaches to
 * "api_req_retry_delayed" messages.
 *
 * NOTE(review): this repeats, field for field, the object described by
 * `rateLimitRetrySchema` in packages/types/src/message.ts, except that `origin`
 * is required here and optional there. Consider deriving this type from
 * `RateLimitRetryMetadata` so the two cannot drift apart — confirm with the
 * owners of the shared types package.
 */
interface RateLimitRetryPayload {
124+
// Literal discriminator for this metadata kind.
type: "rate_limit_retry"
125+
// Countdown phase being reported.
status: "waiting" | "retrying" | "cancelled"
126+
// Seconds left in the countdown.
remainingSeconds?: number
127+
// Attempt number the wait belongs to.
attempt?: number
128+
// Upper bound on attempts, if any.
maxAttempts?: number
129+
// Whether the wait precedes a first request or a retry.
origin: "pre_request" | "retry_attempt"
130+
// Optional short text describing the cause of the wait.
detail?: string
131+
}
132+
123133
export interface TaskOptions extends CreateTaskOptions {
124134
provider: ClineProvider
125135
apiConfiguration: ProviderSettings
@@ -1073,8 +1083,12 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
10731083
if (partial !== undefined) {
10741084
const lastMessage = this.clineMessages.at(-1)
10751085

1086+
const isRateLimitUpdate = type === "api_req_retry_delayed" && options.metadata?.rateLimitRetry !== undefined
10761087
const isUpdatingPreviousPartial =
1077-
lastMessage && lastMessage.partial && lastMessage.type === "say" && lastMessage.say === type
1088+
lastMessage &&
1089+
lastMessage.type === "say" &&
1090+
lastMessage.say === type &&
1091+
(lastMessage.partial || isRateLimitUpdate)
10781092

10791093
if (partial) {
10801094
if (isUpdatingPreviousPartial) {
@@ -1083,6 +1097,13 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
10831097
lastMessage.images = images
10841098
lastMessage.partial = partial
10851099
lastMessage.progressStatus = progressStatus
1100+
if (options.metadata) {
1101+
const messageWithMetadata = lastMessage as ClineMessage & ClineMessageWithMetadata
1102+
if (!messageWithMetadata.metadata) {
1103+
messageWithMetadata.metadata = {}
1104+
}
1105+
Object.assign(messageWithMetadata.metadata, options.metadata)
1106+
}
10861107
this.updateClineMessage(lastMessage)
10871108
} else {
10881109
// This is a new partial message, so add it with partial state.
@@ -1170,6 +1191,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
11701191
images,
11711192
checkpoint,
11721193
contextCondense,
1194+
metadata: options.metadata,
11731195
})
11741196
}
11751197
}
@@ -2556,6 +2578,124 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
25562578

25572579
let rateLimitDelay = 0
25582580

2581+
// Publishes one rate-limit status update as an "api_req_retry_delayed" say
// message. The message carries no text or images; all state travels in
// `metadata.rateLimitRetry`. `isPartial` is forwarded as the say() partial flag.
const sendRateLimitUpdate = async (payload: RateLimitRetryPayload, isPartial: boolean): Promise<void> => {
	const sayOptions = { metadata: { rateLimitRetry: payload } }
	await this.say("api_req_retry_delayed", undefined, undefined, isPartial, undefined, undefined, sayOptions)
}
2586+
2587+
/**
 * Counts down `seconds` (rounded up, clamped to >= 0) in one-second ticks,
 * publishing a rate-limit status update on every tick.
 *
 * - Each tick publishes a partial "waiting" update with the seconds remaining.
 * - If `this.abort` is set before a tick, or after the final tick, a
 *   non-partial "cancelled" update is published and the countdown stops.
 * - Otherwise a non-partial "retrying" update with 0 seconds remaining is
 *   published once the countdown finishes.
 *
 * @param seconds     Requested wait; fractional values are rounded up.
 * @param origin      Whether the wait precedes a first request or a retry.
 * @param attempt     Attempt number to report, if any.
 * @param maxAttempts Attempt limit to report, if any.
 * @param detail      Short explanatory text to report, if any.
 * @returns `true` when the countdown ran to completion, `false` when it was
 *          cut short because the task was aborted.
 */
const runRateLimitCountdown = async ({
	seconds,
	origin,
	attempt,
	maxAttempts,
	detail,
}: {
	seconds: number
	origin: RateLimitRetryPayload["origin"]
	attempt?: number
	maxAttempts?: number
	detail?: string
}): Promise<boolean> => {
	const normalizedSeconds = Math.max(0, Math.ceil(seconds))

	// Single place where the payload is assembled, so every status update
	// carries the same attempt/origin/detail context.
	const publish = (
		status: RateLimitRetryPayload["status"],
		remainingSeconds: number,
		isPartial: boolean,
	): Promise<void> =>
		sendRateLimitUpdate(
			{
				type: "rate_limit_retry",
				status,
				remainingSeconds,
				attempt,
				maxAttempts,
				origin,
				detail,
			},
			isPartial,
		)

	for (let remaining = normalizedSeconds; remaining > 0; remaining--) {
		if (this.abort) {
			await publish("cancelled", remaining, false)
			return false
		}

		await publish("waiting", remaining, true)
		await delay(1000)
	}

	// Also reached directly when normalizedSeconds is 0: the loop body never
	// runs, so a zero-length wait needs no separate branch.
	if (this.abort) {
		await publish("cancelled", 0, false)
		return false
	}

	await publish("retrying", 0, false)
	return true
}
2698+
25592699
// Use the shared timestamp so that subtasks respect the same rate-limit
25602700
// window as their parent tasks.
25612701
if (Task.lastGlobalApiRequestTime) {
@@ -2567,11 +2707,16 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
25672707

25682708
// Only show rate limiting message if we're not retrying. If retrying, we'll include the delay there.
25692709
if (rateLimitDelay > 0 && retryAttempt === 0) {
2570-
// Show countdown timer
2571-
for (let i = rateLimitDelay; i > 0; i--) {
2572-
const delayMessage = `Rate limiting for ${i} seconds...`
2573-
await this.say("api_req_retry_delayed", delayMessage, undefined, true)
2574-
await delay(1000)
2710+
const countdownCompleted = await runRateLimitCountdown({
2711+
seconds: rateLimitDelay,
2712+
origin: "pre_request",
2713+
attempt: 1,
2714+
})
2715+
2716+
if (!countdownCompleted) {
2717+
throw new Error(
2718+
`[RooCode#attemptApiRequest] task ${this.taskId}.${this.instanceId} aborted during pre-request rate limit wait`,
2719+
)
25752720
}
25762721
}
25772722

@@ -2723,7 +2868,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
27232868

27242869
// Note that this api_req_failed ask is unique in that we only present this option if the API hasn't streamed any content yet (i.e. it fails on the first chunk), as it would allow the user to hit a retry button. However, if the API failed mid-stream, it could be in any arbitrary state where some tools may have executed, so that error is handled differently and requires cancelling the task entirely.
27252870
if (autoApprovalEnabled && alwaysApproveResubmit) {
2726-
let errorMsg
2871+
let errorMsg: string
27272872

27282873
if (error.error?.metadata?.raw) {
27292874
errorMsg = JSON.stringify(error.error.metadata.raw, null, 2)
@@ -2755,24 +2900,33 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
27552900
// Wait for the greater of the exponential delay or the rate limit delay
27562901
const finalDelay = Math.max(exponentialDelay, rateLimitDelay)
27572902

2758-
// Show countdown timer with exponential backoff
2759-
for (let i = finalDelay; i > 0; i--) {
2760-
await this.say(
2761-
"api_req_retry_delayed",
2762-
`${errorMsg}\n\nRetry attempt ${retryAttempt + 1}\nRetrying in ${i} seconds...`,
2763-
undefined,
2764-
true,
2903+
// Condenses the error text into a single short line for display: the first
// non-blank line, trimmed, and cut to 157 characters plus an ellipsis when it
// exceeds 160 characters. Yields undefined when there is no usable text.
const sanitizedDetail = ((): string | undefined => {
	if (!errorMsg) {
		return undefined
	}

	for (const rawLine of errorMsg.split("\n")) {
		const candidate = rawLine.trim()
		if (candidate.length === 0) {
			continue
		}
		return candidate.length > 160 ? `${candidate.slice(0, 157)}…` : candidate
	}

	return undefined
})()
2916+
2917+
const countdownCompleted = await runRateLimitCountdown({
2918+
seconds: finalDelay,
2919+
origin: "retry_attempt",
2920+
attempt: retryAttempt + 2,
2921+
detail: sanitizedDetail,
2922+
})
2923+
2924+
if (!countdownCompleted) {
2925+
throw new Error(
2926+
`[RooCode#attemptApiRequest] task ${this.taskId}.${this.instanceId} aborted during rate limit retry wait`,
27652927
)
2766-
await delay(1000)
27672928
}
27682929

2769-
await this.say(
2770-
"api_req_retry_delayed",
2771-
`${errorMsg}\n\nRetry attempt ${retryAttempt + 1}\nRetrying now...`,
2772-
undefined,
2773-
false,
2774-
)
2775-
27762930
// Delegate generator output from the recursive call with
27772931
// incremented retry count.
27782932
yield* this.attemptApiRequest(retryAttempt + 1)

webview-ui/src/components/chat/ChatRow.tsx

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ import CodebaseSearchResultsDisplay from "./CodebaseSearchResultsDisplay"
4343
import { appendImages } from "@src/utils/imageUtils"
4444
import { McpExecution } from "./McpExecution"
4545
import { ChatTextArea } from "./ChatTextArea"
46+
import { RateLimitRetryRow } from "./RateLimitRetryRow"
47+
export { RateLimitRetryRow } from "./RateLimitRetryRow"
4648
import { MAX_IMAGES_PER_MESSAGE } from "./ChatView"
4749
import { useSelectedModel } from "../ui/hooks/useSelectedModel"
4850
import {
@@ -264,7 +266,7 @@ export const ChatRowContent = ({
264266
<span style={{ color: successColor, fontWeight: "bold" }}>{t("chat:taskCompleted")}</span>,
265267
]
266268
case "api_req_retry_delayed":
267-
return []
269+
return [null, null]
268270
case "api_req_started":
269271
const getIconSpan = (iconName: string, color: string) => (
270272
<div
@@ -1237,6 +1239,11 @@ export const ChatRowContent = ({
12371239
</div>
12381240
</>
12391241
)
1242+
case "api_req_retry_delayed":
1243+
// Prevent multiple blocks returning, we only need a single block
1244+
// that's constantly updated
1245+
if (!isLast) return null
1246+
return <RateLimitRetryRow metadata={message.metadata?.rateLimitRetry} />
12401247
case "shell_integration_warning":
12411248
return <CommandExecutionError />
12421249
case "checkpoint_saved":

webview-ui/src/components/chat/ChatView.tsx

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -425,9 +425,14 @@ const ChatViewComponent: React.ForwardRefRenderFunction<ChatViewRef, ChatViewPro
425425
// Don't want to reset since there could be a "say" after
426426
// an "ask" while ask is waiting for response.
427427
switch (lastMessage.say) {
428-
case "api_req_retry_delayed":
429-
setSendingDisabled(true)
428+
case "api_req_retry_delayed": {
429+
if (lastMessage.metadata?.rateLimitRetry?.status === "cancelled") {
430+
setSendingDisabled(false)
431+
} else {
432+
setSendingDisabled(true)
433+
}
430434
break
435+
}
431436
case "api_req_started":
432437
if (secondLastMessage?.ask === "command_output") {
433438
setSendingDisabled(true)

0 commit comments

Comments
 (0)