Skip to content

Commit df99aa5

Browse files
committed
fix: add timeout and recovery mechanisms for resource access
- Implement 40-second timeout with retry logic for resource access
- Add graceful error handling for missing API conversation history
- Implement exponential backoff for retries (2s initial, 6s max)
- Add recovery methods for safely retrieving saved messages
- Handle empty API conversation history without throwing errors
- Add comprehensive tests for recovery mechanisms

Fixes #7641
1 parent c25cfde commit df99aa5

File tree

2 files changed

+497
-4
lines changed

2 files changed

+497
-4
lines changed

src/core/task/Task.ts

Lines changed: 128 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,9 @@ const MAX_EXPONENTIAL_BACKOFF_SECONDS = 600 // 10 minutes
119119
const DEFAULT_USAGE_COLLECTION_TIMEOUT_MS = 5000 // 5 seconds
120120
const FORCED_CONTEXT_REDUCTION_PERCENT = 75 // Keep 75% of context (remove 25%) on context window errors
121121
const MAX_CONTEXT_WINDOW_RETRIES = 3 // Maximum retries for context window errors
122+
const RESOURCE_ACCESS_TIMEOUT_MS = 40000 // 40 seconds timeout for resource access
123+
const RESOURCE_ACCESS_RETRY_DELAY_MS = 2000 // 2 seconds initial retry delay
124+
const RESOURCE_ACCESS_MAX_RETRY_DELAY_MS = 6000 // 6 seconds max retry delay
122125

123126
export interface TaskOptions extends CreateTaskOptions {
124127
provider: ClineProvider
@@ -1201,7 +1204,8 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
12011204
}
12021205
}
12031206

1204-
const modifiedClineMessages = await this.getSavedClineMessages()
1207+
// Get saved messages with recovery mechanism
1208+
const modifiedClineMessages = await this.getSavedClineMessagesWithRecovery()
12051209

12061210
// Check for any stored GPT-5 response IDs in the message history.
12071211
const gpt5Messages = modifiedClineMessages.filter(
@@ -1254,7 +1258,8 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
12541258
// task, and it was because we were waiting for resume).
12551259
// This is important in case the user deletes messages without resuming
12561260
// the task first.
1257-
this.apiConversationHistory = await this.getSavedApiConversationHistory()
1261+
// Get API conversation history with recovery mechanism
1262+
this.apiConversationHistory = await this.getSavedApiConversationHistoryWithRecovery()
12581263

12591264
const lastClineMessage = this.clineMessages
12601265
.slice()
@@ -1283,7 +1288,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
12831288

12841289
// Make sure that the api conversation history can be resumed by the API,
12851290
// even if it goes out of sync with cline messages.
1286-
let existingApiConversationHistory: ApiMessage[] = await this.getSavedApiConversationHistory()
1291+
let existingApiConversationHistory: ApiMessage[] = await this.getSavedApiConversationHistoryWithRecovery()
12871292

12881293
// v2.0 xml tags refactor caveat: since we don't use tools anymore, we need to replace all tool use blocks with a text block since the API disallows conversations with tool uses and no tool schema
12891294
const conversationWithoutToolBlocks = existingApiConversationHistory.map((message) => {
@@ -1399,7 +1404,13 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
13991404
throw new Error("Unexpected: Last message is not a user or assistant message")
14001405
}
14011406
} else {
1402-
throw new Error("Unexpected: No existing API conversation history")
1407+
// Handle case where there's no existing API conversation history gracefully
1408+
// This can happen when resuming a task that was interrupted before any API calls
1409+
console.warn(
1410+
`[Task#resumeTaskFromHistory] No existing API conversation history for task ${this.taskId}. Starting fresh.`,
1411+
)
1412+
modifiedApiConversationHistory = []
1413+
modifiedOldUserContent = []
14031414
}
14041415

14051416
let newUserContent: Anthropic.Messages.ContentBlockParam[] = [...modifiedOldUserContent]
@@ -2853,4 +2864,117 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
28532864
public get cwd() {
28542865
return this.workspacePath
28552866
}
2867+
2868+
/**
 * Runs an async operation with a per-attempt timeout and retries it on failure.
 *
 * Every attempt calls `promiseFactory` afresh and races the returned promise
 * against a timer of `timeout` milliseconds. An attempt that rejects or times
 * out is logged, then retried after a backoff that starts at
 * RESOURCE_ACCESS_RETRY_DELAY_MS and grows by a factor of 1.5 per failure, capped
 * at RESOURCE_ACCESS_MAX_RETRY_DELAY_MS. Once RESOURCE_ACCESS_TIMEOUT_MS has
 * elapsed since the first attempt, no further attempt is started and the method
 * resolves to `null` instead of throwing.
 *
 * A timed-out attempt is not cancelled: its promise keeps running and only its
 * outcome is ignored.
 *
 * @param promiseFactory A function that returns a fresh promise for each attempt
 * @param timeout Per-attempt timeout in milliseconds (default 5000ms)
 * @param errorMessage Context appended to the error log when retries are exhausted
 * @returns The value of the first successful attempt, or `null` if every attempt
 *          failed within the overall time budget
 */
private async withTimeout<T>(
	promiseFactory: () => Promise<T>,
	timeout: number = 5000,
	errorMessage: string = "Resource access failed",
): Promise<T | null> {
	const startTime = Date.now()
	let retryDelay = RESOURCE_ACCESS_RETRY_DELAY_MS
	let attemptCount = 0

	while (Date.now() - startTime < RESOURCE_ACCESS_TIMEOUT_MS) {
		attemptCount++

		try {
			// Create a new promise instance for each attempt
			const promise = promiseFactory()

			// Keep the timer handle so the timer can be cancelled as soon as the race settles;
			// otherwise every attempt that finishes early leaves a live timer behind.
			let timeoutHandle: ReturnType<typeof setTimeout> | undefined
			const timeoutPromise = new Promise<never>((_, reject) => {
				timeoutHandle = setTimeout(() => reject(new Error(`Timeout after ${timeout}ms`)), timeout)
			})

			// Race between the actual promise and the timeout
			let result: T
			try {
				result = (await Promise.race([promise, timeoutPromise])) as T
			} finally {
				clearTimeout(timeoutHandle)
			}

			// If we get here, the operation succeeded
			if (attemptCount > 1) {
				console.log(`[Task#withTimeout] Resource access succeeded after ${attemptCount} attempts`)
			}
			return result
		} catch (error) {
			const elapsedTime = Date.now() - startTime

			// Log the retry attempt
			console.warn(
				`[Task#withTimeout] Attempt ${attemptCount} failed after ${elapsedTime}ms: ${
					error instanceof Error ? error.message : String(error)
				}`,
			)

			// Check if we've exceeded the total timeout
			if (elapsedTime >= RESOURCE_ACCESS_TIMEOUT_MS) {
				console.error(
					`[Task#withTimeout] Resource access failed after ${RESOURCE_ACCESS_TIMEOUT_MS}ms. ` +
						`Marking resource as lost and continuing. Error: ${errorMessage}`,
				)

				// Return null to indicate failure
				return null
			}

			// Back off before retrying, but never sleep past the overall deadline.
			await delay(Math.min(retryDelay, RESOURCE_ACCESS_TIMEOUT_MS - elapsedTime))
			retryDelay = Math.min(retryDelay * 1.5, RESOURCE_ACCESS_MAX_RETRY_DELAY_MS)
		}
	}

	// Reached when the backoff wait after a failed attempt used up the remaining
	// time budget, so the loop condition ended the retries.
	console.error(`[Task#withTimeout] ${errorMessage} - Unexpected timeout condition`)
	return null
}
2938+
2939+
/**
 * Loads the saved Cline messages through `withTimeout`, using a 5000ms
 * per-attempt timeout.
 *
 * @returns The saved messages, or an empty array (after logging a warning)
 *          when `withTimeout` gives up and yields `null`
 */
private async getSavedClineMessagesWithRecovery(): Promise<ClineMessage[]> {
	const loadMessages = () => this.getSavedClineMessages()
	const messages = await this.withTimeout(loadMessages, 5000, "Failed to retrieve saved Cline messages")

	// Happy path: the messages were retrieved.
	if (messages !== null) {
		return messages
	}

	// Retrieval was abandoned; continue with no messages rather than throwing.
	console.warn(
		`[Task#getSavedClineMessagesWithRecovery] Failed to retrieve messages for task ${this.taskId}. Using empty array.`,
	)
	return []
}
2959+
2960+
/**
 * Loads the saved API conversation history through `withTimeout`, using a
 * 5000ms per-attempt timeout.
 *
 * @returns The saved history, or an empty array (after logging a warning)
 *          when `withTimeout` gives up and yields `null`
 */
private async getSavedApiConversationHistoryWithRecovery(): Promise<ApiMessage[]> {
	const loadHistory = () => this.getSavedApiConversationHistory()
	const history = await this.withTimeout(loadHistory, 5000, "Failed to retrieve API conversation history")

	// Happy path: the history was retrieved.
	if (history !== null) {
		return history
	}

	// Retrieval was abandoned; continue with an empty history rather than throwing.
	console.warn(
		`[Task#getSavedApiConversationHistoryWithRecovery] Failed to retrieve history for task ${this.taskId}. Using empty array.`,
	)
	return []
}
28562980
}

0 commit comments

Comments
 (0)