diff --git a/src/features/background-agent/manager.test.ts b/src/features/background-agent/manager.test.ts index 6bd818c96..e8163debb 100644 --- a/src/features/background-agent/manager.test.ts +++ b/src/features/background-agent/manager.test.ts @@ -370,6 +370,238 @@ describe("BackgroundManager.notifyParentSession - release ordering", () => { }) }) +describe("BackgroundManager.pollRunningTasks - stability detection", () => { + const MIN_STABILITY_TIME_MS = 10000 // Must match the constant in manager.ts + + /** + * Simulates the stability detection logic from pollRunningTasks. + * This mirrors the actual implementation to test the algorithm in isolation. + */ + function simulateStabilityCheck( + task: BackgroundTask, + currentMsgCount: number, + elapsedMs: number + ): { shouldComplete: boolean; stableCount: number } { + const progress = task.progress! + + if (progress.lastMsgCount === currentMsgCount && elapsedMs >= MIN_STABILITY_TIME_MS) { + progress.stableCount = (progress.stableCount ?? 0) + 1 + if (progress.stableCount >= 3) { + return { shouldComplete: true, stableCount: progress.stableCount } + } + } else { + progress.stableCount = 0 + } + progress.lastMsgCount = currentMsgCount + + return { shouldComplete: false, stableCount: progress.stableCount ?? 0 } + } + + test("should complete task after 3 stable polls with messages", () => { + // #given - task running for 15 seconds with stable message count + const task = createMockTask({ + id: "task-stable", + sessionID: "session-stable", + parentSessionID: "session-parent", + startedAt: new Date(Date.now() - 15000), // 15 seconds ago + progress: { toolCalls: 5, lastUpdate: new Date() }, + }) + + // #when - simulate polls with same message count + // Note: First poll sets lastMsgCount, stability counting starts from second poll + const elapsedMs = 15000 + const msgCount = 10 + + // Poll 1 - sets lastMsgCount, stableCount stays 0 + let result = simulateStabilityCheck(task, msgCount, elapsedMs) + expect(result.shouldComplete).toBe(false) + expect(result.stableCount).toBe(0) // First poll: lastMsgCount was undefined + + // Poll 2 - first stable poll + result = simulateStabilityCheck(task, msgCount, elapsedMs + 2000) + expect(result.shouldComplete).toBe(false) + expect(result.stableCount).toBe(1) + + // Poll 3 - second stable poll + result = simulateStabilityCheck(task, msgCount, elapsedMs + 4000) + expect(result.shouldComplete).toBe(false) + expect(result.stableCount).toBe(2) + + // Poll 4 - third stable poll + result = simulateStabilityCheck(task, msgCount, elapsedMs + 6000) + + // #then - task should complete after 3 stable polls + expect(result.shouldComplete).toBe(true) + expect(result.stableCount).toBe(3) + }) + + test("should not prematurely complete slow-starting tasks (before MIN_STABILITY_TIME_MS)", () => { + // #given - task just started (only 5 seconds elapsed, below 10s threshold) + const task = createMockTask({ + id: "task-new", + sessionID: "session-new", + parentSessionID: "session-parent", + startedAt: new Date(Date.now() - 5000), // 5 seconds ago + progress: { toolCalls: 0, lastUpdate: new Date() }, + }) + + // #when - simulate 5 polls with 0 messages (task still initializing) + const elapsedMs = 5000 // Below MIN_STABILITY_TIME_MS threshold + const msgCount = 0 + + for (let i = 0; i < 5; i++) { + const result = simulateStabilityCheck(task, msgCount, elapsedMs) + // #then - should never complete because elapsed time is below threshold + expect(result.shouldComplete).toBe(false) + } + + // Stability counter should remain at 0 because elapsed < MIN_STABILITY_TIME_MS + expect(task.progress!.stableCount).toBe(0) + }) + + test("should handle tasks that complete with 0 messages after MIN_STABILITY_TIME_MS", () => { + // #given - task running for 15 seconds but no messages (error case or instant completion) + const task = createMockTask({ + id: "task-zero-msgs", + sessionID: "session-zero", + parentSessionID: "session-parent", + startedAt: new Date(Date.now() - 15000), // 15 seconds ago + progress: { toolCalls: 0, lastUpdate: new Date() }, + }) + + // #when - simulate 4 polls with 0 messages after min time elapsed + // Note: First poll sets lastMsgCount, stability counting starts from second poll + const elapsedMs = 15000 + const msgCount = 0 + + // Poll 1 - sets lastMsgCount + let result = simulateStabilityCheck(task, msgCount, elapsedMs) + expect(result.stableCount).toBe(0) + + // Poll 2-4 - stable polls + result = simulateStabilityCheck(task, msgCount, elapsedMs + 2000) + expect(result.stableCount).toBe(1) + + result = simulateStabilityCheck(task, msgCount, elapsedMs + 4000) + expect(result.stableCount).toBe(2) + + result = simulateStabilityCheck(task, msgCount, elapsedMs + 6000) + + // #then - task should complete even with 0 messages (this was the bug fix!) + expect(result.shouldComplete).toBe(true) + expect(result.stableCount).toBe(3) + }) + + test("should reset stability counter when message count changes", () => { + // #given - task running for 20 seconds + const task = createMockTask({ + id: "task-active", + sessionID: "session-active", + parentSessionID: "session-parent", + startedAt: new Date(Date.now() - 20000), // 20 seconds ago + progress: { toolCalls: 3, lastUpdate: new Date() }, + }) + + const elapsedMs = 20000 + + // #when - poll with 5 messages (first sets baseline), then 5 again, then 7 (changed) + // Poll 1 - sets lastMsgCount + simulateStabilityCheck(task, 5, elapsedMs) + expect(task.progress!.stableCount).toBe(0) + + // Poll 2 - stable + let result = simulateStabilityCheck(task, 5, elapsedMs + 2000) + expect(result.stableCount).toBe(1) + + // Poll 3 - Message count changes - new activity detected + result = simulateStabilityCheck(task, 7, elapsedMs + 4000) + + // #then - stability counter should reset to 0 + expect(result.stableCount).toBe(0) + expect(result.shouldComplete).toBe(false) + }) + + test("should require exactly 3 stable polls to complete", () => { + // #given + const task = createMockTask({ + id: "task-threshold", + sessionID: "session-threshold", + parentSessionID: "session-parent", + startedAt: new Date(Date.now() - 20000), + progress: { toolCalls: 2, lastUpdate: new Date() }, + }) + + const elapsedMs = 20000 + const msgCount = 8 + + // #when - poll 3 times (first sets baseline, next 2 are stable but not enough) + // Poll 1 - sets lastMsgCount + let result = simulateStabilityCheck(task, msgCount, elapsedMs) + expect(result.shouldComplete).toBe(false) + expect(result.stableCount).toBe(0) + + // Poll 2 - first stable + result = simulateStabilityCheck(task, msgCount, elapsedMs + 2000) + expect(result.shouldComplete).toBe(false) + expect(result.stableCount).toBe(1) + + // Poll 3 - second stable + result = simulateStabilityCheck(task, msgCount, elapsedMs + 4000) + // #then - should NOT complete after only 2 stable polls + expect(result.shouldComplete).toBe(false) + expect(result.stableCount).toBe(2) + + // Poll 4 - third stable + result = simulateStabilityCheck(task, msgCount, elapsedMs + 6000) + + // Now it should complete + expect(result.shouldComplete).toBe(true) + expect(result.stableCount).toBe(3) + }) + + test("should handle edge case: stability resets then builds up again", () => { + // #given + const task = createMockTask({ + id: "task-intermittent", + sessionID: "session-intermittent", + parentSessionID: "session-parent", + startedAt: new Date(Date.now() - 30000), + progress: { toolCalls: 1, lastUpdate: new Date() }, + }) + + const elapsedMs = 30000 + + // #when - set baseline, build up to 2, then reset, then build to 3 + // Poll 1 - sets lastMsgCount + simulateStabilityCheck(task, 3, elapsedMs) + expect(task.progress!.stableCount).toBe(0) + + // Poll 2-3 - build stability + simulateStabilityCheck(task, 3, elapsedMs + 2000) + expect(task.progress!.stableCount).toBe(1) + + simulateStabilityCheck(task, 3, elapsedMs + 4000) + expect(task.progress!.stableCount).toBe(2) + + // Poll 4 - Activity detected - reset + simulateStabilityCheck(task, 5, elapsedMs + 6000) + expect(task.progress!.stableCount).toBe(0) + + // Poll 5-7 - Build up again (first sets baseline, next 3 are stable) + simulateStabilityCheck(task, 5, elapsedMs + 8000) + expect(task.progress!.stableCount).toBe(1) + + simulateStabilityCheck(task, 5, elapsedMs + 10000) + expect(task.progress!.stableCount).toBe(2) + + const result = simulateStabilityCheck(task, 5, elapsedMs + 12000) + + // #then - should complete after 3 new stable polls + expect(result.shouldComplete).toBe(true) + expect(result.stableCount).toBe(3) + }) +}) + describe("BackgroundManager.pruneStaleTasksAndNotifications", () => { let manager: MockBackgroundManager diff --git a/src/features/background-agent/manager.ts b/src/features/background-agent/manager.ts index 87083aad4..1adab8bb3 100644 --- a/src/features/background-agent/manager.ts +++ b/src/features/background-agent/manager.ts @@ -343,43 +343,26 @@ export class BackgroundManager { }).catch(() => {}) } - const message = `[BACKGROUND TASK COMPLETED] Task "${task.description}" finished in ${duration}. Use background_output with task_id="${task.id}" to get results.` + log("[background-agent] Task completed silently:", { taskId: task.id, duration }) - log("[background-agent] Sending notification to parent session:", { parentSessionID: task.parentSessionID }) + // Release concurrency immediately but keep task in memory for output retrieval + if (task.model) { + this.concurrencyManager.release(task.model) + task.model = undefined // Prevent double-release by other handlers during retention + } + this.clearNotificationsForTask(task.id) + // Keep completed tasks for 5 minutes so background_output can retrieve results + // The pruneStaleTasksAndNotifications will clean up old tasks after TASK_TTL_MS const taskId = task.id - setTimeout(async () => { - if (task.model) { - this.concurrencyManager.release(task.model) - } - - try { - const messageDir = getMessageDir(task.parentSessionID) - const prevMessage = messageDir ? findNearestMessageWithFields(messageDir) : null - - const modelContext = task.parentModel ?? prevMessage?.model - const modelField = modelContext?.providerID && modelContext?.modelID - ? { providerID: modelContext.providerID, modelID: modelContext.modelID } - : undefined - - await this.client.session.prompt({ - path: { id: task.parentSessionID }, - body: { - agent: prevMessage?.agent, - model: modelField, - parts: [{ type: "text", text: message }], - }, - query: { directory: this.directory }, - }) - log("[background-agent] Successfully sent prompt to parent session:", { parentSessionID: task.parentSessionID }) - } catch (error) { - log("[background-agent] prompt failed:", String(error)) - } finally { - this.clearNotificationsForTask(taskId) + setTimeout(() => { + // Only delete if task still exists and is completed (not re-used) + const existingTask = this.tasks.get(taskId) + if (existingTask && existingTask.status === "completed") { this.tasks.delete(taskId) - log("[background-agent] Removed completed task from memory:", taskId) + log("[background-agent] Removed completed task from memory after retention:", taskId) } - }, 200) + }, 5 * 60 * 1000) // 5 minute retention for output retrieval } private formatDuration(start: Date, end?: Date): string { @@ -452,11 +435,13 @@ export class BackgroundManager { const sessionStatus = allStatuses[task.sessionID] if (!sessionStatus) { - log("[background-agent] Session not found in status:", task.sessionID) - continue + // Note: Background sessions may not appear in session.status() + // Don't skip - fall through to message-based stability detection + log("[background-agent] Session not found in status, checking via messages:", task.sessionID) + // Removed: early continue that skipped completion logic } - if (sessionStatus.type === "idle") { + if (sessionStatus?.type === "idle") { const hasIncompleteTodos = await this.checkSessionTodos(task.sessionID) if (hasIncompleteTodos) { log("[background-agent] Task has incomplete todos via polling, waiting:", task.id) @@ -504,6 +489,31 @@ export class BackgroundManager { if (!task.progress) { task.progress = { toolCalls: 0, lastUpdate: new Date() } } + + // Stability detection: if message count unchanged for 3 polls, consider complete + // Note: We intentionally allow currentMsgCount === 0 to handle edge cases like + // tasks that error immediately or complete synchronously before first poll. + // The minimum elapsed time check below prevents premature completion of slow-starting tasks. + const currentMsgCount = messages.length + const progress = task.progress! + const elapsedMs = Date.now() - task.startedAt.getTime() + const MIN_STABILITY_TIME_MS = 10000 // 10 seconds minimum before stability detection kicks in + + if (progress.lastMsgCount === currentMsgCount && elapsedMs >= MIN_STABILITY_TIME_MS) { + progress.stableCount = (progress.stableCount ?? 0) + 1 + if (progress.stableCount >= 3) { + log("[background-agent] Task completed via stability detection:", task.id) + task.status = "completed" + task.completedAt = new Date() + this.markForNotification(task) + this.notifyParentSession(task) + continue + } + } else { + progress.stableCount = 0 + } + progress.lastMsgCount = currentMsgCount + task.progress.toolCalls = toolCalls task.progress.lastTool = lastTool task.progress.lastUpdate = new Date() diff --git a/src/features/background-agent/types.ts b/src/features/background-agent/types.ts index 8a697a0e5..b7b050bc8 100644 --- a/src/features/background-agent/types.ts +++ b/src/features/background-agent/types.ts @@ -10,6 +10,8 @@ export interface TaskProgress { lastUpdate: Date lastMessage?: string lastMessageAt?: Date + stableCount?: number + lastMsgCount?: number } export interface BackgroundTask { diff --git a/src/tools/background-task/tools.ts b/src/tools/background-task/tools.ts index b9637e23a..c34290899 100644 --- a/src/tools/background-task/tools.ts +++ b/src/tools/background-task/tools.ts @@ -172,14 +172,8 @@ async function formatTaskResult(task: BackgroundTask, client: OpencodeClient): P return `Error fetching messages: ${messagesResult.error}` } - // Handle both SDK response structures: direct array or wrapped in .data - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const messages = ((messagesResult as any).data ?? messagesResult) as Array<{ - info?: { role?: string } - parts?: Array<{ type?: string; text?: string }> - }> - - if (!Array.isArray(messages) || messages.length === 0) { + const messages = messagesResult.data + if (!messages || messages.length === 0) { return `Task Result Task ID: ${task.id} @@ -192,11 +186,13 @@ Session ID: ${task.sessionID} (No messages found)` } - const assistantMessages = messages.filter( - (m) => m.info?.role === "assistant" - ) + // Match sync pattern: filter by role, sort by time descending, take first + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const lastAssistantMessage = (messages as any[]) + .filter((m) => m.info.role === "assistant") + .sort((a, b) => (b.info.time?.created || 0) - (a.info.time?.created || 0))[0] - if (assistantMessages.length === 0) { + if (!lastAssistantMessage) { return `Task Result Task ID: ${task.id} @@ -209,14 +205,11 @@ Session ID: ${task.sessionID} (No assistant response found)` } - const lastMessage = assistantMessages[assistantMessages.length - 1] - const textParts = lastMessage?.parts?.filter( - (p) => p.type === "text" - ) ?? [] - const textContent = textParts - .map((p) => p.text ?? "") - .filter((text) => text.length > 0) - .join("\n") + // Match sync pattern: direct property access, no optional chaining + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const textParts = lastAssistantMessage.parts.filter((p: any) => p.type === "text") + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const textContent = textParts.map((p: any) => p.text).join("\n") const duration = formatDuration(task.startedAt, task.completedAt)