From c60985630242685537cae32a8e83287cf59ef9c7 Mon Sep 17 00:00:00 2001 From: cte Date: Thu, 13 Mar 2025 00:54:05 -0700 Subject: [PATCH] Fix e2e tests --- e2e/src/suite/modes.test.ts | 46 +++++++++++------- e2e/src/suite/subtasks.test.ts | 17 ++++--- e2e/src/suite/utils.ts | 28 +++++++---- package-lock.json | 1 + src/core/Cline.ts | 62 ++++++++++++++--------- src/core/webview/ClineProvider.ts | 81 +++++++++++-------------------- src/exports/api.ts | 10 +++- src/exports/roo-code.d.ts | 11 ++++- 8 files changed, 143 insertions(+), 113 deletions(-) diff --git a/e2e/src/suite/modes.test.ts b/e2e/src/suite/modes.test.ts index a786513ec0..6e130efd54 100644 --- a/e2e/src/suite/modes.test.ts +++ b/e2e/src/suite/modes.test.ts @@ -1,34 +1,44 @@ import * as assert from "assert" -import { waitForMessage, getMessage } from "./utils" +import { getCompletion, getMessage, sleep, waitForCompletion, waitUntilAborted } from "./utils" suite("Roo Code Modes", () => { test("Should handle switching modes correctly", async function () { const api = globalThis.api - let prompt = - "For each mode (Code, Architect, Ask) respond with the mode name and what it specializes in after switching to that mode, do not start with the current mode, be sure to say 'I AM DONE' after the task is complete." + /** + * Switch modes. + */ + + const switchModesPrompt = + "For each mode (Code, Architect, Ask) respond with the mode name and what it specializes in after switching to that mode. " + + "Do not start with the current mode." await api.setConfiguration({ mode: "Code", alwaysAllowModeSwitch: true, autoApprovalEnabled: true }) - let taskId = await api.startNewTask(prompt) - await waitForMessage({ api, taskId, include: "I AM DONE", exclude: "be sure to say", timeout: 300_000 }) + const switchModesTaskId = await api.startNewTask(switchModesPrompt) + await waitForCompletion({ api, taskId: switchModesTaskId, timeout: 60_000 }) - // Start grading portion of test to grade the response from 1 to 10. - prompt = `Given this prompt: ${prompt} grade the response from 1 to 10 in the format of "Grade: (1-10)": ${api - .getMessages(taskId) - .filter(({ type }) => type === "say") - .map(({ text }) => text ?? "") - .join("\n")}\nBe sure to say 'I AM DONE GRADING' after the task is complete.` + /** + * Grade the response. + */ - await api.setConfiguration({ mode: "Ask" }) - taskId = await api.startNewTask(prompt) - await waitForMessage({ api, taskId, include: "I AM DONE GRADING", exclude: "be sure to say" }) + const gradePrompt = + `Given this prompt: ${switchModesPrompt} grade the response from 1 to 10 in the format of "Grade: (1-10)": ` + + api + .getMessages(switchModesTaskId) + .filter(({ type }) => type === "say") + .map(({ text }) => text ?? "") + .join("\n") - const match = getMessage({ api, taskId, include: "Grade:", exclude: "Grade: (1-10)" })?.text?.match( - /Grade: (\d+)/, - ) + await api.setConfiguration({ mode: "Ask" }) + const gradeTaskId = await api.startNewTask(gradePrompt) + await waitForCompletion({ api, taskId: gradeTaskId, timeout: 60_000 }) + const completion = getCompletion({ api, taskId: gradeTaskId }) + const match = completion?.text?.match(/Grade: (\d+)/) const score = parseInt(match?.[1] ?? "0") - assert.ok(score >= 7 && score <= 10, "Grade must be between 7 and 10.") + assert.ok(score >= 7 && score <= 10, `Grade must be between 7 and 10 - ${completion?.text}`) + + await api.cancelCurrentTask() }) }) diff --git a/e2e/src/suite/subtasks.test.ts b/e2e/src/suite/subtasks.test.ts index a679f1661b..2a62197908 100644 --- a/e2e/src/suite/subtasks.test.ts +++ b/e2e/src/suite/subtasks.test.ts @@ -1,6 +1,6 @@ import * as assert from "assert" -import { sleep, waitForMessage, waitFor, getMessage } from "./utils" +import { sleep, waitFor, getMessage, waitForCompletion } from "./utils" suite("Roo Code Subtasks", () => { test("Should handle subtask cancellation and resumption correctly", async function () { @@ -23,16 +23,16 @@ suite("Roo Code Subtasks", () => { "After creating the subtask, wait for it to complete and then respond 'Parent task resumed'.", ) - let subTaskId: string | undefined = undefined + let spawnedTaskId: string | undefined = undefined // Wait for the subtask to be spawned and then cancel it. - api.on("taskSpawned", (taskId) => (subTaskId = taskId)) - await waitFor(() => !!subTaskId) + api.on("taskSpawned", (_, childTaskId) => (spawnedTaskId = childTaskId)) + await waitFor(() => !!spawnedTaskId) await sleep(2_000) // Give the task a chance to start and populate the history. await api.cancelCurrentTask() // Wait a bit to ensure any task resumption would have happened. - await sleep(5_000) + await sleep(2_000) // The parent task should not have resumed yet, so we shouldn't see // "Parent task resumed". @@ -48,10 +48,10 @@ suite("Roo Code Subtasks", () => { // Start a new task with the same message as the subtask. const anotherTaskId = await api.startNewTask(childPrompt) - await waitForMessage({ taskId: anotherTaskId, api, include: "3" }) + await waitForCompletion({ api, taskId: anotherTaskId }) // Wait a bit to ensure any task resumption would have happened. - await sleep(5_000) + await sleep(2_000) // The parent task should still not have resumed. assert.ok( @@ -65,6 +65,7 @@ suite("Roo Code Subtasks", () => { ) // Clean up - cancel all tasks. - await api.cancelCurrentTask() + await api.clearCurrentTask() + await waitForCompletion({ api, taskId: parentTaskId }) }) }) diff --git a/e2e/src/suite/utils.ts b/e2e/src/suite/utils.ts index caaf158436..a84ddd814f 100644 --- a/e2e/src/suite/utils.ts +++ b/e2e/src/suite/utils.ts @@ -50,19 +50,27 @@ export const waitUntilReady = async ({ api, ...options }: WaitUntilReadyOptions) await waitFor(() => api.isReady(), options) } -type WaitForToolUseOptions = WaitUntilReadyOptions & { +type WaitUntilAbortedOptions = WaitForOptions & { + api: RooCodeAPI taskId: string - toolName: string } -export const waitForToolUse = async ({ api, taskId, toolName, ...options }: WaitForToolUseOptions) => - waitFor( - () => - api - .getMessages(taskId) - .some(({ type, say, text }) => type === "say" && say === "tool" && text && text.includes(toolName)), - options, - ) +export const waitUntilAborted = async ({ api, taskId, ...options }: WaitUntilAbortedOptions) => { + const set = new Set() + api.on("taskAborted", (taskId) => set.add(taskId)) + await waitFor(() => set.has(taskId), options) +} + +export const waitForCompletion = async ({ + api, + taskId, + ...options +}: WaitUntilReadyOptions & { + taskId: string +}) => waitFor(() => !!getCompletion({ api, taskId }), options) + +export const getCompletion = ({ api, taskId }: { api: RooCodeAPI; taskId: string }) => + api.getMessages(taskId).find(({ say, partial }) => say === "completion_result" && partial === false) type WaitForMessageOptions = WaitUntilReadyOptions & { taskId: string diff --git a/package-lock.json b/package-lock.json index 201497bfd4..1d9ebffe5d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12546,6 +12546,7 @@ "resolved": "https://registry.npmjs.org/npm-run-all/-/npm-run-all-4.1.5.tgz", "integrity": "sha512-Oo82gJDAVcaMdi3nuoKFavkIHBRVqQ1qvMb+9LHk/cF4P6B2m8aP04hGf7oL6wZ9BuGwX1onlLhpuoofSyoQDQ==", "dev": true, + "license": "MIT", "dependencies": { "ansi-styles": "^3.2.1", "chalk": "^2.4.1", diff --git a/src/core/Cline.ts b/src/core/Cline.ts index 12a57b9812..c50d48b6cc 100644 --- a/src/core/Cline.ts +++ b/src/core/Cline.ts @@ -113,6 +113,7 @@ export type ClineOptions = { export class Cline extends EventEmitter { readonly taskId: string + readonly instanceId: string // Subtasks readonly rootTask: Cline | undefined = undefined @@ -196,6 +197,7 @@ export class Cline extends EventEmitter { }) this.taskId = historyItem ? historyItem.id : crypto.randomUUID() + this.instanceId = crypto.randomUUID().slice(0, 8) this.taskNumber = -1 this.apiConfiguration = apiConfiguration this.api = buildApiHandler(apiConfiguration) @@ -409,9 +411,11 @@ export class Cline extends EventEmitter { // simply removes the reference to this instance, but the instance is // still alive until this promise resolves or rejects.) if (this.abort) { - throw new Error(`Task: ${this.taskNumber} Roo Code instance aborted (#1)`) + throw new Error(`[Cline#ask] task ${this.taskId}.${this.instanceId} aborted`) } + let askTs: number + if (partial !== undefined) { const lastMessage = this.clineMessages.at(-1) const isUpdatingPreviousPartial = @@ -509,7 +513,7 @@ export class Cline extends EventEmitter { progressStatus?: ToolProgressStatus, ): Promise { if (this.abort) { - throw new Error(`Task: ${this.taskNumber} Roo Code instance aborted (#2)`) + throw new Error(`[Cline#say] task ${this.taskId}.${this.instanceId} aborted`) } if (partial !== undefined) { @@ -584,6 +588,9 @@ export class Cline extends EventEmitter { this.isInitialized = true let imageBlocks: Anthropic.ImageBlockParam[] = formatResponse.imageBlocks(images) + + console.log(`[subtasks] task ${this.taskId}.${this.instanceId} starting`) + await this.initiateTaskLoop([ { type: "text", @@ -841,6 +848,9 @@ export class Cline extends EventEmitter { } await this.overwriteApiConversationHistory(modifiedApiConversationHistory) + + console.log(`[subtasks] task ${this.taskId}.${this.instanceId} resuming from history item`) + await this.initiateTaskLoop(newUserContent) } @@ -857,31 +867,36 @@ export class Cline extends EventEmitter { const didEndLoop = await this.recursivelyMakeClineRequests(nextUserContent, includeFileDetails) includeFileDetails = false // we only need file details the first time - // The way this agentic loop works is that cline will be given a task that he then calls tools to complete. unless there's an attempt_completion call, we keep responding back to him with his tool's responses until he either attempt_completion or does not use anymore tools. If he does not use anymore tools, we ask him to consider if he's completed the task and then call attempt_completion, otherwise proceed with completing the task. - // There is a MAX_REQUESTS_PER_TASK limit to prevent infinite requests, but Cline is prompted to finish the task as efficiently as he can. + // The way this agentic loop works is that cline will be given a + // task that he then calls tools to complete. Unless there's an + // attempt_completion call, we keep responding back to him with his + // tool's responses until he either attempt_completion or does not + // use anymore tools. If he does not use anymore tools, we ask him + // to consider if he's completed the task and then call + // attempt_completion, otherwise proceed with completing the task. + // There is a MAX_REQUESTS_PER_TASK limit to prevent infinite + // requests, but Cline is prompted to finish the task as efficiently + // as he can. - //const totalCost = this.calculateApiCostAnthropic(totalInputTokens, totalOutputTokens) if (didEndLoop) { - // For now a task never 'completes'. This will only happen if the user hits max requests and denies resetting the count. - //this.say("task_completed", `Task completed. Total API usage cost: ${totalCost}`) + // For now a task never 'completes'. This will only happen if + // the user hits max requests and denies resetting the count. break } else { - // this.say( - // "tool", - // "Cline responded with only text blocks but has not called attempt_completion yet. Forcing him to continue with task..." - // ) - nextUserContent = [ - { - type: "text", - text: formatResponse.noToolsUsed(), - }, - ] + nextUserContent = [{ type: "text", text: formatResponse.noToolsUsed() }] this.consecutiveMistakeCount++ } } } async abortTask(isAbandoned = false) { + // if (this.abort) { + // console.log(`[subtasks] already aborted task ${this.taskId}.${this.instanceId}`) + // return + // } + + console.log(`[subtasks] aborting task ${this.taskId}.${this.instanceId}`) + // Will stop any autonomously running promises. if (isAbandoned) { this.abandoned = true @@ -1237,7 +1252,7 @@ export class Cline extends EventEmitter { async presentAssistantMessage() { if (this.abort) { - throw new Error(`Task: ${this.taskNumber} Roo Code instance aborted (#3)`) + throw new Error(`[Cline#presentAssistantMessage] task ${this.taskId}.${this.instanceId} aborted`) } if (this.presentAssistantMessageLocked) { @@ -3113,7 +3128,7 @@ export class Cline extends EventEmitter { includeFileDetails: boolean = false, ): Promise { if (this.abort) { - throw new Error(`Task: ${this.taskNumber} Roo Code instance aborted (#4)`) + throw new Error(`[Cline#recursivelyMakeClineRequests] task ${this.taskId}.${this.instanceId} aborted`) } if (this.consecutiveMistakeCount >= 3) { @@ -3146,9 +3161,9 @@ export class Cline extends EventEmitter { const provider = this.providerRef.deref() if (this.isPaused && provider) { - provider.log(`[subtasks] paused ${this.taskId}`) + provider.log(`[subtasks] paused ${this.taskId}.${this.instanceId}`) await this.waitForResume() - provider.log(`[subtasks] resumed ${this.taskId}`) + provider.log(`[subtasks] resumed ${this.taskId}.${this.instanceId}`) const currentMode = (await provider.getState())?.mode ?? defaultModeSlug if (currentMode !== this.pausedModeSlug) { @@ -3159,7 +3174,7 @@ export class Cline extends EventEmitter { await delay(500) provider.log( - `[subtasks] task ${this.taskId} has switched back to '${this.pausedModeSlug}' from '${currentMode}'`, + `[subtasks] task ${this.taskId}.${this.instanceId} has switched back to '${this.pausedModeSlug}' from '${currentMode}'`, ) } } @@ -3279,6 +3294,7 @@ export class Cline extends EventEmitter { let assistantMessage = "" let reasoningMessage = "" this.isStreaming = true + try { for await (const chunk of stream) { if (!chunk) { @@ -3356,7 +3372,7 @@ export class Cline extends EventEmitter { // need to call here in case the stream was aborted if (this.abort || this.abandoned) { - throw new Error(`Task: ${this.taskNumber} Roo Code instance aborted (#5)`) + throw new Error(`[Cline#recursivelyMakeClineRequests] task ${this.taskId}.${this.instanceId} aborted`) } this.didCompleteReadingStream = true diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index 6e669d94db..b98934400d 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -113,6 +113,8 @@ export class ClineProvider implements vscode.WebviewViewProvider { // The instance is pushed to the top of the stack (LIFO order). // When the task is completed, the top instance is removed, reactivating the previous task. async addClineToStack(cline: Cline) { + console.log(`[subtasks] adding task ${cline.taskId}.${cline.instanceId} to stack`) + // Add this cline instance into the stack that represents the order of all the called tasks. this.clineStack.push(cline) @@ -132,49 +134,24 @@ export class ClineProvider implements vscode.WebviewViewProvider { } // Pop the top Cline instance from the stack. - var clineToBeRemoved = this.clineStack.pop() + var cline = this.clineStack.pop() + + if (cline) { + console.log(`[subtasks] removing task ${cline.taskId}.${cline.instanceId} from stack`) - if (clineToBeRemoved) { try { // Abort the running task and set isAbandoned to true so // all running promises will exit as well. - await clineToBeRemoved.abortTask(true) + await cline.abortTask(true) } catch (e) { - this.log(`[subtasks] encountered error while aborting task ${clineToBeRemoved.taskId}: ${e.message}`) + this.log( + `[subtasks] encountered error while aborting task ${cline.taskId}.${cline.instanceId}: ${e.message}`, + ) } // Make sure no reference kept, once promises end it will be // garbage collected. - this.log(`[subtasks] task ${clineToBeRemoved.taskId} stopped`) - clineToBeRemoved = undefined - } - } - - // remove the cline object with the received clineId, and all the cline objects bove it in the stack - // for each cline object removed, pop it from the stack, abort the task and set it to undefined - async removeClineWithIdFromStack(clineId: string) { - try { - if (typeof clineId !== "string" || !clineId.trim()) { - throw new Error("Error Invalid clineId provided.") - } - - const index = this.clineStack.findIndex((c) => c.taskId === clineId) - - if (index === -1) { - this.log(`[subtasks] no task found with id ${clineId}`) - return - } - - for (let i = this.clineStack.length - 1; i >= index; i--) { - try { - await this.removeClineFromStack() - } catch (removalError) { - this.log(`[subtasks] error removing task at stack index ${i}: ${removalError.message}`) - } - } - } catch (error) { - this.log(`[subtasks] unexpected error in removeClineWithIdFromStack: ${error.message}`) - throw error + cline = undefined } } @@ -196,15 +173,11 @@ export class ClineProvider implements vscode.WebviewViewProvider { // and resume the previous task/cline instance (if it exists) // this is used when a sub task is finished and the parent task needs to be resumed async finishSubTask(lastMessage?: string) { - try { - // remove the last cline instance from the stack (this is the finished sub task) - await this.removeClineFromStack() - // resume the last cline instance in the stack (if it exists - this is the 'parnt' calling task) - this.getCurrentCline()?.resumePausedTask(lastMessage) - } catch (error) { - this.log(`Error in finishSubTask: ${error.message}`) - throw error - } + console.log(`[subtasks] finishing subtask ${lastMessage}`) + // remove the last cline instance from the stack (this is the finished sub task) + await this.removeClineFromStack() + // resume the last cline instance in the stack (if it exists - this is the 'parnt' calling task) + this.getCurrentCline()?.resumePausedTask(lastMessage) } /* @@ -483,7 +456,9 @@ export class ClineProvider implements vscode.WebviewViewProvider { }) await this.addClineToStack(cline) - this.log(`[subtasks] ${cline.parentTask ? "child" : "parent"} task ${cline.taskId} started`) + this.log( + `[subtasks] ${cline.parentTask ? "child" : "parent"} task ${cline.taskId}.${cline.instanceId} instantiated`, + ) return cline } @@ -546,7 +521,9 @@ export class ClineProvider implements vscode.WebviewViewProvider { }) await this.addClineToStack(cline) - this.log(`[subtasks] ${cline.parentTask ? "child" : "parent"} task ${cline.taskId} started`) + this.log( + `[subtasks] ${cline.parentTask ? "child" : "parent"} task ${cline.taskId}.${cline.instanceId} instantiated`, + ) return cline } @@ -2006,20 +1983,20 @@ export class ClineProvider implements vscode.WebviewViewProvider { } async cancelTask() { - const currentCline = this.getCurrentCline() + const cline = this.getCurrentCline() - if (!currentCline) { + if (!cline) { return } - console.log(`[subtasks] cancelling task ${currentCline.taskId}`) + console.log(`[subtasks] cancelling task ${cline.taskId}.${cline.instanceId}`) - const { historyItem } = await this.getTaskWithId(currentCline.taskId) + const { historyItem } = await this.getTaskWithId(cline.taskId) // Preserve parent and root task information for history item. - const rootTask = currentCline.rootTask - const parentTask = currentCline.parentTask + const rootTask = cline.rootTask + const parentTask = cline.parentTask - currentCline.abortTask() + cline.abortTask() await pWaitFor( () => diff --git a/src/exports/api.ts b/src/exports/api.ts index b66d50aa2d..91817fcc58 100644 --- a/src/exports/api.ts +++ b/src/exports/api.ts @@ -39,11 +39,19 @@ export class API extends EventEmitter implements RooCodeAPI { const cline = await this.provider.initClineWithTask(text, images) cline.on("message", (message) => this.emit("message", { taskId: cline.taskId, ...message })) - cline.on("taskSpawned", (taskId) => this.emit("taskSpawned", taskId)) + cline.on("taskStarted", () => this.emit("taskStarted", cline.taskId)) + cline.on("taskPaused", () => this.emit("taskPaused", cline.taskId)) + cline.on("taskUnpaused", () => this.emit("taskUnpaused", cline.taskId)) + cline.on("taskAborted", () => this.emit("taskAborted", cline.taskId)) + cline.on("taskSpawned", (taskId) => this.emit("taskSpawned", cline.taskId, taskId)) return cline.taskId } + public async clearCurrentTask(lastMessage?: string) { + await this.provider.finishSubTask(lastMessage) + } + public async cancelCurrentTask() { await this.provider.cancelTask() } diff --git a/src/exports/roo-code.d.ts b/src/exports/roo-code.d.ts index 0007c106d8..e5338bd737 100644 --- a/src/exports/roo-code.d.ts +++ b/src/exports/roo-code.d.ts @@ -2,7 +2,11 @@ import { EventEmitter } from "events" export interface RooCodeEvents { message: [{ taskId: string; action: "created" | "updated"; message: ClineMessage }] - taskSpawned: [taskId: string] + taskStarted: [taskId: string] + taskPaused: [taskId: string] + taskUnpaused: [taskId: string] + taskAborted: [taskId: string] + taskSpawned: [taskId: string, childTaskId: string] } export interface RooCodeAPI extends EventEmitter { @@ -14,6 +18,11 @@ export interface RooCodeAPI extends EventEmitter { */ startNewTask(task?: string, images?: string[]): Promise + /** + * Clears the current task. + */ + clearCurrentTask(lastMessage?: string): Promise + /** * Cancels the current task. */