Skip to content

Commit 3ec881d

Browse files
authored
Fix aiAct execution/report misalignment in recorder flows (#2204)
* test(core): reproduce aiAct timeline mismatch
* fix(core): isolate aiAct history per action
* fix(report): sort executions by stable start time
* fix(report): move execution sorting from sidebar view to store layer

  Sorting only in the sidebar tableData left replay-all, keyboard navigation (Cmd+Up/Down), and useAllCurrentTasks() consuming the unsorted raw executions array. This caused the player animation order to diverge from the sidebar when concurrent executions were reordered by timestamp.

  Move sortExecutions() into setGroupedDump so that dump.executions is sorted once at the data entry point, making sidebar rendering, replay-all playback, and keyboard traversal all share the same chronological order.
* fix(report): preserve execution array order in store
* fix(report): revert store changes from PR
1 parent 5c65b1e commit 3ec881d

File tree

4 files changed

+153
-16
lines changed

4 files changed

+153
-16
lines changed

packages/core/src/agent/tasks.ts

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,6 @@ export class TaskExecutor {
7373

7474
private readonly taskBuilder: TaskBuilder;
7575

76-
private conversationHistory: ConversationHistory;
77-
7876
onTaskStartCallback?: ExecutionTaskProgressOptions['onTaskStart'];
7977

8078
private readonly hooks?: TaskExecutorHooks;
@@ -111,7 +109,6 @@ export class TaskExecutor {
111109
this.waitAfterAction = opts.waitAfterAction;
112110
this.useDeviceTimestamp = opts.useDeviceTimestamp;
113111
this.hooks = opts.hooks;
114-
this.conversationHistory = new ConversationHistory();
115112
this.providedActionSpace = opts.actionSpace;
116113
this.taskBuilder = new TaskBuilder({
117114
interfaceInstance,
@@ -295,7 +292,7 @@ export class TaskExecutor {
295292
| undefined
296293
>
297294
> {
298-
this.conversationHistory.reset();
295+
const conversationHistory = new ConversationHistory();
299296

300297
const session = this.createExecutionSession(
301298
taskTitleStr('Act', userPrompt),
@@ -324,12 +321,10 @@ export class TaskExecutor {
324321
}
325322

326323
// Get sub-goal status text if available
327-
const subGoalStatus =
328-
this.conversationHistory.subGoalsToText() || undefined;
324+
const subGoalStatus = conversationHistory.subGoalsToText() || undefined;
329325

330326
// Get memories text if available
331-
const memoriesStatus =
332-
this.conversationHistory.memoriesToText() || undefined;
327+
const memoriesStatus = conversationHistory.memoriesToText() || undefined;
333328

334329
const result = await session.appendAndRun(
335330
{
@@ -376,7 +371,7 @@ export class TaskExecutor {
376371
interfaceType: this.interface.interfaceType as InterfaceType,
377372
actionSpace,
378373
modelConfig: modelConfigForPlanning,
379-
conversationHistory: this.conversationHistory,
374+
conversationHistory,
380375
includeBbox: includeBboxInPlanning,
381376
imagesIncludeCount,
382377
deepThink,
@@ -479,24 +474,24 @@ export class TaskExecutor {
479474
)}`,
480475
);
481476
}
482-
if (this.conversationHistory.pendingFeedbackMessage) {
477+
if (conversationHistory.pendingFeedbackMessage) {
483478
console.warn(
484479
'unconsumed pending feedback message detected, this may lead to unexpected planning result:',
485-
this.conversationHistory.pendingFeedbackMessage,
480+
conversationHistory.pendingFeedbackMessage,
486481
);
487482
}
488483

489484
// Set initial time context for the first planning call
490485
const initialTimeString = await this.getTimeString();
491-
this.conversationHistory.pendingFeedbackMessage += `Current time: ${initialTimeString}`;
486+
conversationHistory.pendingFeedbackMessage += `Current time: ${initialTimeString}`;
492487

493488
try {
494489
await session.appendAndRun(executables.tasks);
495490
} catch (error: any) {
496491
// errorFlag = true;
497492
errorCountInOnePlanningLoop++;
498493
const timeString = await this.getTimeString();
499-
this.conversationHistory.pendingFeedbackMessage = `Time: ${timeString}, Error executing running tasks: ${error?.message || String(error)}`;
494+
conversationHistory.pendingFeedbackMessage = `Time: ${timeString}, Error executing running tasks: ${error?.message || String(error)}`;
500495
debug(
501496
'error when executing running tasks, but continue to run if it is not too many errors:',
502497
error instanceof Error ? error.message : String(error),
@@ -529,9 +524,9 @@ export class TaskExecutor {
529524
return session.appendErrorPlan(errorMsg);
530525
}
531526

532-
if (!this.conversationHistory.pendingFeedbackMessage) {
527+
if (!conversationHistory.pendingFeedbackMessage) {
533528
const timeString = await this.getTimeString();
534-
this.conversationHistory.pendingFeedbackMessage = `Time: ${timeString}, I have finished the action previously planned.`;
529+
conversationHistory.pendingFeedbackMessage = `Time: ${timeString}, I have finished the action previously planned.`;
535530
}
536531
}
537532

packages/core/src/task-runner.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ export class TaskRunner {
4747
| ((runner: TaskRunner, error?: TaskExecutionError) => Promise<void> | void)
4848
| undefined;
4949

50+
private readonly executionLogTime: number;
51+
5052
constructor(
5153
name: string,
5254
uiContextBuilder: () => Promise<UIContext>,
@@ -61,6 +63,7 @@ export class TaskRunner {
6163
this.onTaskStart = options?.onTaskStart;
6264
this.uiContextBuilder = uiContextBuilder;
6365
this.onTaskUpdate = options?.onTaskUpdate;
66+
this.executionLogTime = Date.now();
6467
}
6568

6669
private async emitOnTaskUpdate(error?: TaskExecutionError): Promise<void> {
@@ -380,7 +383,7 @@ export class TaskRunner {
380383

381384
dump(): ExecutionDump {
382385
return new ExecutionDump({
383-
logTime: Date.now(),
386+
logTime: this.executionLogTime,
384387
name: this.name,
385388
tasks: this.tasks,
386389
});
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
import { TaskExecutor } from '@/agent/tasks';
2+
import type { AbstractInterface } from '@/device';
3+
import { ScreenshotItem } from '@/screenshot-item';
4+
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
5+
import type Service from '../../src';
6+
7+
vi.mock('@/ai-model/llm-planning', () => ({
8+
plan: vi.fn(),
9+
}));
10+
11+
import { plan } from '@/ai-model/llm-planning';
12+
13+
const validBase64Image =
14+
'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==';
15+
16+
const createDeferred = () => {
17+
let resolve!: () => void;
18+
const promise = new Promise<void>((res) => {
19+
resolve = res;
20+
});
21+
22+
return { promise, resolve };
23+
};
24+
25+
describe('TaskExecutor concurrency isolation', () => {
26+
let taskExecutor: TaskExecutor;
27+
let mockInterface: AbstractInterface;
28+
let mockService: Service;
29+
30+
beforeEach(() => {
31+
mockInterface = {
32+
interfaceType: 'web',
33+
actionSpace: vi.fn().mockReturnValue([]),
34+
} as unknown as AbstractInterface;
35+
36+
mockService = {
37+
contextRetrieverFn: vi.fn().mockResolvedValue({
38+
screenshot: ScreenshotItem.create(validBase64Image, Date.now()),
39+
shotSize: { width: 1920, height: 1080 },
40+
shrunkShotToLogicalRatio: 1,
41+
tree: {
42+
id: 'root',
43+
attributes: {},
44+
children: [],
45+
},
46+
}),
47+
} as unknown as Service;
48+
49+
taskExecutor = new TaskExecutor(mockInterface, mockService, {
50+
replanningCycleLimit: 1,
51+
actionSpace: [],
52+
});
53+
54+
vi.spyOn(taskExecutor, 'convertPlanToExecutable').mockResolvedValue({
55+
tasks: [],
56+
yamlFlow: [],
57+
} as any);
58+
});
59+
60+
afterEach(() => {
61+
vi.restoreAllMocks();
62+
});
63+
64+
it('should isolate conversation history between concurrent action calls', async () => {
65+
const waitForBothCalls = createDeferred();
66+
const releasePlans = createDeferred();
67+
68+
const seenHistories: any[] = [];
69+
70+
vi.mocked(plan).mockImplementation(async (_instruction, opts: any) => {
71+
seenHistories.push(opts.conversationHistory);
72+
if (seenHistories.length === 2) {
73+
waitForBothCalls.resolve();
74+
}
75+
76+
if (seenHistories.length < 2) {
77+
await releasePlans.promise;
78+
}
79+
80+
return {
81+
actions: [],
82+
yamlFlow: [],
83+
shouldContinuePlanning: false,
84+
log: '',
85+
rawResponse: '',
86+
finalizeSuccess: true,
87+
finalizeMessage: 'done',
88+
};
89+
});
90+
91+
const actionPromiseA = taskExecutor.action(
92+
'first prompt',
93+
{ modelName: 'planning-model' } as any,
94+
{ modelName: 'default-model' } as any,
95+
true,
96+
);
97+
const actionPromiseB = taskExecutor.action(
98+
'second prompt',
99+
{ modelName: 'planning-model' } as any,
100+
{ modelName: 'default-model' } as any,
101+
true,
102+
);
103+
104+
await waitForBothCalls.promise;
105+
expect(seenHistories).toHaveLength(2);
106+
expect(seenHistories[0]).not.toBe(seenHistories[1]);
107+
108+
releasePlans.resolve();
109+
await Promise.all([actionPromiseA, actionPromiseB]);
110+
});
111+
});
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import { ScreenshotItem } from '@/screenshot-item';
2+
import { TaskRunner } from '@/task-runner';
3+
import type { UIContext } from '@/types';
4+
import { describe, expect, it } from 'vitest';
5+
6+
const fakeUIContextBuilder = async () => {
7+
const screenshot = ScreenshotItem.create('', Date.now());
8+
return {
9+
screenshot,
10+
tree: { node: null, children: [] },
11+
shotSize: { width: 0, height: 0 },
12+
shrunkShotToLogicalRatio: 1,
13+
} as unknown as UIContext;
14+
};
15+
16+
describe('TaskRunner dump logTime stability', () => {
17+
it('should keep the same logTime across repeated dumps of the same runner', async () => {
18+
const runner = new TaskRunner('stable-log-time', fakeUIContextBuilder);
19+
20+
const firstDump = runner.dump();
21+
22+
await new Promise((resolve) => setTimeout(resolve, 10));
23+
24+
const secondDump = runner.dump();
25+
26+
expect(secondDump.logTime).toBe(firstDump.logTime);
27+
});
28+
});

0 commit comments

Comments
 (0)