Skip to content

Commit 363b9b1

Browse files
authored
fix(engine): set correct run status after requeue and snapshot status after dequeue (#2367)
* set correct run status on snapshot after dequeue * set run status back to PENDING when we requeue * remove retrying after failure status from v4 and fix tests * fix one last test
1 parent 51305b6 commit 363b9b1

File tree

4 files changed

+25
-12
lines changed

4 files changed

+25
-12
lines changed

internal-packages/run-engine/src/engine/systems/dequeueSystem.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -385,7 +385,7 @@ export class DequeueSystem {
385385
{
386386
run: {
387387
id: runId,
388-
status: snapshot.runStatus,
388+
status: lockedTaskRun.status,
389389
attemptNumber: lockedTaskRun.attemptNumber,
390390
},
391391
snapshot: {

internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -926,7 +926,6 @@ export class RunAttemptSystem {
926926
id: runId,
927927
},
928928
data: {
929-
status: "RETRYING_AFTER_FAILURE",
930929
machinePreset: retryResult.machine,
931930
},
932931
include: {
@@ -1136,7 +1135,7 @@ export class RunAttemptSystem {
11361135
const prisma = tx ?? this.$.prisma;
11371136

11381137
return await this.$.runLock.lock("tryNackAndRequeue", [run.id], async () => {
1139-
//we nack the message, this allows another work to pick up the run
1138+
//we nack the message, this allows another worker to pick up the run
11401139
const gotRequeued = await this.$.runQueue.nackMessage({
11411140
orgId,
11421141
messageId: run.id,
@@ -1152,8 +1151,22 @@ export class RunAttemptSystem {
11521151
return { wasRequeued: false, ...result };
11531152
}
11541153

1154+
const requeuedRun = await prisma.taskRun.update({
1155+
where: {
1156+
id: run.id,
1157+
},
1158+
data: {
1159+
status: "PENDING",
1160+
},
1161+
select: {
1162+
id: true,
1163+
status: true,
1164+
attemptNumber: true,
1165+
},
1166+
});
1167+
11551168
const newSnapshot = await this.executionSnapshotSystem.createExecutionSnapshot(prisma, {
1156-
run: run,
1169+
run: requeuedRun,
11571170
snapshot: {
11581171
executionStatus: "QUEUED",
11591172
description: "Requeued the run after a failure",

internal-packages/run-engine/src/engine/tests/attemptFailures.test.ts

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -107,15 +107,15 @@ describe("RunEngine attempt failures", () => {
107107
});
108108
expect(result.attemptStatus).toBe("RETRY_IMMEDIATELY");
109109
expect(result.snapshot.executionStatus).toBe("EXECUTING");
110-
expect(result.run.status).toBe("RETRYING_AFTER_FAILURE");
110+
expect(result.run.status).toBe("EXECUTING");
111111

112112
//state should be pending
113113
const executionData3 = await engine.getRunExecutionData({ runId: run.id });
114114
assertNonNullable(executionData3);
115115
expect(executionData3.snapshot.executionStatus).toBe("EXECUTING");
116116
//only when the new attempt is created, should the attempt be increased
117117
expect(executionData3.run.attemptNumber).toBe(1);
118-
expect(executionData3.run.status).toBe("RETRYING_AFTER_FAILURE");
118+
expect(executionData3.run.status).toBe("EXECUTING");
119119

120120
//create a second attempt
121121
const attemptResult2 = await engine.startRunAttempt({
@@ -600,14 +600,14 @@ describe("RunEngine attempt failures", () => {
600600
// The run should be retried with a larger machine
601601
expect(result.attemptStatus).toBe("RETRY_QUEUED");
602602
expect(result.snapshot.executionStatus).toBe("QUEUED");
603-
expect(result.run.status).toBe("RETRYING_AFTER_FAILURE");
603+
expect(result.run.status).toBe("PENDING");
604604

605605
//state should be pending
606606
const executionData = await engine.getRunExecutionData({ runId: run.id });
607607
assertNonNullable(executionData);
608608
expect(executionData.snapshot.executionStatus).toBe("QUEUED");
609609
expect(executionData.run.attemptNumber).toBe(1);
610-
expect(executionData.run.status).toBe("RETRYING_AFTER_FAILURE");
610+
expect(executionData.run.status).toBe("PENDING");
611611

612612
//create a second attempt
613613
const attemptResult2 = await engine.startRunAttempt({
@@ -761,14 +761,14 @@ describe("RunEngine attempt failures", () => {
761761
// The run should be retried with a larger machine
762762
expect(result.attemptStatus).toBe("RETRY_QUEUED");
763763
expect(result.snapshot.executionStatus).toBe("QUEUED");
764-
expect(result.run.status).toBe("RETRYING_AFTER_FAILURE");
764+
expect(result.run.status).toBe("PENDING");
765765

766766
//state should be queued
767767
const executionData = await engine.getRunExecutionData({ runId: run.id });
768768
assertNonNullable(executionData);
769769
expect(executionData.snapshot.executionStatus).toBe("QUEUED");
770770
expect(executionData.run.attemptNumber).toBe(1);
771-
expect(executionData.run.status).toBe("RETRYING_AFTER_FAILURE");
771+
expect(executionData.run.status).toBe("PENDING");
772772

773773
await engine.runQueue.processMasterQueueForEnvironment(authenticatedEnvironment.id);
774774

internal-packages/run-engine/src/engine/tests/waitpoints.test.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -240,13 +240,13 @@ describe("RunEngine Waitpoints", () => {
240240
expect(failResult.attemptStatus).toBe("RETRY_IMMEDIATELY");
241241
expect(failResult.snapshot.executionStatus).toBe("EXECUTING");
242242
expect(failResult.run.attemptNumber).toBe(1);
243-
expect(failResult.run.status).toBe("RETRYING_AFTER_FAILURE");
243+
expect(failResult.run.status).toBe("EXECUTING");
244244

245245
const executionData2 = await engine.getRunExecutionData({ runId: run.id });
246246
assertNonNullable(executionData2);
247247
expect(executionData2.snapshot.executionStatus).toBe("EXECUTING");
248248
expect(executionData2.run.attemptNumber).toBe(1);
249-
expect(executionData2.run.status).toBe("RETRYING_AFTER_FAILURE");
249+
expect(executionData2.run.status).toBe("EXECUTING");
250250
expect(executionData2.completedWaitpoints.length).toBe(0);
251251

252252
//check there are no waitpoints blocking the parent run

0 commit comments

Comments
 (0)