Skip to content

Commit c58b78f

Browse files
authored
Exit the process if status is finished and not in the middle of completing (#2378)
This fixes an issue where a run is system-failed by the platform but the worker is never shut down in the cluster.
1 parent a968c23 commit c58b78f

File tree

1 file changed

+14
-1
lines changed

1 file changed

+14
-1
lines changed

packages/cli-v3/src/entryPoints/managed/execution.ts

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ export class RunExecution {
7676
private isShuttingDown = false;
7777
private shutdownReason?: string;
7878

79+
private isCompletingRun = false;
80+
7981
private supervisorSocket: SupervisorSocket;
8082
private notifier?: RunNotifier;
8183
private metadataClient?: MetadataClient;
@@ -292,7 +294,13 @@ export class RunExecution {
292294
case "FINISHED": {
293295
this.sendDebugLog("run is finished", snapshotMetadata);
294296

295-
// This can sometimes be called before the handleCompletionResult, so we don't need to do anything here
297+
// We are finishing the run in handleCompletionResult, so we don't need to do anything here
298+
if (this.isCompletingRun) {
299+
this.sendDebugLog("run is finished but we're completing it, skipping", snapshotMetadata);
300+
return;
301+
}
302+
303+
await this.exitTaskRunProcessWithoutFailingRun({ flush: true, reason: "re-queued" });
296304
return;
297305
}
298306
case "QUEUED_EXECUTING":
@@ -377,6 +385,9 @@ export class RunExecution {
377385
throw new Error("Cannot start attempt: missing run or snapshot manager");
378386
}
379387

388+
// Reset this for the new attempt
389+
this.isCompletingRun = false;
390+
380391
this.sendDebugLog("starting attempt", { isWarmStart: String(isWarmStart) });
381392

382393
const attemptStartedAt = Date.now();
@@ -655,6 +666,8 @@ export class RunExecution {
655666
throw new Error("cannot complete run: missing run or snapshot manager");
656667
}
657668

669+
this.isCompletingRun = true;
670+
658671
const completionResult = await this.httpClient.completeRunAttempt(
659672
this.runFriendlyId,
660673
this.snapshotManager.snapshotId,

0 commit comments

Comments
 (0)