Skip to content

Commit 8798352

Browse files
committed
Super-warm starts in deployed tasks proof
1 parent 1a1e70a commit 8798352

File tree

4 files changed

+111
-51
lines changed

4 files changed

+111
-51
lines changed

apps/supervisor/src/util.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,15 @@
11
import { isMacOS, isWindows } from "std-env";
22

3+
export function normalizeDockerHostUrl(url: string) {
4+
const $url = new URL(url);
5+
6+
if ($url.host === "localhost") {
7+
$url.host = getDockerHostDomain();
8+
}
9+
10+
return $url.toString();
11+
}
12+
313
export function getDockerHostDomain() {
414
return isMacOS || isWindows ? "host.docker.internal" : "localhost";
515
}

apps/supervisor/src/workloadManager/docker.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import {
55
type WorkloadManagerOptions,
66
} from "./types.js";
77
import { env } from "../env.js";
8-
import { getDockerHostDomain, getRunnerId } from "../util.js";
8+
import { getDockerHostDomain, getRunnerId, normalizeDockerHostUrl } from "../util.js";
99
import Docker from "dockerode";
1010
import { tryCatch } from "@trigger.dev/core";
1111

@@ -78,7 +78,7 @@ export class DockerWorkloadManager implements WorkloadManager {
7878
];
7979

8080
if (this.opts.warmStartUrl) {
81-
envVars.push(`TRIGGER_WARM_START_URL=${this.opts.warmStartUrl}`);
81+
envVars.push(`TRIGGER_WARM_START_URL=${normalizeDockerHostUrl(this.opts.warmStartUrl)}`);
8282
}
8383

8484
if (this.opts.metadataUrl) {

packages/cli-v3/src/entryPoints/managed/execution.ts

Lines changed: 38 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,9 @@ export class RunExecution {
115115
* Kills the current execution.
116116
*/
117117
public async kill({ exitExecution = true }: { exitExecution?: boolean } = {}) {
118-
await this.taskRunProcess?.kill("SIGKILL");
118+
if (this.taskRunProcess) {
119+
await this.taskRunProcessProvider.handleProcessAbort(this.taskRunProcess);
120+
}
119121

120122
if (exitExecution) {
121123
this.shutdown("kill");
@@ -135,13 +137,9 @@ export class RunExecution {
135137
throw new Error("prepareForExecution called after process was already created");
136138
}
137139

138-
this.taskRunProcess = this.taskRunProcessProvider.getProcess({
139-
taskRunEnv: opts.taskRunEnv,
140-
isWarmStart: true,
141-
});
142-
143-
// Attach event handlers to the process
144-
this.attachTaskRunProcessHandlers(this.taskRunProcess);
140+
// Store the environment for later use, don't create process yet
141+
// The process will be created when needed in executeRun
142+
this.currentTaskRunEnv = opts.taskRunEnv;
145143

146144
return this;
147145
}
@@ -182,15 +180,16 @@ export class RunExecution {
182180
}
183181

184182
/**
185-
* Returns true if no run has been started yet and the process is prepared for the next run.
183+
* Returns true if no run has been started yet and we're prepared for the next run.
186184
*/
187185
get canExecute(): boolean {
188186
// If we've ever had a run ID, this execution can't be reused
189187
if (this._runFriendlyId) {
190188
return false;
191189
}
192190

193-
return !!this.taskRunProcess?.isPreparedForNextRun;
191+
// We can execute if we have the task run environment ready
192+
return !!this.currentTaskRunEnv;
194193
}
195194

196195
/**
@@ -557,35 +556,35 @@ export class RunExecution {
557556
metrics: TaskRunExecutionMetrics;
558557
isWarmStart?: boolean;
559558
}) {
560-
// For immediate retries, we need to ensure the task run process is prepared for the next attempt
561-
if (
562-
this.runFriendlyId &&
563-
this.taskRunProcess &&
564-
!this.taskRunProcess.isPreparedForNextAttempt
565-
) {
566-
this.sendDebugLog("killing existing task run process before executing next attempt");
567-
await this.kill({ exitExecution: false }).catch(() => {});
568-
}
559+
const isImmediateRetry = !!this.runFriendlyId;
569560

570-
// To skip this step and eagerly create the task run process, run prepareForExecution first
571-
if (!this.taskRunProcess || !this.taskRunProcess.isPreparedForNextRun) {
572-
this.taskRunProcess = this.taskRunProcessProvider.getProcess({
573-
taskRunEnv: { ...envVars, TRIGGER_PROJECT_REF: execution.project.ref },
574-
isWarmStart,
575-
});
576-
this.attachTaskRunProcessHandlers(this.taskRunProcess);
561+
if (isImmediateRetry) {
562+
await this.taskRunProcessProvider.handleImmediateRetry();
577563
}
578564

565+
const taskRunEnv = this.currentTaskRunEnv ?? envVars;
566+
567+
this.taskRunProcess = await this.taskRunProcessProvider.getProcess({
568+
taskRunEnv: { ...taskRunEnv, TRIGGER_PROJECT_REF: execution.project.ref },
569+
isWarmStart,
570+
});
571+
572+
this.attachTaskRunProcessHandlers(this.taskRunProcess);
573+
579574
this.sendDebugLog("executing task run process", { runId: execution.run.id });
580575

581-
// Set up an abort handler that will cleanup the task run process
582-
this.executionAbortController.signal.addEventListener("abort", async () => {
576+
const abortHandler = async () => {
583577
this.sendDebugLog("execution aborted during task run, cleaning up process", {
584578
runId: execution.run.id,
585579
});
586580

587-
await this.taskRunProcess?.cleanup(true);
588-
});
581+
if (this.taskRunProcess) {
582+
await this.taskRunProcessProvider.handleProcessAbort(this.taskRunProcess);
583+
}
584+
};
585+
586+
// Set up an abort handler that will cleanup the task run process
587+
this.executionAbortController.signal.addEventListener("abort", abortHandler);
589588

590589
const completion = await this.taskRunProcess.execute(
591590
{
@@ -600,18 +599,19 @@ export class RunExecution {
600599
isWarmStart
601600
);
602601

602+
this.executionAbortController.signal.removeEventListener("abort", abortHandler);
603+
603604
// If we get here, the task completed normally
604605
this.sendDebugLog("completed run attempt", { attemptSuccess: completion.ok });
605606

606-
// Return the process to the provider for potential reuse
607-
this.taskRunProcessProvider.returnProcess(this.taskRunProcess);
608-
609-
// The execution has finished, so we can cleanup the task run process if not being reused
610-
const [error] = await tryCatch(this.taskRunProcess.cleanup(true));
607+
// Return the process to the provider - this handles all cleanup logic
608+
const [returnError] = await tryCatch(
609+
this.taskRunProcessProvider.returnProcess(this.taskRunProcess)
610+
);
611611

612-
if (error) {
613-
this.sendDebugLog("failed to cleanup task run process, submitting completion anyway", {
614-
error: error.message,
612+
if (returnError) {
613+
this.sendDebugLog("failed to return task run process, submitting completion anyway", {
614+
error: returnError.message,
615615
});
616616
}
617617

packages/cli-v3/src/entryPoints/managed/taskRunProcessProvider.ts

Lines changed: 61 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,26 @@ export class TaskRunProcessProvider {
3535
this.processKeepAliveMaxExecutionCount = opts.processKeepAliveMaxExecutionCount;
3636
}
3737

38+
async handleImmediateRetry(): Promise<void> {
39+
if (!this.processKeepAliveEnabled) {
40+
// For immediate retries, we need to ensure we have a clean process
41+
if (this.persistentProcess) {
42+
// If the process is not prepared for the next attempt, we need to get a fresh one
43+
if (!this.persistentProcess.isPreparedForNextAttempt) {
44+
this.sendDebugLog(
45+
"existing task run process not prepared for retry, will get fresh process"
46+
);
47+
await this.persistentProcess.kill("SIGKILL");
48+
this.persistentProcess = null;
49+
}
50+
}
51+
}
52+
}
53+
3854
/**
3955
* Gets a TaskRunProcess, either by reusing an existing one or creating a new one
4056
*/
41-
getProcess(opts: GetProcessOptions): TaskRunProcess {
57+
async getProcess(opts: GetProcessOptions): Promise<TaskRunProcess> {
4258
this.sendDebugLog("Getting TaskRunProcess", {
4359
processKeepAliveEnabled: this.processKeepAliveEnabled,
4460
hasPersistentProcess: !!this.persistentProcess,
@@ -47,16 +63,22 @@ export class TaskRunProcessProvider {
4763
isWarmStart: opts.isWarmStart,
4864
});
4965

66+
// If process keep-alive is disabled, always create a new process
67+
if (!this.processKeepAliveEnabled) {
68+
this.sendDebugLog("Creating new TaskRunProcess (keep-alive disabled)");
69+
return this.createTaskRunProcess(opts);
70+
}
71+
5072
// If process keep-alive is enabled and we have a healthy persistent process, reuse it
51-
if (this.processKeepAliveEnabled && this.shouldReusePersistentProcess()) {
73+
if (this.shouldReusePersistentProcess()) {
5274
this.sendDebugLog("Reusing persistent TaskRunProcess", {
5375
executionCount: this.executionCount,
5476
});
5577

5678
return this.persistentProcess!;
5779
}
5880

59-
// Create new process
81+
// Create new process (keep-alive enabled but no reusable process available)
6082
this.sendDebugLog("Creating new TaskRunProcess", {
6183
hadPersistentProcess: !!this.persistentProcess,
6284
reason: this.processKeepAliveEnabled
@@ -66,41 +88,69 @@ export class TaskRunProcessProvider {
6688

6789
// Clean up old persistent process if it exists
6890
if (this.persistentProcess) {
69-
this.cleanupPersistentProcess();
91+
await this.cleanupPersistentProcess();
7092
}
7193

7294
const newProcess = this.createTaskRunProcess(opts);
7395
return newProcess;
7496
}
7597

7698
/**
77-
* Returns a process after execution, handling keep-alive logic
99+
* Returns a process after execution, handling keep-alive logic and cleanup
78100
*/
79-
returnProcess(process: TaskRunProcess): void {
101+
async returnProcess(process: TaskRunProcess): Promise<void> {
102+
this.sendDebugLog("Returning TaskRunProcess", {
103+
processKeepAliveEnabled: this.processKeepAliveEnabled,
104+
executionCount: this.executionCount,
105+
maxExecutionCount: this.processKeepAliveMaxExecutionCount,
106+
});
107+
80108
if (!this.processKeepAliveEnabled) {
81-
this.sendDebugLog("Keep-alive disabled, not preserving process");
109+
// Keep-alive disabled - immediately cleanup the process
110+
this.sendDebugLog("Keep-alive disabled, cleaning up process immediately");
111+
await process.cleanup(true);
82112
return;
83113
}
84114

115+
// Keep-alive enabled - check if we should keep the process alive
85116
if (this.shouldKeepProcessAlive(process)) {
86117
this.sendDebugLog("Keeping TaskRunProcess alive for next run", {
87118
executionCount: this.executionCount,
88119
maxExecutionCount: this.processKeepAliveMaxExecutionCount,
89120
});
90121

122+
// Call cleanup(false) to prepare for next run but keep process alive
123+
await process.cleanup(false);
91124
this.persistentProcess = process;
92125
this.executionCount++;
93126
} else {
94-
this.sendDebugLog("Not keeping TaskRunProcess alive", {
127+
this.sendDebugLog("Not keeping TaskRunProcess alive, cleaning up", {
95128
executionCount: this.executionCount,
96129
maxExecutionCount: this.processKeepAliveMaxExecutionCount,
97130
isHealthy: this.isProcessHealthy(process),
98131
});
99132

100-
// Don't set as persistent, it will be cleaned up normally
133+
// Cleanup the process completely
134+
await process.cleanup(true);
101135
}
102136
}
103137

138+
/**
139+
* Handles process abort/kill scenarios
140+
*/
141+
async handleProcessAbort(process: TaskRunProcess): Promise<void> {
142+
this.sendDebugLog("Handling process abort");
143+
144+
// If this was our persistent process, clear it
145+
if (this.persistentProcess?.pid === process.pid) {
146+
this.persistentProcess = null;
147+
this.executionCount = 0;
148+
}
149+
150+
// Kill the process
151+
await process.cleanup(true);
152+
}
153+
104154
/**
105155
* Forces cleanup of any persistent process
106156
*/
@@ -173,12 +223,12 @@ export class TaskRunProcessProvider {
173223
return process.isPreparedForNextRun || process.isPreparedForNextAttempt;
174224
}
175225

176-
private cleanupPersistentProcess(): void {
226+
private async cleanupPersistentProcess(): Promise<void> {
177227
if (this.persistentProcess) {
178228
this.sendDebugLog("Cleaning up persistent TaskRunProcess");
179229

180230
// Don't await this - let it cleanup in the background
181-
this.persistentProcess.kill("SIGKILL").catch(() => {});
231+
await this.persistentProcess.kill("SIGKILL").catch(() => {});
182232
this.persistentProcess = null;
183233
this.executionCount = 0;
184234
}

0 commit comments

Comments
 (0)