Skip to content

Commit 2c16e39

Browse files
committed
Improve TaskRunProcess health detection so we don't try and reuse an unhealthy process
This was happening after the process was killed internally, like by an OOM error
1 parent 0c71dc7 commit 2c16e39

File tree

4 files changed

+58
-11
lines changed

4 files changed

+58
-11
lines changed

packages/cli-v3/src/dev/taskRunProcessPool.ts

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,10 +208,18 @@ export class TaskRunProcessPool {
208208

209209
private isProcessHealthy(process: TaskRunProcess): boolean {
210210
// Basic health checks - we can expand this later
211-
return !process.isBeingKilled && process.pid !== undefined;
211+
return process.isHealthy;
212212
}
213213

214214
private async killProcess(process: TaskRunProcess): Promise<void> {
215+
if (!process.isHealthy) {
216+
logger.debug("[TaskRunProcessPool] Process is not healthy, skipping cleanup", {
217+
processId: process.pid,
218+
});
219+
220+
return;
221+
}
222+
215223
try {
216224
await process.cleanup(true);
217225
} catch (error) {

packages/cli-v3/src/entryPoints/managed/taskRunProcessProvider.ts

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ export class TaskRunProcessProvider {
135135
this.sendDebugLog("Not keeping TaskRunProcess alive, cleaning up", {
136136
executionCount: this.executionCount,
137137
maxExecutionCount: this.processKeepAliveMaxExecutionCount,
138-
isHealthy: this.isProcessHealthy(process),
138+
isHealthy: process.isHealthy,
139139
});
140140

141141
// Cleanup the process completely
@@ -284,19 +284,12 @@ export class TaskRunProcessProvider {
284284
return (
285285
!!this.persistentProcess &&
286286
this.executionCount < this.processKeepAliveMaxExecutionCount &&
287-
this.isProcessHealthy(this.persistentProcess)
287+
this.persistentProcess.isHealthy
288288
);
289289
}
290290

291291
private shouldKeepProcessAlive(process: TaskRunProcess): boolean {
292-
return (
293-
this.executionCount < this.processKeepAliveMaxExecutionCount && this.isProcessHealthy(process)
294-
);
295-
}
296-
297-
private isProcessHealthy(process: TaskRunProcess): boolean {
298-
// Basic health check - TaskRunProcess will handle more detailed internal health checks
299-
return !process.isBeingKilled && process.pid !== undefined;
292+
return this.executionCount < this.processKeepAliveMaxExecutionCount && process.isHealthy;
300293
}
301294

302295
private async cleanupProcess(taskRunProcess: TaskRunProcess): Promise<void> {

packages/cli-v3/src/executions/taskRunProcess.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -442,10 +442,26 @@ export class TaskRunProcess {
442442
return this._isBeingKilled || this._child?.killed;
443443
}
444444

445+
get isBeingSuspended() {
446+
return this._isBeingSuspended;
447+
}
448+
445449
get pid() {
446450
return this._childPid;
447451
}
448452

453+
get isHealthy() {
454+
if (!this._child) {
455+
return false;
456+
}
457+
458+
if (this.isBeingKilled || this.isBeingSuspended) {
459+
return false;
460+
}
461+
462+
return this._child.connected;
463+
}
464+
449465
static parseExecuteError(error: unknown, dockerMode = true): TaskRunInternalError {
450466
if (error instanceof CancelledProcessError) {
451467
return {

references/hello-world/src/trigger/oom.ts

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,33 @@ export const oomTask = task({
5555
}
5656
},
5757
});
58+
59+
export const oomTask2 = task({
60+
id: "oom-task-2",
61+
machine: "micro",
62+
run: async (payload: any, { ctx }) => {
63+
await runMemoryLeakScenario();
64+
},
65+
});
66+
67+
async function runMemoryLeakScenario() {
68+
console.log("🧠 Starting memory leak simulation");
69+
const memoryHogs = [];
70+
let iteration = 0;
71+
72+
while (iteration < 1000) {
73+
// Allocate large chunks of memory
74+
const bigArray = new Array(10000000).fill(`memory-leak-data-${iteration}`);
75+
memoryHogs.push(bigArray);
76+
77+
await setTimeout(200);
78+
iteration++;
79+
80+
const memUsage = process.memoryUsage();
81+
console.log(
82+
`🧠 Memory leak iteration ${iteration}, RSS: ${Math.round(memUsage.rss / 1024 / 1024)} MB`
83+
);
84+
}
85+
86+
console.log("🧠 Memory leak scenario completed");
87+
}

0 commit comments

Comments
 (0)