Skip to content

Commit 53551e0

Browse files
committed
request worker exit on fatal errors
1 parent 2e8bc83 commit 53551e0

File tree

2 files changed

+51
-1
lines changed

2 files changed

+51
-1
lines changed

apps/webapp/app/v3/services/finalizeTaskRun.server.ts

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,17 @@ import { type Prisma, type TaskRun } from "@trigger.dev/database";
33
import { logger } from "~/services/logger.server";
44
import { marqs, sanitizeQueueName } from "~/v3/marqs/index.server";
55
import { generateFriendlyId } from "../friendlyIdentifiers";
6-
import { FINAL_ATTEMPT_STATUSES, isFailedRunStatus, type FINAL_RUN_STATUSES } from "../taskStatus";
6+
import {
7+
FINAL_ATTEMPT_STATUSES,
8+
isFailedRunStatus,
9+
isFatalRunStatus,
10+
type FINAL_RUN_STATUSES,
11+
} from "../taskStatus";
712
import { PerformTaskRunAlertsService } from "./alerts/performTaskRunAlerts.server";
813
import { BaseService } from "./baseService.server";
914
import { ResumeDependentParentsService } from "./resumeDependentParents.server";
1015
import { ExpireEnqueuedRunService } from "./expireEnqueuedRun.server";
16+
import { socketIo } from "../handleSocketIo.server";
1117

1218
type BaseInput = {
1319
id: string;
@@ -90,6 +96,42 @@ export class FinalizeTaskRunService extends BaseService {
9096
await PerformTaskRunAlertsService.enqueue(run.id, this._prisma);
9197
}
9298

99+
if (isFatalRunStatus(run.status)) {
100+
logger.error("FinalizeTaskRunService: Fatal status", { runId: run.id, status: run.status });
101+
102+
const extendedRun = await this._prisma.taskRun.findFirst({
103+
where: { id: run.id },
104+
select: {
105+
id: true,
106+
lockedToVersion: {
107+
select: {
108+
supportsLazyAttempts: true,
109+
},
110+
},
111+
runtimeEnvironment: {
112+
select: {
113+
type: true,
114+
},
115+
},
116+
},
117+
});
118+
119+
if (extendedRun && extendedRun.runtimeEnvironment.type !== "DEVELOPMENT") {
120+
logger.error("FinalizeTaskRunService: Fatal status, requesting worker exit", {
121+
runId: run.id,
122+
status: run.status,
123+
});
124+
125+
// Signal to exit any leftover containers
126+
socketIo.coordinatorNamespace.emit("REQUEST_RUN_CANCELLATION", {
127+
version: "v1",
128+
runId: run.id,
129+
// Give the run a few seconds to exit to complete any flushing etc
130+
delayInMs: extendedRun.lockedToVersion?.supportsLazyAttempts ? 5_000 : undefined,
131+
});
132+
}
133+
}
134+
93135
return run as Output<T>;
94136
}
95137

apps/webapp/app/v3/taskStatus.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,10 @@ export const FAILED_RUN_STATUSES = [
5151

5252
export type FAILED_RUN_STATUSES = (typeof FAILED_RUN_STATUSES)[number];
5353

54+
export const FATAL_RUN_STATUSES = ["SYSTEM_FAILURE", "CRASHED"] satisfies TaskRunStatus[];
55+
56+
export type FATAL_RUN_STATUSES = (typeof FAILED_RUN_STATUSES)[number];
57+
5458
export const CANCELLABLE_RUN_STATUSES = NON_FINAL_RUN_STATUSES;
5559
export const CANCELLABLE_ATTEMPT_STATUSES = NON_FINAL_ATTEMPT_STATUSES;
5660

@@ -76,6 +80,10 @@ export function isFailedRunStatus(status: TaskRunStatus): boolean {
7680
return FAILED_RUN_STATUSES.includes(status);
7781
}
7882

83+
export function isFatalRunStatus(status: TaskRunStatus): boolean {
84+
return FATAL_RUN_STATUSES.includes(status);
85+
}
86+
7987
export function isCancellableRunStatus(status: TaskRunStatus): boolean {
8088
return CANCELLABLE_RUN_STATUSES.includes(status);
8189
}

0 commit comments

Comments
 (0)