Skip to content

Commit 757cc21

Browse files
committed
decrease oom false positives
1 parent 5df1807 commit 757cc21

File tree

1 file changed

+101
-51
lines changed

1 file changed

+101
-51
lines changed

packages/core/src/v3/errors.ts

Lines changed: 101 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -147,7 +147,6 @@ export function shouldRetryError(error: TaskRunError): boolean {
147147
case "COULD_NOT_IMPORT_TASK":
148148
case "CONFIGURED_INCORRECTLY":
149149
case "TASK_ALREADY_RUNNING":
150-
case "TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE":
151150
case "TASK_PROCESS_SIGKILL_TIMEOUT":
152151
case "TASK_PROCESS_SIGSEGV":
153152
case "TASK_PROCESS_SIGTERM":
@@ -168,6 +167,7 @@ export function shouldRetryError(error: TaskRunError): boolean {
168167
case "TASK_EXECUTION_FAILED":
169168
case "TASK_RUN_CRASHED":
170169
case "TASK_RUN_HEARTBEAT_TIMEOUT":
170+
case "TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE":
171171
return true;
172172

173173
default:
@@ -418,39 +418,73 @@ const prettyInternalErrors: Partial<
418418
},
419419
};
420420

421+
/**
 * Builds a `TaskRunInternalError` for the given internal error code.
 *
 * The result always carries `type: "INTERNAL_ERROR"` and the supplied `code`.
 * Any fields registered for that code in `prettyInternalErrors` are spread on
 * top; the map is declared `Partial`, so a code may have no entry, in which
 * case nothing extra is added.
 *
 * @param code - The internal error code to describe.
 * @returns The error object for `code`, merged with its `prettyInternalErrors` entry if one exists.
 */
const getPrettyTaskRunError = (code: TaskRunInternalError["code"]): TaskRunInternalError => {
  return {
    type: "INTERNAL_ERROR" as const,
    code,
    // Spreading `undefined` (no entry for this code) is a no-op.
    ...prettyInternalErrors[code],
  };
};
428+
429+
/**
 * Builds `ExceptionEventProperties` for the given internal error code.
 *
 * The error code is used as the event's `type`. Any fields registered for that
 * code in `prettyInternalErrors` are spread on top; the map is declared
 * `Partial`, so a code may have no entry, in which case only `type` is set.
 *
 * @param code - The internal error code to describe.
 * @returns The exception event properties for `code`, merged with its `prettyInternalErrors` entry if one exists.
 */
const getPrettyExceptionEvent = (code: TaskRunInternalError["code"]): ExceptionEventProperties => {
  return {
    type: code,
    // Spreading `undefined` (no entry for this code) is a no-op.
    ...prettyInternalErrors[code],
  };
};
435+
436+
/**
 * Looks for a known process signal name in the leading part of a message.
 *
 * Only the first `truncateLength` characters are searched (presumably so that
 * signal names appearing deep inside long text do not match; confirm with
 * callers). When several signal names are present in the searched prefix, the
 * first match in the order SIGTERM, SIGSEGV, SIGKILL wins, regardless of where
 * each appears in the text.
 *
 * @param message - Text to inspect; `undefined` or an empty string yields `undefined`.
 * @param truncateLength - Number of leading characters to search. Defaults to 100.
 *   Zero, negative, or `NaN` values search the entire message.
 * @returns The matched signal name, or `undefined` when none is found.
 */
const findSignalInMessage = (
  message?: string,
  truncateLength = 100
): "SIGTERM" | "SIGSEGV" | "SIGKILL" | undefined => {
  if (!message) {
    return undefined;
  }

  // A negative length passed to `slice` would cut characters off the END of the
  // message rather than limiting the prefix, so every non-positive length is
  // treated as "no truncation".
  const searched = truncateLength > 0 ? message.slice(0, truncateLength) : message;

  // Order matters: it defines which signal wins when more than one is present.
  const signalsByPriority = ["SIGTERM", "SIGSEGV", "SIGKILL"] as const;

  return signalsByPriority.find((signal) => searched.includes(signal));
};
453+
421454
export function taskRunErrorEnhancer(error: TaskRunError): EnhanceError<TaskRunError> {
422455
switch (error.type) {
423456
case "BUILT_IN_ERROR": {
424457
if (error.name === "UnexpectedExitError") {
425458
if (error.message.startsWith("Unexpected exit with code -1")) {
426-
if (error.message.includes("SIGTERM")) {
427-
return {
428-
type: "INTERNAL_ERROR",
429-
code: TaskRunErrorCodes.TASK_PROCESS_SIGTERM,
430-
...prettyInternalErrors.TASK_PROCESS_SIGTERM,
431-
};
432-
} else if (error.message.includes("SIGSEGV")) {
433-
return {
434-
type: "INTERNAL_ERROR",
435-
code: TaskRunErrorCodes.TASK_PROCESS_SIGSEGV,
436-
...prettyInternalErrors.TASK_PROCESS_SIGSEGV,
437-
};
459+
const signal = findSignalInMessage(error.stackTrace);
460+
461+
switch (signal) {
462+
case "SIGTERM":
463+
return {
464+
...getPrettyTaskRunError("TASK_PROCESS_SIGTERM"),
465+
};
466+
case "SIGSEGV":
467+
return {
468+
...getPrettyTaskRunError("TASK_PROCESS_SIGSEGV"),
469+
};
470+
case "SIGKILL":
471+
return {
472+
...getPrettyTaskRunError("TASK_PROCESS_MAYBE_OOM_KILLED"),
473+
};
474+
default:
475+
return {
476+
...getPrettyTaskRunError("TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE"),
477+
message: error.message,
478+
stackTrace: error.stackTrace,
479+
};
438480
}
439-
440-
return {
441-
type: "INTERNAL_ERROR",
442-
code: TaskRunErrorCodes.TASK_PROCESS_MAYBE_OOM_KILLED,
443-
...prettyInternalErrors.TASK_PROCESS_MAYBE_OOM_KILLED,
444-
};
445481
}
446482
}
447483

448484
if (error.name === "Error") {
449485
if (error.message === "ffmpeg was killed with signal SIGKILL") {
450486
return {
451-
type: "INTERNAL_ERROR",
452-
code: TaskRunErrorCodes.TASK_PROCESS_OOM_KILLED,
453-
...prettyInternalErrors.TASK_PROCESS_OOM_KILLED,
487+
...getPrettyTaskRunError("TASK_PROCESS_OOM_KILLED"),
454488
};
455489
}
456490
}
@@ -464,25 +498,29 @@ export function taskRunErrorEnhancer(error: TaskRunError): EnhanceError<TaskRunE
464498
}
465499
case "INTERNAL_ERROR": {
466500
if (error.code === TaskRunErrorCodes.TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE) {
467-
if (error.message?.includes("SIGTERM")) {
468-
return {
469-
type: "INTERNAL_ERROR",
470-
code: TaskRunErrorCodes.TASK_PROCESS_SIGTERM,
471-
...prettyInternalErrors.TASK_PROCESS_SIGTERM,
472-
};
473-
} else if (error.message?.includes("SIGSEGV")) {
474-
return {
475-
type: "INTERNAL_ERROR",
476-
code: TaskRunErrorCodes.TASK_PROCESS_SIGSEGV,
477-
...prettyInternalErrors.TASK_PROCESS_SIGSEGV,
478-
};
479-
}
501+
const signal = findSignalInMessage(error.message);
480502

481-
return {
482-
type: "INTERNAL_ERROR",
483-
code: TaskRunErrorCodes.TASK_PROCESS_MAYBE_OOM_KILLED,
484-
...prettyInternalErrors.TASK_PROCESS_MAYBE_OOM_KILLED,
485-
};
503+
switch (signal) {
504+
case "SIGTERM":
505+
return {
506+
...getPrettyTaskRunError("TASK_PROCESS_SIGTERM"),
507+
};
508+
case "SIGSEGV":
509+
return {
510+
...getPrettyTaskRunError("TASK_PROCESS_SIGSEGV"),
511+
};
512+
case "SIGKILL":
513+
return {
514+
...getPrettyTaskRunError("TASK_PROCESS_MAYBE_OOM_KILLED"),
515+
};
516+
default: {
517+
return {
518+
...getPrettyTaskRunError("TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE"),
519+
message: error.message,
520+
stackTrace: error.stackTrace,
521+
};
522+
}
523+
}
486524
}
487525

488526
const prettyError = prettyInternalErrors[error.code];
@@ -516,18 +554,30 @@ export function exceptionEventEnhancer(
516554
}
517555
case "Internal error": {
518556
if (exception.message?.startsWith(TaskRunErrorCodes.TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE)) {
519-
if (exception.message?.includes("SIGTERM")) {
520-
return {
521-
...exception,
522-
...prettyInternalErrors.TASK_PROCESS_SIGTERM,
523-
};
524-
}
557+
const signal = findSignalInMessage(exception.message);
525558

526-
return {
527-
...exception,
528-
...prettyInternalErrors.TASK_PROCESS_MAYBE_OOM_KILLED,
529-
type: TaskRunErrorCodes.TASK_PROCESS_MAYBE_OOM_KILLED,
530-
};
559+
switch (signal) {
560+
case "SIGTERM":
561+
return {
562+
...exception,
563+
...getPrettyExceptionEvent("TASK_PROCESS_SIGTERM"),
564+
};
565+
case "SIGSEGV":
566+
return {
567+
...exception,
568+
...getPrettyExceptionEvent("TASK_PROCESS_SIGSEGV"),
569+
};
570+
case "SIGKILL":
571+
return {
572+
...exception,
573+
...getPrettyExceptionEvent("TASK_PROCESS_MAYBE_OOM_KILLED"),
574+
};
575+
default:
576+
return {
577+
...exception,
578+
...getPrettyExceptionEvent("TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE"),
579+
};
580+
}
531581
}
532582
break;
533583
}

0 commit comments

Comments
 (0)