@@ -11,6 +11,7 @@ import {
1111  exceptionEventEnhancer , 
1212  flattenAttributes , 
1313  internalErrorFromUnexpectedExit , 
14+   isManualOutOfMemoryError , 
1415  sanitizeError , 
1516  shouldRetryError , 
1617  taskRunErrorEnhancer , 
@@ -691,20 +692,38 @@ async function findAttempt(prismaClient: PrismaClientOrTransaction, friendlyId:
691692} 
692693
693694function  isOOMError ( error : TaskRunError )  { 
694-   if  ( error . type  !==  "INTERNAL_ERROR" )  return  false ; 
695-   if  ( error . code  ===  "TASK_PROCESS_OOM_KILLED"  ||  error . code  ===  "TASK_PROCESS_MAYBE_OOM_KILLED" )  { 
696-     return  true ; 
695+   if  ( error . type  ===  "INTERNAL_ERROR" )  { 
696+     if  ( 
697+       error . code  ===  "TASK_PROCESS_OOM_KILLED"  || 
698+       error . code  ===  "TASK_PROCESS_MAYBE_OOM_KILLED" 
699+     )  { 
700+       return  true ; 
701+     } 
702+ 
703+     // For the purposes of retrying on a larger machine, we're going to treat this as an OOM error.
704+     // This is what they look like if we're executing using k8s. They then get corrected later, but it's too late. 
705+     // {"code": "TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE", "type": "INTERNAL_ERROR", "message": "Process exited with code -1 after signal SIGKILL."} 
706+     if  ( 
707+       error . code  ===  "TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE"  && 
708+       error . message  && 
709+       error . message . includes ( "SIGKILL" )  && 
710+       error . message . includes ( "-1" ) 
711+     )  { 
712+       return  true ; 
713+     } 
714+   } 
715+ 
716+   if  ( error . type  ===  "BUILT_IN_ERROR" )  { 
717+     // An ffmpeg process killed with SIGKILL surfaces as a BUILT_IN_ERROR, so treat it as an OOM error too.
718+     // { "name": "Error", "type": "BUILT_IN_ERROR", "message": "ffmpeg was killed with signal SIGKILL" } 
719+     if  ( error . message  &&  error . message . includes ( "ffmpeg was killed with signal SIGKILL" ) )  { 
720+       return  true ; 
721+     } 
697722  } 
698723
699-   // For the purposes of retrying on a larger machine, we're going to treat this is an OOM error. 
700-   // This is what they look like if we're executing using k8s. They then get corrected later, but it's too late. 
701-   // {"code": "TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE", "type": "INTERNAL_ERROR", "message": "Process exited with code -1 after signal SIGKILL."} 
702-   if  ( 
703-     error . code  ===  "TASK_PROCESS_EXITED_WITH_NON_ZERO_CODE"  && 
704-     error . message  && 
705-     error . message . includes ( "SIGKILL" )  && 
706-     error . message . includes ( "-1" ) 
707-   )  { 
724+   // Special `OutOfMemoryError` for doing a manual OOM kill.
725+   // Useful if a native library runs out of memory but doesn't actually crash the run, and you want to
725+   // manually flag the run as an OOM error.
726+   if  ( isManualOutOfMemoryError ( error ) )  { 
708727    return  true ; 
709728  } 
710729
0 commit comments