@@ -18,7 +18,15 @@ import {
1818 CheckpointClient ,
1919 isKubernetesEnvironment ,
2020} from "@trigger.dev/core/v3/serverOnly" ;
21- import { createK8sApi , RUNTIME_ENV } from "./clients/kubernetes.js" ;
21+ import { createK8sApi } from "./clients/kubernetes.js" ;
22+ import { collectDefaultMetrics } from "prom-client" ;
23+ import { register } from "./metrics.js" ;
24+ import { PodCleaner } from "./services/podCleaner.js" ;
25+ import { FailedPodHandler } from "./services/failedPodHandler.js" ;
26+
27+ if ( env . METRICS_COLLECT_DEFAULTS ) {
28+ collectDefaultMetrics ( { register } ) ;
29+ }
2230
2331class ManagedSupervisor {
2432 private readonly workerSession : SupervisorSession ;
@@ -29,6 +37,9 @@ class ManagedSupervisor {
2937 private readonly resourceMonitor : ResourceMonitor ;
3038 private readonly checkpointClient ?: CheckpointClient ;
3139
40+ private readonly podCleaner ?: PodCleaner ;
41+ private readonly failedPodHandler ?: FailedPodHandler ;
42+
3243 private readonly isKubernetes = isKubernetesEnvironment ( env . KUBERNETES_FORCE_ENABLED ) ;
3344 private readonly warmStartUrl = env . TRIGGER_WARM_START_URL ;
3445
@@ -37,6 +48,21 @@ class ManagedSupervisor {
3748 const workloadApiDomain = env . TRIGGER_WORKLOAD_API_DOMAIN ;
3849 const workloadApiPortExternal = env . TRIGGER_WORKLOAD_API_PORT_EXTERNAL ;
3950
51+ if ( env . POD_CLEANER_ENABLED ) {
52+ this . podCleaner = new PodCleaner ( {
53+ namespace : env . KUBERNETES_NAMESPACE ,
54+ batchSize : env . POD_CLEANER_BATCH_SIZE ,
55+ intervalMs : env . POD_CLEANER_INTERVAL_MS ,
56+ } ) ;
57+ }
58+
59+ if ( env . FAILED_POD_HANDLER_ENABLED ) {
60+ this . failedPodHandler = new FailedPodHandler ( {
61+ namespace : env . KUBERNETES_NAMESPACE ,
62+ reconnectIntervalMs : env . FAILED_POD_HANDLER_RECONNECT_INTERVAL_MS ,
63+ } ) ;
64+ }
65+
4066 if ( this . warmStartUrl ) {
4167 this . logger . log ( "[ManagedWorker] 🔥 Warm starts enabled" , {
4268 warmStartUrl : this . warmStartUrl ,
@@ -273,6 +299,14 @@ class ManagedSupervisor {
273299 async start ( ) {
274300 this . logger . log ( "[ManagedWorker] Starting up" ) ;
275301
302+ if ( this . podCleaner ) {
303+ await this . podCleaner . start ( ) ;
304+ }
305+
306+ if ( this . failedPodHandler ) {
307+ await this . failedPodHandler . start ( ) ;
308+ }
309+
276310 if ( env . TRIGGER_WORKLOAD_API_ENABLED ) {
277311 this . logger . log ( "[ManagedWorker] Workload API enabled" , {
278312 protocol : env . TRIGGER_WORKLOAD_API_PROTOCOL ,
@@ -292,6 +326,14 @@ class ManagedSupervisor {
292326 async stop ( ) {
293327 this . logger . log ( "[ManagedWorker] Shutting down" ) ;
294328 await this . httpServer . stop ( ) ;
329+
330+ if ( this . podCleaner ) {
331+ await this . podCleaner . stop ( ) ;
332+ }
333+
334+ if ( this . failedPodHandler ) {
335+ await this . failedPodHandler . stop ( ) ;
336+ }
295337 }
296338}
297339
0 commit comments