Skip to content

Commit 00d32ed

Browse files
authored
feat(webapp): add support for running web services (api, engine, webapp) in cluster mode for better perf (#2472)
* feat(webapp): add support for running web services (api, engine, webapp) in cluster mode for better perf
* cleaned up signal handling and resolved some valid 🐇 issues
1 parent a1e9738 commit 00d32ed

9 files changed

+317
-162
lines changed

apps/webapp/app/eventLoopMonitor.server.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import { env } from "./env.server";
55
import { context, Context } from "@opentelemetry/api";
66
import { performance } from "node:perf_hooks";
77
import { logger } from "./services/logger.server";
8+
import { signalsEmitter } from "./services/signals.server";
89

910
const THRESHOLD_NS = env.EVENT_LOOP_MONITOR_THRESHOLD_MS * 1e6;
1011

@@ -110,6 +111,13 @@ function startEventLoopUtilizationMonitoring() {
110111
lastEventLoopUtilization = currentEventLoopUtilization;
111112
}, env.EVENT_LOOP_MONITOR_UTILIZATION_INTERVAL_MS);
112113

114+
signalsEmitter.on("SIGTERM", () => {
115+
clearInterval(interval);
116+
});
117+
signalsEmitter.on("SIGINT", () => {
118+
clearInterval(interval);
119+
});
120+
113121
return () => {
114122
clearInterval(interval);
115123
};

apps/webapp/app/services/realtime/relayRealtimeStreams.server.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { AuthenticatedEnvironment } from "../apiAuth.server";
22
import { logger } from "../logger.server";
3+
import { signalsEmitter } from "../signals.server";
34
import { StreamIngestor, StreamResponder } from "./types";
45
import { LineTransformStream } from "./utils.server";
56
import { v1RealtimeStreams } from "./v1StreamsGlobal.server";
@@ -243,12 +244,17 @@ export class RelayRealtimeStreams implements StreamIngestor, StreamResponder {
243244
}
244245

245246
function initializeRelayRealtimeStreams() {
246-
return new RelayRealtimeStreams({
247+
const service = new RelayRealtimeStreams({
247248
ttl: 1000 * 60 * 5, // 5 minutes
248249
cleanupInterval: 1000 * 60, // 1 minute
249250
fallbackIngestor: v1RealtimeStreams,
250251
fallbackResponder: v1RealtimeStreams,
251252
});
253+
254+
signalsEmitter.on("SIGTERM", service.close.bind(service));
255+
signalsEmitter.on("SIGINT", service.close.bind(service));
256+
257+
return service;
252258
}
253259

254260
export const relayRealtimeStreams = singleton(

apps/webapp/app/services/runsReplicationInstance.server.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ import invariant from "tiny-invariant";
33
import { env } from "~/env.server";
44
import { singleton } from "~/utils/singleton";
55
import { provider } from "~/v3/tracer.server";
6-
import { logger } from "./logger.server";
76
import { RunsReplicationService } from "./runsReplicationService.server";
7+
import { signalsEmitter } from "./signals.server";
88

99
export const runsReplicationInstance = singleton(
1010
"runsReplicationInstance",
@@ -80,8 +80,8 @@ function initializeRunsReplicationInstance() {
8080
});
8181
});
8282

83-
process.on("SIGTERM", service.shutdown.bind(service));
84-
process.on("SIGINT", service.shutdown.bind(service));
83+
signalsEmitter.on("SIGTERM", service.shutdown.bind(service));
84+
signalsEmitter.on("SIGINT", service.shutdown.bind(service));
8585
}
8686

8787
return service;

apps/webapp/app/services/runsReplicationService.server.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,8 @@ export class RunsReplicationService {
204204
}
205205

206206
public async shutdown() {
207+
if (this._isShuttingDown) return;
208+
207209
this._isShuttingDown = true;
208210

209211
this.logger.info("Initiating shutdown of runs replication service");

apps/webapp/app/services/signals.server.ts

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import { EventEmitter } from "events";
2+
import { singleton } from "~/utils/singleton";
3+
4+
export type SignalsEvents = {
5+
SIGTERM: [
6+
{
7+
time: Date;
8+
signal: NodeJS.Signals;
9+
}
10+
];
11+
SIGINT: [
12+
{
13+
time: Date;
14+
signal: NodeJS.Signals;
15+
}
16+
];
17+
};
18+
19+
export type SignalsEventArgs<T extends keyof SignalsEvents> = SignalsEvents[T];
20+
21+
export type SignalsEmitter = EventEmitter<SignalsEvents>;
22+
23+
function initializeSignalsEmitter() {
24+
const emitter = new EventEmitter<SignalsEvents>();
25+
26+
process.on("SIGTERM", () => emitter.emit("SIGTERM", { time: new Date(), signal: "SIGTERM" }));
27+
process.on("SIGINT", () => emitter.emit("SIGINT", { time: new Date(), signal: "SIGINT" }));
28+
29+
return emitter;
30+
}
31+
32+
export const signalsEmitter = singleton("signalsEmitter", initializeSignalsEmitter);

apps/webapp/app/v3/dynamicFlushScheduler.server.ts

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { Logger } from "@trigger.dev/core/logger";
22
import { nanoid } from "nanoid";
33
import pLimit from "p-limit";
4+
import { signalsEmitter } from "~/services/signals.server";
45

56
export type DynamicFlushSchedulerConfig<T> = {
67
batchSize: number;
@@ -22,6 +23,7 @@ export class DynamicFlushScheduler<T> {
2223
private readonly BATCH_SIZE: number;
2324
private readonly FLUSH_INTERVAL: number;
2425
private flushTimer: NodeJS.Timeout | null;
26+
private metricsReporterTimer: NodeJS.Timeout | undefined;
2527
private readonly callback: (flushId: string, batch: T[]) => Promise<void>;
2628

2729
// New properties for dynamic scaling
@@ -41,6 +43,7 @@ export class DynamicFlushScheduler<T> {
4143
droppedEvents: 0,
4244
droppedEventsByKind: new Map<string, number>(),
4345
};
46+
private isShuttingDown: boolean = false;
4447

4548
// New properties for load shedding
4649
private readonly loadSheddingThreshold: number;
@@ -75,6 +78,7 @@ export class DynamicFlushScheduler<T> {
7578

7679
this.startFlushTimer();
7780
this.startMetricsReporter();
81+
this.setupShutdownHandlers();
7882
}
7983

8084
addToBatch(items: T[]): void {
@@ -119,8 +123,8 @@ export class DynamicFlushScheduler<T> {
119123
this.currentBatch.push(...itemsToAdd);
120124
this.totalQueuedItems += itemsToAdd.length;
121125

122-
// Check if we need to create a batch
123-
if (this.currentBatch.length >= this.currentBatchSize) {
126+
// Check if we need to create a batch (if we are shutting down, create a batch immediately because the flush timer is stopped)
127+
if (this.currentBatch.length >= this.currentBatchSize || this.isShuttingDown) {
124128
this.createBatch();
125129
}
126130

@@ -137,6 +141,23 @@ export class DynamicFlushScheduler<T> {
137141
this.resetFlushTimer();
138142
}
139143

144+
private setupShutdownHandlers(): void {
145+
signalsEmitter.on("SIGTERM", () =>
146+
this.shutdown().catch((error) => {
147+
this.logger.error("Error shutting down dynamic flush scheduler", {
148+
error,
149+
});
150+
})
151+
);
152+
signalsEmitter.on("SIGINT", () =>
153+
this.shutdown().catch((error) => {
154+
this.logger.error("Error shutting down dynamic flush scheduler", {
155+
error,
156+
});
157+
})
158+
);
159+
}
160+
140161
private startFlushTimer(): void {
141162
this.flushTimer = setInterval(() => this.checkAndFlush(), this.FLUSH_INTERVAL);
142163
}
@@ -145,6 +166,9 @@ export class DynamicFlushScheduler<T> {
145166
if (this.flushTimer) {
146167
clearInterval(this.flushTimer);
147168
}
169+
170+
if (this.isShuttingDown) return;
171+
148172
this.startFlushTimer();
149173
}
150174

@@ -226,7 +250,7 @@ export class DynamicFlushScheduler<T> {
226250
}
227251

228252
private lastConcurrencyAdjustment: number = Date.now();
229-
253+
230254
private adjustConcurrency(backOff: boolean = false): void {
231255
const currentConcurrency = this.limiter.concurrency;
232256
let newConcurrency = currentConcurrency;
@@ -281,7 +305,7 @@ export class DynamicFlushScheduler<T> {
281305

282306
private startMetricsReporter(): void {
283307
// Report metrics every 30 seconds
284-
setInterval(() => {
308+
this.metricsReporterTimer = setInterval(() => {
285309
const droppedByKind: Record<string, number> = {};
286310
this.metrics.droppedEventsByKind.forEach((count, kind) => {
287311
droppedByKind[kind] = count;
@@ -356,10 +380,18 @@ export class DynamicFlushScheduler<T> {
356380

357381
// Graceful shutdown
358382
async shutdown(): Promise<void> {
383+
if (this.isShuttingDown) return;
384+
385+
this.isShuttingDown = true;
386+
359387
if (this.flushTimer) {
360388
clearInterval(this.flushTimer);
361389
}
362390

391+
if (this.metricsReporterTimer) {
392+
clearInterval(this.metricsReporterTimer);
393+
}
394+
363395
// Flush any remaining items
364396
if (this.currentBatch.length > 0) {
365397
this.createBatch();

apps/webapp/app/v3/marqs/index.server.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import z from "zod";
2424
import { env } from "~/env.server";
2525
import { AuthenticatedEnvironment } from "~/services/apiAuth.server";
2626
import { logger } from "~/services/logger.server";
27+
import { signalsEmitter } from "~/services/signals.server";
2728
import { singleton } from "~/utils/singleton";
2829
import { legacyRunEngineWorker } from "../legacyRunEngineWorker.server";
2930
import { concurrencyTracker } from "../services/taskRunConcurrencyTracker.server";
@@ -112,6 +113,7 @@ export class MarQS {
112113
private queueDequeueCooloffPeriod: Map<string, number> = new Map();
113114
private queueDequeueCooloffCounts: Map<string, number> = new Map();
114115
private clearCooloffPeriodInterval: NodeJS.Timeout;
116+
isShuttingDown: boolean = false;
115117

116118
constructor(private readonly options: MarQSOptions) {
117119
this.redis = options.redis;
@@ -151,11 +153,14 @@ export class MarQS {
151153
}
152154

153155
#setupShutdownHandlers() {
154-
process.on("SIGTERM", () => this.shutdown("SIGTERM"));
155-
process.on("SIGINT", () => this.shutdown("SIGINT"));
156+
signalsEmitter.on("SIGTERM", () => this.shutdown("SIGTERM"));
157+
signalsEmitter.on("SIGINT", () => this.shutdown("SIGINT"));
156158
}
157159

158160
async shutdown(signal: NodeJS.Signals) {
161+
if (this.isShuttingDown) return;
162+
this.isShuttingDown = true;
163+
159164
console.log("👇 Shutting down marqs", this.name, signal);
160165
clearInterval(this.clearCooloffPeriodInterval);
161166
this.#rebalanceWorkers.forEach((worker) => worker.stop());

apps/webapp/app/v3/tracing.server.ts

Lines changed: 7 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -41,33 +41,14 @@ export async function startSpanWithEnv<T>(
4141
fn: (span: Span) => Promise<T>,
4242
options?: SpanOptions
4343
): Promise<T> {
44-
return startSpan(
45-
tracer,
46-
name,
47-
async (span) => {
48-
try {
49-
return await fn(span);
50-
} catch (e) {
51-
if (e instanceof Error) {
52-
span.recordException(e);
53-
} else {
54-
span.recordException(new Error(String(e)));
55-
}
56-
57-
throw e;
58-
} finally {
59-
span.end();
60-
}
44+
return startSpan(tracer, name, fn, {
45+
...options,
46+
attributes: {
47+
...attributesFromAuthenticatedEnv(env),
48+
...options?.attributes,
6149
},
62-
{
63-
attributes: {
64-
...attributesFromAuthenticatedEnv(env),
65-
...options?.attributes,
66-
},
67-
kind: SpanKind.SERVER,
68-
...options,
69-
}
70-
);
50+
kind: SpanKind.SERVER,
51+
});
7152
}
7253

7354
export async function emitDebugLog(

0 commit comments

Comments
 (0)