From 50fa94d7667474b134e179c2c92e06fea8811146 Mon Sep 17 00:00:00 2001
From: Eric Allam
Date: Wed, 3 Sep 2025 10:31:38 +0100
Subject: [PATCH 1/2] perf(webapp): add event loop utilization metric

---
Review notes (ignored by git am, not part of the commit message):
- readNodeMetrics() now advances lastEventLoopUtilization after every read, so the
  gauge reports utilization since the previous collection, matching its description,
  instead of utilization since configureNodejsMetrics() was called.
- Line breaks and context-line indentation were reconstructed from the hunk counts;
  index hashes were not regenerated after this edit.

 apps/webapp/app/v3/tracer.server.ts | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/apps/webapp/app/v3/tracer.server.ts b/apps/webapp/app/v3/tracer.server.ts
index d084f78a7f..71e14521e5 100644
--- a/apps/webapp/app/v3/tracer.server.ts
+++ b/apps/webapp/app/v3/tracer.server.ts
@@ -57,6 +57,7 @@ import { flattenAttributes } from "@trigger.dev/core/v3";
 import { prisma } from "~/db.server";
 import { metricsRegister } from "~/metrics.server";
 import type { Prisma } from "@trigger.dev/database";
+import { performance } from "node:perf_hooks";
 
 export const SEMINTATTRS_FORCE_RECORDING = "forceRecording";
 
@@ -602,10 +603,17 @@ function configureNodejsMetrics({ meter }: { meter: Meter }) {
     description: "Event loop 99th percentile delay",
     unit: "s",
   });
+  // ELU observable gauge (unit is a ratio, 0..1)
+  const eluGauge = meter.createObservableGauge("nodejs.event_loop.utilization", {
+    description: "Event loop utilization over the last collection interval",
+    unit: "1", // OpenTelemetry convention for ratios
+  });
 
   // Get UV threadpool size (defaults to 4 if not set)
   const uvThreadpoolSize = parseInt(process.env.UV_THREADPOOL_SIZE || "4", 10);
 
+  let lastEventLoopUtilization = performance.eventLoopUtilization();
+
   // Single helper to read metrics from prom-client
   async function readNodeMetrics() {
     const metrics = await metricsRegister.getMetricsAsJSON();
@@ -648,6 +656,19 @@ function configureNodejsMetrics({ meter }: { meter: Meter }) {
       }
     }
 
+    const currentEventLoopUtilization = performance.eventLoopUtilization();
+    // Diff over [lastSnapshot, current]
+    const diff = performance.eventLoopUtilization(
+      currentEventLoopUtilization,
+      lastEventLoopUtilization
+    );
+
+    // diff.utilization is between 0 and 1 (fraction of time "active")
+    const utilization = Number.isFinite(diff.utilization) ? diff.utilization : 0;
+
+    // Advance the baseline so the next read covers only the time since this one
+    lastEventLoopUtilization = currentEventLoopUtilization;
+
     return {
       threadpoolSize: uvThreadpoolSize,
       handlesByType,
@@ -661,6 +682,7 @@ function configureNodejsMetrics({ meter }: { meter: Meter }) {
         p50: eventLoopLagP50?.values?.[0]?.value ?? 0,
         p90: eventLoopLagP90?.values?.[0]?.value ?? 0,
         p99: eventLoopLagP99?.values?.[0]?.value ?? 0,
+        utilization,
       },
     };
   }
@@ -698,6 +720,7 @@ function configureNodejsMetrics({ meter }: { meter: Meter }) {
       res.observe(eventLoopLagP50Gauge, eventLoop.p50);
       res.observe(eventLoopLagP90Gauge, eventLoop.p90);
       res.observe(eventLoopLagP99Gauge, eventLoop.p99);
+      res.observe(eluGauge, eventLoop.utilization);
     },
     [
       uvThreadpoolSizeGauge,
@@ -711,6 +734,7 @@ function configureNodejsMetrics({ meter }: { meter: Meter }) {
       eventLoopLagP50Gauge,
       eventLoopLagP90Gauge,
       eventLoopLagP99Gauge,
+      eluGauge,
     ]
   );
 }

From 6d95fad20e0b222af13c9afcb4e9678c62d05154 Mon Sep 17 00:00:00 2001
From: Eric Allam
Date: Wed, 3 Sep 2025 10:59:04 +0100
Subject: [PATCH 2/2] add event loop utilization logging as well

---
Review notes (ignored by git am, not part of the commit message):
- enable() now stops any sampler started by an earlier enable() before starting a new
  one, so repeated calls no longer leak a setInterval and duplicate the log output.
- disable() clears the stored stop function, and the variable is typed as possibly
  undefined to match how it is actually used.
- Line breaks and context-line indentation were reconstructed from the hunk counts;
  index hashes were not regenerated after this edit.

 apps/webapp/app/env.server.ts              |  2 ++
 apps/webapp/app/eventLoopMonitor.server.ts | 45 +++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

diff --git a/apps/webapp/app/env.server.ts b/apps/webapp/app/env.server.ts
index 350463a5b0..059195d543 100644
--- a/apps/webapp/app/env.server.ts
+++ b/apps/webapp/app/env.server.ts
@@ -1167,6 +1167,8 @@ const EnvironmentSchema = z
     AI_RUN_FILTER_MODEL: z.string().optional(),
 
     EVENT_LOOP_MONITOR_THRESHOLD_MS: z.coerce.number().int().default(100),
+    EVENT_LOOP_MONITOR_UTILIZATION_INTERVAL_MS: z.coerce.number().int().default(1000),
+    EVENT_LOOP_MONITOR_UTILIZATION_SAMPLE_RATE: z.coerce.number().default(0.05),
 
     VERY_SLOW_QUERY_THRESHOLD_MS: z.coerce.number().int().optional(),
   })
diff --git a/apps/webapp/app/eventLoopMonitor.server.ts b/apps/webapp/app/eventLoopMonitor.server.ts
index 42e982bdb9..1d8603a4af 100644
--- a/apps/webapp/app/eventLoopMonitor.server.ts
+++ b/apps/webapp/app/eventLoopMonitor.server.ts
@@ -3,6 +3,8 @@ import { singleton } from "./utils/singleton";
 import { tracer } from "./v3/tracer.server";
 import { env } from "./env.server";
 import { context, Context } from "@opentelemetry/api";
+import { performance } from "node:perf_hooks";
+import { logger } from "./services/logger.server";
 
 const THRESHOLD_NS = env.EVENT_LOOP_MONITOR_THRESHOLD_MS * 1e6;
 
@@ -69,16 +71,59 @@ function after(asyncId: number) {
 export const eventLoopMonitor = singleton("eventLoopMonitor", () => {
   const hook = createHook({ init, before, after, destroy });
 
+  // Stop function for the utilization sampler; undefined while no sampler is running
+  let stopEventLoopUtilizationMonitoring: (() => void) | undefined;
+
   return {
     enable: () => {
       console.log("🥸 Initializing event loop monitor");
 
       hook.enable();
+
+      // Stop any sampler left over from an earlier enable() so intervals never stack up
+      stopEventLoopUtilizationMonitoring?.();
+      stopEventLoopUtilizationMonitoring = startEventLoopUtilizationMonitoring();
     },
     disable: () => {
       console.log("🥸 Disabling event loop monitor");
 
       hook.disable();
+
+      stopEventLoopUtilizationMonitoring?.();
+      stopEventLoopUtilizationMonitoring = undefined;
     },
   };
 });
+
+/**
+ * Periodically samples event loop utilization and logs a random subset of the samples.
+ *
+ * Every EVENT_LOOP_MONITOR_UTILIZATION_INTERVAL_MS the utilization since the previous
+ * tick is computed; each sample is logged with probability
+ * EVENT_LOOP_MONITOR_UTILIZATION_SAMPLE_RATE.
+ *
+ * @returns a function that stops the sampling interval
+ */
+function startEventLoopUtilizationMonitoring(): () => void {
+  let lastEventLoopUtilization = performance.eventLoopUtilization();
+
+  const interval = setInterval(() => {
+    const currentEventLoopUtilization = performance.eventLoopUtilization();
+
+    const diff = performance.eventLoopUtilization(
+      currentEventLoopUtilization,
+      lastEventLoopUtilization
+    );
+    const utilization = Number.isFinite(diff.utilization) ? diff.utilization : 0;
+
+    if (Math.random() < env.EVENT_LOOP_MONITOR_UTILIZATION_SAMPLE_RATE) {
+      logger.info("nodejs.event_loop.utilization", { utilization });
+    }
+
+    lastEventLoopUtilization = currentEventLoopUtilization;
+  }, env.EVENT_LOOP_MONITOR_UTILIZATION_INTERVAL_MS);
+
+  return () => {
+    clearInterval(interval);
+  };
+}