Skip to content

Commit 2bed1d2

Browse files
authored
Merge branch 'main' into fix/invite-email-case-insensitive
2 parents bb73f70 + 034058b commit 2bed1d2

3 files changed

Lines changed: 74 additions & 5 deletions

File tree

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
area: webapp
3+
type: improvement
4+
---
5+
6+
Emit metrics for task metadata cache resolution on the trigger path, surfacing the cache hit rate and how often a read replica returned empty for a row the primary had.

apps/webapp/app/runEngine/concerns/queues.server.ts

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ import { createCache, createLRUMemoryStore, DefaultStatefulContext, Namespace }
2020
import { singleton } from "~/utils/singleton";
2121
import type { TaskMetadataCache, TaskMetadataEntry } from "~/services/taskMetadataCache.server";
2222
import { taskMetadataCacheInstance } from "~/services/taskMetadataCacheInstance.server";
23+
import {
24+
recordTaskMetaResolve,
25+
type TaskMetaResolveSource,
26+
} from "~/services/taskMetadataCacheTelemetry.server";
2327

2428
// LRU cache for environment queue sizes to reduce Redis calls
2529
const queueSizeCache = singleton("queueSizeCache", () => {
@@ -266,7 +270,10 @@ export class DefaultQueueManager implements QueueManager {
266270
slug: string
267271
): Promise<TaskMetadataEntry | null> {
268272
const cached = await this.taskMetaCache.getByWorker(workerId, slug);
269-
if (cached) return cached;
273+
if (cached) {
274+
recordTaskMetaResolve("locked", "cache");
275+
return cached;
276+
}
270277

271278
// Cache miss. Read the row from the replica first; if the replica comes
272279
// back empty, re-check the writer before concluding the task is missing.
@@ -277,11 +284,13 @@ export class DefaultQueueManager implements QueueManager {
277284
// registered. The writer read only runs on this rare miss-then-empty path,
278285
// never on the hot path.
279286
let row = await this.findLockedTaskRow(this.replicaPrisma, workerId, environmentId, slug);
287+
let source: TaskMetaResolveSource = "replica";
280288

281289
if (!row && this.replicaPrisma !== this.prisma) {
282290
row = await this.findLockedTaskRow(this.prisma, workerId, environmentId, slug);
283291

284292
if (row) {
293+
source = "writer";
285294
logger.warn("Locked task metadata missing on replica but found on writer", {
286295
workerId,
287296
environmentId,
@@ -290,7 +299,12 @@ export class DefaultQueueManager implements QueueManager {
290299
}
291300
}
292301

293-
if (!row) return null;
302+
if (!row) {
303+
recordTaskMetaResolve("locked", "miss");
304+
return null;
305+
}
306+
307+
recordTaskMetaResolve("locked", source);
294308

295309
const entry: TaskMetadataEntry = {
296310
slug,
@@ -336,14 +350,20 @@ export class DefaultQueueManager implements QueueManager {
336350
slug: string
337351
): Promise<TaskMetadataEntry | null> {
338352
const cached = await this.taskMetaCache.getCurrent(environment.id, slug);
339-
if (cached) return cached;
353+
if (cached) {
354+
recordTaskMetaResolve("current", "cache");
355+
return cached;
356+
}
340357

341358
// Cold cache: discover the current worker for the env. Replica is fine —
342359
// the adjacent BackgroundWorkerTask lookup below uses `replicaPrisma` too
343360
// (replica lag for "just deployed" is bounded the same way for both
344361
// queries; reading from the writer here would only widen the window).
345362
const worker = await findCurrentWorkerFromEnvironment(environment, this.replicaPrisma);
346-
if (!worker) return null;
363+
if (!worker) {
364+
recordTaskMetaResolve("current", "miss");
365+
return null;
366+
}
347367

348368
const row = await this.replicaPrisma.backgroundWorkerTask.findFirst({
349369
where: { workerId: worker.id, runtimeEnvironmentId: environment.id, slug },
@@ -354,7 +374,12 @@ export class DefaultQueueManager implements QueueManager {
354374
},
355375
});
356376

357-
if (!row) return null;
377+
if (!row) {
378+
recordTaskMetaResolve("current", "miss");
379+
return null;
380+
}
381+
382+
recordTaskMetaResolve("current", "replica");
358383

359384
const entry: TaskMetadataEntry = {
360385
slug,
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import { getMeter } from "@internal/tracing";
2+
3+
const meter = getMeter("task-meta-cache");
4+
5+
/**
6+
* One counter for every task-metadata resolution on the trigger path, with two
7+
* bounded labels:
8+
*
9+
* path: "locked" - lockToVersion / triggerAndWait (reads the by-worker hash)
10+
* "current" - default trigger (reads the env hash)
11+
* source: where the metadata was resolved from:
12+
* "cache" - Redis hit (warm)
13+
* "replica" - cache miss, the read replica had the row
14+
* "writer" - cache miss + replica empty, the primary had the row
15+
* (i.e. the replica was stale for an existing row)
16+
"miss" - not found in any store the path consults (the "current" path reads only the replica, so a lagging replica is also counted here)
17+
*
18+
* Derived signals:
19+
cache / total -> cache hit rate (the complement, 1 - hit rate, is coldness)
20+
* writer / total -> how often the replica returned empty for
21+
a row the primary had (only the "locked" path re-checks the writer)
22+
*
23+
* No env / worker / slug labels: those are unbounded in production.
24+
*/
25+
const resolveCounter = meter.createCounter("task_meta_cache.resolve", {
26+
description:
27+
"Task metadata resolutions on the trigger path, by lookup path and the source that satisfied them",
28+
});
29+
30+
export type TaskMetaResolvePath = "locked" | "current";
31+
export type TaskMetaResolveSource = "cache" | "replica" | "writer" | "miss";
32+
33+
export function recordTaskMetaResolve(
34+
path: TaskMetaResolvePath,
35+
source: TaskMetaResolveSource
36+
): void {
37+
resolveCounter.add(1, { path, source });
38+
}

0 commit comments

Comments
 (0)