Skip to content

Commit fd43068

Browse files
WC-2695 Add better visibility for router-worker (#6941)
This commit instruments the router worker with analytics (request time, colo metadata, errors, etc.) to give us better visibility into the router worker. These changes were tested using gradual rollouts with a 0% version and a Cloudflare-Workers-Version-Overrides header.
1 parent 51aedd4 commit fd43068

File tree

6 files changed

+170
-2
lines changed

6 files changed

+170
-2
lines changed

.changeset/poor-shoes-tickle.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@cloudflare/workers-shared": minor
3+
---
4+
5+
feat: Add observability to router-worker
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import type { Environment, ReadyAnalytics } from "./types";
2+
3+
// Version of the analytics schema; this will allow us to make breaking changes to that schema
4+
const VERSION = 1;
5+
6+
// Kind of target a request was dispatched to; the value is written to the
// analytics event as blob2 (see the Data type).
export enum DISPATCH_TYPE {
7+
ASSETS = "asset",
8+
WORKER = "worker",
9+
}
10+
11+
// When adding new columns please update the schema
12+
// Fields collected for one analytics event. Every field is optional; each one
// maps to a fixed position (doubleN / blobN) in the event written by Analytics.write.
type Data = {
13+
// -- Doubles --
14+
// double1 - The time it takes for the whole request to complete in milliseconds
15+
requestTime?: number;
16+
// double2 - Colo ID
17+
coloId?: number;
18+
// double3 - Metal ID
19+
metalId?: number;
20+
// double4 - Colo tier (e.g. tier 1, tier 2, tier 3)
21+
coloTier?: number;
22+
23+
// -- Blobs --
24+
// blob1 - Hostname of the request
25+
hostname?: string;
26+
// blob2 - Dispatch type - what kind of thing did we dispatch
27+
dispatchtype?: DISPATCH_TYPE;
28+
// blob3 - Error message
29+
error?: string;
30+
// blob4 - The current version UUID of router-worker
31+
version?: string;
32+
// blob5 - Region of the colo (e.g. WEUR)
33+
coloRegion?: string;
34+
};
35+
36+
/**
 * Accumulates analytics fields over the lifetime of one request and emits
 * them as a single event through a ReadyAnalytics sink.
 */
export class Analytics {
  private data: Data = {};

  /**
   * Merges `newData` into the fields collected so far. Keys present in
   * `newData` overwrite previously stored values; other keys are kept.
   */
  setData(newData: Partial<Data>): void {
    this.data = { ...this.data, ...newData };
  }

  /**
   * Returns the value currently stored for `key`, or `undefined` when that
   * field has not been set.
   */
  getData<K extends keyof Data>(key: K): Data[K] {
    return this.data[key];
  }

  /**
   * Emits the collected fields as one event.
   *
   * @param env - Accepted for interface compatibility; not read by this method.
   * @param readyAnalytics - Sink to log to. When absent the call is a no-op.
   * @param hostname - Index id for the event. When omitted, the hostname
   *   recorded through `setData` is used instead so the event is still indexed.
   */
  write(
    env: Environment,
    readyAnalytics?: ReadyAnalytics,
    hostname?: string
  ): void {
    if (!readyAnalytics) {
      return;
    }

    readyAnalytics.logEvent({
      version: VERSION,
      accountId: 0, // TODO: need to plumb through
      // Prefer the explicit argument; fall back to the recorded hostname so
      // callers that omit it do not produce un-indexed events.
      indexId: hostname ?? this.data.hostname,
      // Unset doubles are reported as -1 so positions stay stable.
      doubles: [
        this.data.requestTime ?? -1, // double1
        this.data.coloId ?? -1, // double2
        this.data.metalId ?? -1, // double3
        this.data.coloTier ?? -1, // double4
      ],
      blobs: [
        this.data.hostname?.substring(0, 256), // blob1 - trim to 256 UTF-16 code units
        this.data.dispatchtype, // blob2
        this.data.error?.substring(0, 256), // blob3 - trim to 256 UTF-16 code units
        this.data.version, // blob4
        this.data.coloRegion, // blob5
      ],
    });
  }
}

packages/workers-shared/router-worker/src/index.ts

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,26 @@
11
import { setupSentry } from "../../utils/sentry";
2+
import { Analytics, DISPATCH_TYPE } from "./analytics";
3+
import { PerformanceTimer } from "./performance";
24
import type AssetWorker from "../../asset-worker/src/index";
35
import type { RoutingConfig } from "../../utils/types";
6+
import type {
7+
ColoMetadata,
8+
Environment,
9+
ReadyAnalytics,
10+
UnsafePerformanceTimer,
11+
} from "./types";
412

513
interface Env {
614
ASSET_WORKER: Service<AssetWorker>;
715
USER_WORKER: Fetcher;
816
CONFIG: RoutingConfig;
917

1018
SENTRY_DSN: string;
19+
ENVIRONMENT: Environment;
20+
ANALYTICS: ReadyAnalytics;
21+
COLO_METADATA: ColoMetadata;
22+
UNSAFE_PERFORMANCE: UnsafePerformanceTimer;
23+
VERSION_METADATA: WorkerVersionMetadata;
1124

1225
SENTRY_ACCESS_CLIENT_ID: string;
1326
SENTRY_ACCESS_CLIENT_SECRET: string;
@@ -16,7 +29,9 @@ interface Env {
1629
export default {
1730
async fetch(request: Request, env: Env, ctx: ExecutionContext) {
1831
let sentry: ReturnType<typeof setupSentry> | undefined;
19-
const maybeSecondRequest = request.clone();
32+
const analytics = new Analytics();
33+
const performance = new PerformanceTimer(env.UNSAFE_PERFORMANCE);
34+
const startTimeMs = performance.now();
2035

2136
try {
2237
sentry = setupSentry(
@@ -27,21 +42,50 @@ export default {
2742
env.SENTRY_ACCESS_CLIENT_SECRET
2843
);
2944

45+
const url = new URL(request.url);
46+
if (sentry) {
47+
sentry.setUser({ username: url.hostname });
48+
sentry.setTag("colo", env.COLO_METADATA.coloId);
49+
sentry.setTag("metal", env.COLO_METADATA.metalId);
50+
}
51+
52+
if (env.COLO_METADATA && env.VERSION_METADATA) {
53+
analytics.setData({
54+
coloId: env.COLO_METADATA.coloId,
55+
metalId: env.COLO_METADATA.metalId,
56+
coloTier: env.COLO_METADATA.coloTier,
57+
coloRegion: env.COLO_METADATA.coloRegion,
58+
hostname: url.hostname,
59+
version: env.VERSION_METADATA.id,
60+
});
61+
}
62+
63+
const maybeSecondRequest = request.clone();
3064
if (env.CONFIG.has_user_worker) {
3165
if (await env.ASSET_WORKER.unstable_canFetch(request)) {
66+
analytics.setData({ dispatchtype: DISPATCH_TYPE.ASSETS });
3267
return await env.ASSET_WORKER.fetch(maybeSecondRequest);
3368
} else {
69+
analytics.setData({ dispatchtype: DISPATCH_TYPE.WORKER });
3470
return env.USER_WORKER.fetch(maybeSecondRequest);
3571
}
3672
}
3773

74+
analytics.setData({ dispatchtype: DISPATCH_TYPE.ASSETS });
3875
return await env.ASSET_WORKER.fetch(request);
3976
} catch (err) {
77+
if (err instanceof Error) {
78+
analytics.setData({ error: err.message });
79+
}
80+
4081
// Log to Sentry if we can
4182
if (sentry) {
4283
sentry.captureException(err);
4384
}
4485
throw err;
86+
} finally {
87+
analytics.setData({ requestTime: performance.now() - startTimeMs });
88+
analytics.write(env.ENVIRONMENT, env.ANALYTICS);
4589
}
4690
},
4791
};
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import type { UnsafePerformanceTimer } from "./types";
2+
3+
/**
 * Clock used to timestamp a request. Wraps an optional UnsafePerformanceTimer
 * and falls back to `Date.now()` when none is supplied.
 */
export class PerformanceTimer {
  /**
   * @param performanceTimer - Timer to read from; may be omitted, in which
   *   case `now()` uses `Date.now()`.
   */
  constructor(private readonly performanceTimer?: UnsafePerformanceTimer) {}

  /**
   * Returns the current time as a number: `timeOrigin + now()` of the wrapped
   * timer when one was provided, otherwise `Date.now()`.
   */
  now(): number {
    if (this.performanceTimer) {
      return this.performanceTimer.timeOrigin + this.performanceTimer.now();
    }
    return Date.now();
  }
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// Allowed values of the router worker's ENVIRONMENT binding.
export type Environment = "production" | "staging";
2+
3+
// Analytics sink (type of the ANALYTICS binding). Analytics.write calls
// logEvent once per written event.
export interface ReadyAnalytics {
4+
logEvent: (e: ReadyAnalyticsEvent) => void;
5+
}
6+
7+
// Shape of the COLO_METADATA binding. The router worker copies every field
// into the analytics event and sets coloId / metalId as Sentry tags.
export interface ColoMetadata {
8+
metalId: number;
9+
coloId: number;
10+
coloRegion: string;
11+
coloTier: number;
12+
}
13+
14+
// Shape of the UNSAFE_PERFORMANCE binding. PerformanceTimer.now() returns
// timeOrigin + now() when a timer of this shape is available.
export interface UnsafePerformanceTimer {
15+
readonly timeOrigin: number;
16+
now: () => number;
17+
}
18+
19+
// Payload passed to ReadyAnalytics.logEvent. `doubles` and `blobs` are
// positional: entry N-1 holds the value documented as doubleN / blobN on the
// analytics Data type. Entries may be undefined when a field was never set.
export interface ReadyAnalyticsEvent {
20+
accountId?: number;
21+
indexId?: string;
22+
version?: number;
23+
doubles?: (number | undefined)[];
24+
blobs?: (string | undefined)[];
25+
}

packages/workers-shared/router-worker/wrangler.toml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ workers_dev = false
1313
main = "src/index.ts"
1414
compatibility_date = "2024-07-31"
1515

16+
[version_metadata]
17+
binding = "VERSION_METADATA"
18+
1619
[[unsafe.bindings]]
1720
name = "CONFIG"
1821
type = "param"
@@ -29,4 +32,8 @@ type = "origin"
2932

3033
[unsafe.metadata.build_options]
3134
stable_id = "cloudflare/cf_router_worker"
32-
networks = ["cf","jdc"]
35+
networks = ["cf","jdc"]
36+
37+
[[unsafe.bindings]]
38+
name = "workers-router-worker"
39+
type = "internal_capability_grants"

0 commit comments

Comments
 (0)