Skip to content

Commit 62200f4

Browse files
committed
Merge branch 'copilot/add-graphql-mongo-metrics' into stage
2 parents 8e1a8a8 + d3f9bb9 commit 62200f4

File tree

5 files changed

+149
-63
lines changed

5 files changed

+149
-63
lines changed

docs/METRICS.md

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ Duration of HTTP requests in seconds, labeled by:
5555
- `route` - Request route/path
5656
- `status_code` - HTTP status code
5757

58-
Buckets: 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10 seconds
58+
Buckets: 0.01, 0.05, 0.1, 0.5, 1, 5, 10 seconds
5959

6060
#### http_requests_total (Counter)
6161

@@ -74,7 +74,7 @@ Labels:
7474
- `operation_name` - Name of the GraphQL operation
7575
- `operation_type` - Type of operation (query, mutation, subscription)
7676

77-
Buckets: 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10 seconds
77+
Buckets: 0.01, 0.05, 0.1, 0.5, 1, 5, 10 seconds
7878

7979
**Purpose**: Identify slow API operations (P95/P99 latency).
8080

@@ -97,25 +97,34 @@ Labels:
9797
- `field_name` - Field name being resolved
9898
- `operation_name` - Name of the GraphQL operation
9999

100-
Buckets: 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5 seconds
100+
Buckets: 0.01, 0.05, 0.1, 0.5, 1, 5 seconds
101101

102102
**Purpose**: Find slow or CPU-intensive resolvers that degrade overall performance.
103103

104104
### MongoDB Metrics
105105

106106
#### hawk_mongo_command_duration_seconds (Histogram)
107107

108-
Histogram of MongoDB command duration by command, collection, and database.
108+
Histogram of MongoDB command duration by command, collection family, and database.
109109

110110
Labels:
111111
- `command` - MongoDB command name (find, insert, update, etc.)
112-
- `collection` - Collection name
112+
- `collection_family` - Collection family name (extracted from dynamic collection names to reduce cardinality)
113113
- `db` - Database name
114114

115-
Buckets: 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10 seconds
115+
Buckets: 0.01, 0.05, 0.1, 0.5, 1, 5, 10 seconds
116116

117117
**Purpose**: Detect slow queries and high-latency collections.
118118

119+
**Note on Collection Families**: To reduce metric cardinality, dynamic collection names are grouped into families. For example:
120+
- `events:projectId` → `events`
121+
- `dailyEvents:projectId` → `dailyEvents`
122+
- `repetitions:projectId` → `repetitions`
123+
- `membership:userId` → `membership`
124+
- `team:workspaceId` → `team`
125+
126+
This prevents metric explosion when dealing with thousands of projects, users, or workspaces, while still providing meaningful insights into collection performance patterns.
127+
119128
#### hawk_mongo_command_errors_total (Counter)
120129

121130
Counter of failed MongoDB commands grouped by command and error code.
@@ -173,6 +182,7 @@ The metrics implementation uses the `prom-client` library and consists of:
173182
- Implements MongoDB command monitoring
174183
- Tracks command duration and errors
175184
- Uses MongoDB's command monitoring events
185+
- Extracts collection families from dynamic collection names to reduce cardinality
176186

177187
4. **Integration** (`src/index.ts`, `src/mongo.ts`):
178188
- Adds GraphQL metrics plugin to Apollo Server

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "hawk.api",
3-
"version": "1.1.43",
3+
"version": "1.2.0",
44
"main": "index.ts",
55
"license": "BUSL-1.1",
66
"scripts": {

src/metrics/graphql.ts

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ export const gqlOperationDuration = new client.Histogram({
1010
name: 'hawk_gql_operation_duration_seconds',
1111
help: 'Histogram of total GraphQL operation duration by operation name and type',
1212
labelNames: ['operation_name', 'operation_type'],
13-
buckets: [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10],
13+
buckets: [0.01, 0.05, 0.1, 0.5, 1, 5, 10],
1414
});
1515

1616
/**
@@ -31,31 +31,31 @@ export const gqlResolverDuration = new client.Histogram({
3131
name: 'hawk_gql_resolver_duration_seconds',
3232
help: 'Histogram of resolver execution time per type, field, and operation',
3333
labelNames: ['type_name', 'field_name', 'operation_name'],
34-
buckets: [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5],
34+
buckets: [0.01, 0.05, 0.1, 0.5, 1, 5],
3535
});
3636

3737
/**
3838
* Apollo Server plugin to track GraphQL metrics
3939
*/
4040
export const graphqlMetricsPlugin: ApolloServerPlugin = {
41-
async requestDidStart(requestContext: GraphQLRequestContext): Promise<GraphQLRequestListener> {
41+
async requestDidStart(_requestContext: GraphQLRequestContext): Promise<GraphQLRequestListener> {
4242
const startTime = Date.now();
4343
let operationName = 'unknown';
4444
let operationType = 'unknown';
4545

4646
return {
47-
async didResolveOperation(requestContext: GraphQLRequestContext) {
48-
operationName = requestContext.operationName || 'anonymous';
49-
operationType = requestContext.operation?.operation || 'unknown';
47+
async didResolveOperation(ctx: GraphQLRequestContext): Promise<void> {
48+
operationName = ctx.operationName || 'anonymous';
49+
operationType = ctx.operation?.operation || 'unknown';
5050
},
5151

52-
async executionDidStart() {
52+
async executionDidStart(): Promise<GraphQLRequestListener> {
5353
return {
5454
// eslint-disable-next-line @typescript-eslint/no-explicit-any
55-
willResolveField({ info }: any) {
55+
willResolveField({ info }: any): () => void {
5656
const fieldStartTime = Date.now();
5757

58-
return () => {
58+
return (): void => {
5959
const duration = (Date.now() - fieldStartTime) / 1000;
6060

6161
gqlResolverDuration
@@ -70,16 +70,16 @@ export const graphqlMetricsPlugin: ApolloServerPlugin = {
7070
};
7171
},
7272

73-
async willSendResponse(requestContext: GraphQLRequestContext) {
73+
async willSendResponse(ctx: GraphQLRequestContext): Promise<void> {
7474
const duration = (Date.now() - startTime) / 1000;
7575

7676
gqlOperationDuration
7777
.labels(operationName, operationType)
7878
.observe(duration);
7979

8080
// Track errors if any
81-
if (requestContext.errors && requestContext.errors.length > 0) {
82-
requestContext.errors.forEach((error: GraphQLError) => {
81+
if (ctx.errors && ctx.errors.length > 0) {
82+
ctx.errors.forEach((error: GraphQLError) => {
8383
const errorType = error.extensions?.code || error.name || 'unknown';
8484

8585
gqlOperationErrors

src/metrics/index.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ const httpRequestDuration = new client.Histogram({
2121
name: 'http_request_duration_seconds',
2222
help: 'Duration of HTTP requests in seconds',
2323
labelNames: ['method', 'route', 'status_code'],
24-
buckets: [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10],
24+
buckets: [0.01, 0.05, 0.1, 0.5, 1, 5, 10],
2525
registers: [ register ],
2626
});
2727

@@ -51,6 +51,9 @@ register.registerMetric(mongoCommandErrors);
5151

5252
/**
5353
* Express middleware to track HTTP metrics
54+
* @param req - Express request object
55+
* @param res - Express response object
56+
* @param next - Express next function
5457
*/
5558
export function metricsMiddleware(req: express.Request, res: express.Response, next: express.NextFunction): void {
5659
const start = Date.now();

src/metrics/mongodb.ts

Lines changed: 116 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,106 @@
1-
import client from 'prom-client';
1+
import promClient from 'prom-client';
22
import { MongoClient, MongoClientOptions } from 'mongodb';
33

44
/**
55
* MongoDB command duration histogram
6-
* Tracks MongoDB command duration by command, collection, and database
6+
* Tracks MongoDB command duration by command, collection family, and database
77
*/
8-
export const mongoCommandDuration = new client.Histogram({
8+
export const mongoCommandDuration = new promClient.Histogram({
99
name: 'hawk_mongo_command_duration_seconds',
10-
help: 'Histogram of MongoDB command duration by command, collection, and db',
11-
labelNames: ['command', 'collection', 'db'],
12-
buckets: [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10],
10+
help: 'Histogram of MongoDB command duration by command, collection family, and db',
11+
labelNames: ['command', 'collection_family', 'db'],
12+
buckets: [0.01, 0.05, 0.1, 0.5, 1, 5, 10],
1313
});
1414

1515
/**
1616
* MongoDB command errors counter
1717
* Tracks failed MongoDB commands grouped by command and error code
1818
*/
19-
export const mongoCommandErrors = new client.Counter({
19+
export const mongoCommandErrors = new promClient.Counter({
2020
name: 'hawk_mongo_command_errors_total',
2121
help: 'Counter of failed MongoDB commands grouped by command and error code',
2222
labelNames: ['command', 'error_code'],
2323
});
2424

25+
/**
 * Extract the raw collection identifier from a MongoDB command document.
 *
 * For most commands the collection is stored under the key named after the
 * command itself (e.g. `{ find: "users" }` -> `"users"`). `getMore` is the
 * exception: its collection is read from the `collection` field instead.
 *
 * The value is returned as-is (it may be a non-string); callers are expected
 * to normalize it before using it as a metric label.
 * @param command - MongoDB command object
 * @param commandName - Name of the command (find, insert, getMore, etc.)
 * @returns Raw collection identifier, or null when none can be found
 */
function extractCollectionFromCommand(command: unknown, commandName: string): unknown {
  // Only object-like commands can carry a collection field
  if (typeof command !== 'object' || command === null) {
    return null;
  }

  // getMore stores the collection in a dedicated field rather than under its own name
  const key = commandName === 'getMore' ? 'collection' : commandName;

  // Look at own properties only, so inherited members (toString, constructor, ...)
  // are never mistaken for a collection value
  if (!Object.prototype.hasOwnProperty.call(command, key)) {
    return null;
  }

  // Falsy values (empty string, 0, null, undefined) mean "no collection"
  return (command as Record<string, unknown>)[key] || null;
}
48+
49+
/**
 * Normalize a raw collection value to a string usable as a metric label.
 *
 * Strings are returned unchanged. Objects exposing a callable `toString`
 * (e.g. BSON wrappers) are stringified. Everything else, including empty
 * strings, the default "[object ...]" representation and values whose
 * conversion throws, collapses to 'unknown'.
 * @param collection - Collection value from MongoDB command
 * @returns Normalized non-empty string or 'unknown'
 */
function normalizeCollectionName(collection: unknown): string {
  if (typeof collection === 'string') {
    // An empty string is not a usable label value
    return collection || 'unknown';
  }

  // Primitives other than string (numbers, booleans, undefined) and null have no name
  if (typeof collection !== 'object' || collection === null) {
    return 'unknown';
  }

  // Objects without a callable toString (e.g. null-prototype objects) cannot be converted
  if (typeof (collection as { toString?: unknown }).toString !== 'function') {
    return 'unknown';
  }

  try {
    const text = String(collection);

    // Reject empty results and the default "[object Object]"-style representation
    if (text.length > 0 && !text.startsWith('[object')) {
      return text;
    }
  } catch (error) {
    // Conversion failures are logged and then treated as an unknown collection
    console.error('Error normalizing collection name', error);
  }

  return 'unknown';
}
82+
83+
/**
 * Extract the collection family from a full collection name.
 *
 * Dynamic collections are named "<family>:<id>"; only the part before the
 * first colon is kept so that per-id collections share one label value.
 * Names without a colon are returned unchanged. A name that yields an empty
 * family (empty string, or a name starting with a colon) maps to 'unknown'
 * rather than producing an empty or per-id label.
 * @param collectionName - Full collection name (e.g., "events:projectId")
 * @returns Collection family (e.g., "events") or 'unknown'
 */
function getCollectionFamily(collectionName: string): string {
  const separatorIndex = collectionName.indexOf(':');

  // No separator: the whole name is the family
  const family = separatorIndex === -1
    ? collectionName
    : collectionName.slice(0, separatorIndex);

  // An empty family carries no information and must not become a label value
  return family || 'unknown';
}
103+
25104
/**
26105
* Enhance MongoClient options with monitoring
27106
* @param options - Original MongoDB connection options
@@ -40,71 +119,65 @@ export function withMongoMetrics(options: MongoClientOptions = {}): MongoClientO
40119
*/
41120
export function setupMongoMetrics(client: MongoClient): void {
42121
client.on('commandStarted', (event) => {
43-
// Store start time for this command
44-
const startTimeKey = `${event.requestId}`;
122+
// Store start time and metadata for this command
123+
const metadataKey = `${event.requestId}`;
124+
125+
// Extract collection name from the command
126+
const collectionRaw = extractCollectionFromCommand(event.command, event.commandName);
127+
const collection = normalizeCollectionName(collectionRaw);
128+
const collectionFamily = getCollectionFamily(collection);
129+
130+
const db = event.databaseName || 'unknown';
45131

46132
// eslint-disable-next-line @typescript-eslint/no-explicit-any
47-
(client as any)[startTimeKey] = Date.now();
133+
(client as any)[metadataKey] = {
134+
startTime: Date.now(),
135+
collectionFamily,
136+
db,
137+
commandName: event.commandName,
138+
};
48139
});
49140

50141
client.on('commandSucceeded', (event) => {
51-
const startTimeKey = `${event.requestId}`;
142+
const metadataKey = `${event.requestId}`;
52143
// eslint-disable-next-line @typescript-eslint/no-explicit-any
53-
const startTime = (client as any)[startTimeKey];
54-
55-
if (startTime) {
56-
const duration = (Date.now() - startTime) / 1000;
144+
const metadata = (client as any)[metadataKey];
57145

58-
/**
59-
* Extract collection name from the command
60-
* For most commands, the collection name is the value of the command name key
61-
* e.g., { find: "users" } -> collection is "users"
62-
*/
63-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
64-
const collection = event.command ? ((event.command as any)[event.commandName] || 'unknown') : 'unknown';
65-
const db = event.databaseName || 'unknown';
146+
if (metadata) {
147+
const duration = (Date.now() - metadata.startTime) / 1000;
66148

67149
mongoCommandDuration
68-
.labels(event.commandName, collection, db)
150+
.labels(metadata.commandName, metadata.collectionFamily, metadata.db)
69151
.observe(duration);
70152

71-
// Clean up start time
153+
// Clean up metadata
72154
// eslint-disable-next-line @typescript-eslint/no-explicit-any
73-
delete (client as any)[startTimeKey];
155+
delete (client as any)[metadataKey];
74156
}
75157
});
76158

77159
client.on('commandFailed', (event) => {
78-
const startTimeKey = `${event.requestId}`;
160+
const metadataKey = `${event.requestId}`;
79161
// eslint-disable-next-line @typescript-eslint/no-explicit-any
80-
const startTime = (client as any)[startTimeKey];
162+
const metadata = (client as any)[metadataKey];
81163

82-
if (startTime) {
83-
const duration = (Date.now() - startTime) / 1000;
84-
85-
/**
86-
* Extract collection name from the command
87-
* For most commands, the collection name is the value of the command name key
88-
* e.g., { find: "users" } -> collection is "users"
89-
*/
90-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
91-
const collection = event.command ? ((event.command as any)[event.commandName] || 'unknown') : 'unknown';
92-
const db = event.databaseName || 'unknown';
164+
if (metadata) {
165+
const duration = (Date.now() - metadata.startTime) / 1000;
93166

94167
mongoCommandDuration
95-
.labels(event.commandName, collection, db)
168+
.labels(metadata.commandName, metadata.collectionFamily, metadata.db)
96169
.observe(duration);
97170

98171
// Track error
99172
const errorCode = event.failure?.code?.toString() || 'unknown';
100173

101174
mongoCommandErrors
102-
.labels(event.commandName, errorCode)
175+
.labels(metadata.commandName, errorCode)
103176
.inc();
104177

105-
// Clean up start time
178+
// Clean up metadata
106179
// eslint-disable-next-line @typescript-eslint/no-explicit-any
107-
delete (client as any)[startTimeKey];
180+
delete (client as any)[metadataKey];
108181
}
109182
});
110183
}

0 commit comments

Comments
 (0)