Skip to content
Merged
77 changes: 76 additions & 1 deletion docs/METRICS.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,68 @@ Total number of HTTP requests, labeled by:
- `route` - Request route/path
- `status_code` - HTTP status code

### GraphQL Metrics

#### hawk_gql_operation_duration_seconds (Histogram)

Histogram of total GraphQL operation duration by operation name and type.

Labels:
- `operation_name` - Name of the GraphQL operation
- `operation_type` - Type of operation (query, mutation, subscription)

Buckets: 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10 seconds

**Purpose**: Identify slow API operations (P95/P99 latency).

#### hawk_gql_operation_errors_total (Counter)

Counter of failed GraphQL operations grouped by operation name and error class.

Labels:
- `operation_name` - Name of the GraphQL operation
- `error_type` - Error classification: the error's `extensions.code` when set, otherwise the error name (`unknown` if neither is available)

**Purpose**: Detect increased error rates and failing operations.

#### hawk_gql_resolver_duration_seconds (Histogram)

Histogram of resolver execution time per type, field, and operation.

Labels:
- `type_name` - GraphQL type name
- `field_name` - Field name being resolved
- `operation_name` - Name of the GraphQL operation

Buckets: 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5 seconds

**Purpose**: Find slow or CPU-intensive resolvers that degrade overall performance.

### MongoDB Metrics

#### hawk_mongo_command_duration_seconds (Histogram)

Histogram of MongoDB command duration by command, collection, and database.

Labels:
- `command` - MongoDB command name (find, insert, update, etc.)
- `collection` - Collection name
- `db` - Database name

Buckets: 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10 seconds

**Purpose**: Detect slow queries and high-latency collections.

#### hawk_mongo_command_errors_total (Counter)

Counter of failed MongoDB commands grouped by command and error code.

Labels:
- `command` - MongoDB command name
- `error_code` - MongoDB error code reported with the failure (`unknown` when the failure carries no code)

**Purpose**: Track transient or persistent database errors.

## Testing

### Manual Testing
Expand Down Expand Up @@ -98,11 +160,24 @@ The metrics implementation uses the `prom-client` library and consists of:
- Initializes a Prometheus registry
- Configures default Node.js metrics collection
- Defines custom HTTP metrics (duration histogram and request counter)
- Registers GraphQL and MongoDB metrics
- Provides middleware for tracking HTTP requests
- Creates a separate Express app for serving metrics

2. **Integration** (`src/index.ts`):
2. **GraphQL Metrics** (`src/metrics/graphql.ts`):
- Implements Apollo Server plugin for tracking GraphQL operations
- Tracks operation duration, errors, and resolver execution time
- Automatically captures operation name, type, and field information

3. **MongoDB Metrics** (`src/metrics/mongodb.ts`):
- Implements MongoDB command monitoring
- Tracks command duration and errors
- Uses MongoDB's command monitoring events

4. **Integration** (`src/index.ts`, `src/mongo.ts`):
- Adds GraphQL metrics plugin to Apollo Server
- Adds metrics middleware to the main Express app
- Enables MongoDB command monitoring on database clients
- Starts metrics server on a separate port
- Keeps metrics server isolated from main API traffic

Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "hawk.api",
"version": "1.1.42",
"version": "1.1.43",
"main": "index.ts",
"license": "BUSL-1.1",
"scripts": {
Expand Down
3 changes: 2 additions & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import BusinessOperationsFactory from './models/businessOperationsFactory';
import schema from './schema';
import { graphqlUploadExpress } from 'graphql-upload';
import morgan from 'morgan';
import { metricsMiddleware, createMetricsServer } from './metrics';
import { metricsMiddleware, createMetricsServer, graphqlMetricsPlugin } from './metrics';

/**
* Option to enable playground
Expand Down Expand Up @@ -122,6 +122,7 @@ class HawkAPI {
process.env.NODE_ENV === 'production'
? ApolloServerPluginLandingPageDisabled()
: ApolloServerPluginLandingPageGraphQLPlayground(),
graphqlMetricsPlugin,
],
context: ({ req }): ResolverContextBase => req.context,
formatError: (error): GraphQLError => {
Expand Down
93 changes: 93 additions & 0 deletions src/metrics/graphql.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import client from 'prom-client';
import { GraphQLRequestContext } from 'apollo-server-plugin-base';
import { GraphQLError } from 'graphql';

/**
 * Histogram of total GraphQL operation duration, in seconds
 * Observations are labeled with the operation name and the operation type
 */
export const gqlOperationDuration = new client.Histogram({
  help: 'Histogram of total GraphQL operation duration by operation name and type',
  name: 'hawk_gql_operation_duration_seconds',
  buckets: [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10],
  labelNames: ['operation_name', 'operation_type'],
});

/**
 * Counter of failed GraphQL operations
 * Increments are labeled with the operation name and the error type
 */
export const gqlOperationErrors = new client.Counter({
  help: 'Counter of failed GraphQL operations grouped by operation name and error class',
  name: 'hawk_gql_operation_errors_total',
  labelNames: ['operation_name', 'error_type'],
});

/**
 * Histogram of resolver execution time, in seconds
 * Observations are labeled with the parent type name, the field name and the operation name
 */
export const gqlResolverDuration = new client.Histogram({
  help: 'Histogram of resolver execution time per type, field, and operation',
  name: 'hawk_gql_resolver_duration_seconds',
  buckets: [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5],
  labelNames: ['type_name', 'field_name', 'operation_name'],
});

/**
 * Returns the time elapsed since a `process.hrtime()` reading
 *
 * @param start - tuple previously returned by `process.hrtime()`
 * @returns elapsed time in seconds
 */
function secondsSince(start: [number, number]): number {
  const [seconds, nanoseconds] = process.hrtime(start);

  return seconds + nanoseconds / 1e9;
}

/**
 * Apollo Server plugin to track GraphQL metrics
 *
 * For every request it records:
 * - the total operation duration (`gqlOperationDuration`), observed right before the response is sent
 * - one `gqlOperationErrors` increment per error found on the request context
 * - the execution time of every resolved field (`gqlResolverDuration`)
 *
 * Durations are taken from `process.hrtime()` rather than `Date.now()`:
 * the wall clock only has millisecond resolution (the size of the smallest bucket)
 * and may jump when the system time is adjusted
 */
export const graphqlMetricsPlugin = {
  /**
   * Starts the request timer and returns the per-request hooks
   *
   * @param _requestContext - request context passed by Apollo Server (not used here, the hooks receive their own)
   */
  async requestDidStart(_requestContext: GraphQLRequestContext) {
    const requestStart = process.hrtime();

    /**
     * Both labels keep the value 'unknown' until didResolveOperation runs
     */
    let operationName = 'unknown';
    let operationType = 'unknown';

    return {
      /**
       * Remembers the operation name and type to use them as metric labels
       *
       * @param requestContext - request context with the resolved operation
       */
      async didResolveOperation(requestContext: GraphQLRequestContext) {
        operationName = requestContext.operationName || 'anonymous';
        operationType = requestContext.operation?.operation || 'unknown';
      },

      /**
       * Returns the execution hooks used to time each resolved field
       */
      async executionDidStart() {
        return {
          // eslint-disable-next-line @typescript-eslint/no-explicit-any
          willResolveField({ info }: any) {
            const fieldStart = process.hrtime();

            /**
             * The returned callback is invoked when the field has been resolved
             */
            return () => {
              gqlResolverDuration
                .labels(info.parentType.name, info.fieldName, operationName)
                .observe(secondsSince(fieldStart));
            };
          },
        };
      },

      /**
       * Observes the total operation duration and counts the errors of the request
       *
       * @param requestContext - request context that may carry the errors of the operation
       */
      async willSendResponse(requestContext: GraphQLRequestContext) {
        gqlOperationDuration
          .labels(operationName, operationType)
          .observe(secondsSince(requestStart));

        const errors: ReadonlyArray<GraphQLError> = requestContext.errors || [];

        for (const error of errors) {
          /**
           * extensions.code is not guaranteed to be a string, so it is converted explicitly
           * instead of being cast
           */
          const errorType = String(error.extensions?.code || error.name || 'unknown');

          gqlOperationErrors
            .labels(operationName, errorType)
            .inc();
        }
      },
    };
  },
};
21 changes: 21 additions & 0 deletions src/metrics/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import client from 'prom-client';
import express from 'express';
import { gqlOperationDuration, gqlOperationErrors, gqlResolverDuration } from './graphql';
import { mongoCommandDuration, mongoCommandErrors } from './mongodb';

/**
* Create a Registry to register the metrics
Expand Down Expand Up @@ -34,6 +36,19 @@ const httpRequestCounter = new client.Counter({
registers: [ register ],
});

/**
 * Register GraphQL metrics (imported from ./graphql) in the registry of this module
 */
register.registerMetric(gqlOperationDuration);
register.registerMetric(gqlOperationErrors);
register.registerMetric(gqlResolverDuration);

/**
 * Register MongoDB metrics (imported from ./mongodb) in the registry of this module
 */
register.registerMetric(mongoCommandDuration);
register.registerMetric(mongoCommandErrors);

/**
* Express middleware to track HTTP metrics
*/
Expand Down Expand Up @@ -71,3 +86,9 @@ export function createMetricsServer(): express.Application {

return metricsApp;
}

/**
* Export GraphQL metrics plugin and MongoDB metrics setup
*/
export { graphqlMetricsPlugin } from './graphql';
export { setupMongoMetrics, withMongoMetrics } from './mongodb';
96 changes: 96 additions & 0 deletions src/metrics/mongodb.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import client from 'prom-client';
import { MongoClient, MongoClientOptions } from 'mongodb';

/**
 * Histogram of MongoDB command duration, in seconds
 * Observations are labeled with the command name, the collection and the database
 */
export const mongoCommandDuration = new client.Histogram({
  help: 'Histogram of MongoDB command duration by command, collection, and db',
  name: 'hawk_mongo_command_duration_seconds',
  buckets: [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10],
  labelNames: ['command', 'collection', 'db'],
});

/**
 * Counter of failed MongoDB commands
 * Increments are labeled with the command name and the error code
 */
export const mongoCommandErrors = new client.Counter({
  help: 'Counter of failed MongoDB commands grouped by command and error code',
  name: 'hawk_mongo_command_errors_total',
  labelNames: ['command', 'error_code'],
});

/**
 * Build MongoClient options with command monitoring switched on
 * The passed object is not modified: its properties are copied to a new object
 * and `monitorCommands` is set to true there, overriding any passed value
 *
 * @param options - original MongoDB connection options
 * @returns copy of the options with `monitorCommands: true`
 */
export function withMongoMetrics(options: MongoClientOptions = {}): MongoClientOptions {
  const monitoring = { monitorCommands: true };

  return Object.assign({}, options, monitoring);
}

/**
 * Picks the collection name out of a MongoDB command document
 *
 * Looks at the `collection` field first and then at the field named after the command,
 * accepting only non-empty strings so that values like `1` never become a label
 *
 * @param commandName - name of the command
 * @param command - command document of the started command
 * @returns collection name or 'unknown' when it can not be determined
 */
function resolveCollectionName(commandName: string, command: unknown): string {
  if (typeof command !== 'object' || command === null) {
    return 'unknown';
  }

  const document = command as Record<string, unknown>;
  const candidates = [document.collection, document[commandName]];
  const found = candidates.find((value) => typeof value === 'string' && value !== '');

  return typeof found === 'string' ? found : 'unknown';
}

/**
 * Setup MongoDB metrics monitoring on a MongoClient
 *
 * Listens to the 'commandStarted', 'commandSucceeded' and 'commandFailed' events of the client:
 * - every finished command is observed in `mongoCommandDuration`
 * - every failed command additionally increments `mongoCommandErrors`
 *
 * The collection and database labels are captured when the command starts, because the
 * command document is only part of the 'commandStarted' event (see the MongoDB command
 * monitoring specification). Reading it from the completion events always gave 'unknown'
 *
 * @param client - MongoDB client to monitor
 */
export function setupMongoMetrics(client: MongoClient): void {
  /**
   * Commands that have started but not finished yet, keyed by request id
   * Kept in a private Map instead of ad-hoc properties on the client instance
   */
  const pendingCommands = new Map<string, { startedAt: number; collection: string; db: string }>();

  /**
   * Observes the duration of a finished command and forgets its pending entry
   *
   * @param requestId - request id of the finished command
   * @param commandName - name of the finished command
   */
  const observeFinished = (requestId: unknown, commandName: string): void => {
    const key = String(requestId);
    const pending = pendingCommands.get(key);

    /**
     * No entry means that the start of the command was not seen, so there is nothing to measure
     */
    if (!pending) {
      return;
    }

    pendingCommands.delete(key);

    mongoCommandDuration
      .labels(commandName, pending.collection, pending.db)
      .observe((Date.now() - pending.startedAt) / 1000);
  };

  client.on('commandStarted', (event) => {
    pendingCommands.set(String(event.requestId), {
      startedAt: Date.now(),
      collection: resolveCollectionName(event.commandName, event.command),
      db: event.databaseName || 'unknown',
    });
  });

  client.on('commandSucceeded', (event) => {
    observeFinished(event.requestId, event.commandName);
  });

  client.on('commandFailed', (event) => {
    observeFinished(event.requestId, event.commandName);

    /**
     * The failure is counted even if the start of the command was not seen
     */
    const errorCode = event.failure?.code?.toString() || 'unknown';

    mongoCommandErrors
      .labels(event.commandName, errorCode)
      .inc();
  });
}
9 changes: 7 additions & 2 deletions src/mongo.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { Db, MongoClient, MongoClientOptions } from 'mongodb';
import HawkCatcher from '@hawk.so/nodejs';
import { setupMongoMetrics, withMongoMetrics } from './metrics';

const hawkDBUrl = process.env.MONGO_HAWK_DB_URL || 'mongodb://localhost:27017/hawk';
const eventsDBUrl = process.env.MONGO_EVENTS_DB_URL || 'mongodb://localhost:27017/events';
Expand Down Expand Up @@ -53,10 +54,10 @@ export const mongoClients: MongoClients = {
/**
* Common params for all connections
*/
const connectionConfig: MongoClientOptions = {
const connectionConfig: MongoClientOptions = withMongoMetrics({
useNewUrlParser: true,
useUnifiedTopology: true,
};
});

/**
* Setups connections to the databases (hawk api and events databases)
Expand All @@ -73,6 +74,10 @@ export async function setupConnections(): Promise<void> {

databases.hawk = hawkMongoClient.db();
databases.events = eventsMongoClient.db();

// Setup metrics monitoring for both clients
setupMongoMetrics(hawkMongoClient);
setupMongoMetrics(eventsMongoClient);
} catch (e) {
/** Catch start Mongo errors */
HawkCatcher.send(e as Error);
Expand Down
Loading
Loading