Skip to content

Commit 648bd9f

Browse files
committed
feat: Metrics and alarms
1 parent 230fc38 commit 648bd9f

File tree

5 files changed

+209
-68
lines changed

5 files changed

+209
-68
lines changed

lib/cell-stack.ts

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ export class CellStack extends cdk.Stack {
1313
router: EventRouter;
1414
producer: EventProducer;
1515
consumers: EventConsumer[] = [];
16+
deadLetterQueues: Queue[] = [];
1617
monitoring: EventMonitoring;
1718

1819
constructor(scope: Construct, id: string, props?: cdk.StackProps) {
@@ -35,16 +36,27 @@ export class CellStack extends cdk.Stack {
3536
// connect consumers to the router
3637
this.consumers.forEach(consumer => {
3738
const target = this.router.targets.find(target => target.type == consumer.type)!;
39+
40+
const deadLetterQueue = new Queue(this, consumer.node.id + consumer.type + 'DeadLetterQueue');
3841
target.topic.addSubscription(new SqsSubscription(consumer.queue, {
39-
deadLetterQueue: new Queue(this, consumer.node.id + consumer.type + 'DeadLetterQueue')
42+
deadLetterQueue: deadLetterQueue
4043
}));
44+
45+
this.deadLetterQueues.push(deadLetterQueue);
4146
});
4247

48+
const deadLetterQueues = [
49+
...this.deadLetterQueues,
50+
this.router.deadLetterQueue,
51+
...this.consumers?.map(consumer => consumer.deadLetterQueue)
52+
]
53+
4354
// create Dashboards and Alarms
4455
this.monitoring = new EventMonitoring(this, id + 'Dashboard', {
4556
router: this.router,
4657
producer: this.producer,
47-
consumers: this.consumers
58+
consumers: this.consumers,
59+
deadLetterQueues: deadLetterQueues
4860
});
4961
}
5062
}

lib/eventConsumer.ts

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -11,40 +11,30 @@ export type EventConsumerProps = {
1111
}
1212

1313
export class EventConsumer extends Construct {
14-
public queue: Queue;
15-
public type: EventQueueConsumerEventType;
14+
queue: Queue;
15+
deadLetterQueue: Queue;
16+
type: EventQueueConsumerEventType;
17+
lambda: Function;
1618

1719
constructor(scope: Construct, id: string, props: EventConsumerProps = { type: 'ALL' }) {
1820
super(scope, id);
1921

2022
this.type = props.type;
2123

24+
const deadLetterQueue = new Queue(this, id + 'DeadLetterQueue', {
25+
queueName: PhysicalName.GENERATE_IF_NEEDED
26+
});
27+
2228
// create a queue with a dead letter queue attached
2329
this.queue = new Queue(this, id + 'EventsQueue', {
2430
visibilityTimeout: Duration.seconds(30),
2531
deadLetterQueue: {
2632
maxReceiveCount: 3,
27-
queue: new Queue(this, id + 'DeadLetterQueue', {
28-
queueName: PhysicalName.GENERATE_IF_NEEDED
29-
})
33+
queue: deadLetterQueue
3034
}
3135
});
3236

33-
// Create custom CloudWatch metrics for queue monitoring
34-
const approximateAgeOfOldestMessage = new Metric({
35-
namespace: 'AWS/SQS',
36-
metricName: 'ApproximateAgeOfOldestMessage',
37-
dimensionsMap: { QueueName: this.queue.queueName },
38-
statistic: 'Maximum',
39-
period: Duration.minutes(1)
40-
});
41-
42-
// Create alarm for message processing delays
43-
approximateAgeOfOldestMessage.createAlarm(this, 'OldMessageAlarm', {
44-
threshold: 60, // seconds
45-
evaluationPeriods: 2,
46-
alarmDescription: 'Messages are getting old in the queue'
47-
});
37+
this.deadLetterQueue = deadLetterQueue;
4838

4939
// CDK code for observable Lambda consumer
5040
const processingFunction = new aws_lambda.Function(this, 'EventProcessingFunction', {

lib/eventMonitoring.ts

Lines changed: 162 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,14 @@ import { Construct } from 'constructs';
33
import { EventRouter } from './eventRouter';
44
import { EventProducer } from './eventProducer';
55
import { EventConsumer } from './eventConsumer';
6+
import { Alarm, Metric, TreatMissingData } from 'aws-cdk-lib/aws-cloudwatch';
7+
import { Queue } from 'aws-cdk-lib/aws-sqs';
68

79
export interface EventMonitoringProps {
810
router: EventRouter;
911
producer: EventProducer;
1012
consumers: EventConsumer[];
13+
deadLetterQueues: Queue[];
1114
}
1215

1316
export class EventMonitoring extends Construct {
@@ -17,8 +20,164 @@ export class EventMonitoring extends Construct {
1720
constructor(scope: Construct, id: string, props?: EventMonitoringProps) {
1821
super(scope, id);
1922

20-
// Initialize your construct logic here
21-
}
23+
// Create alarms on API Gateway
24+
props?.producer.api.metricClientError().createAlarm(scope, 'HighClientErrorAlarm', {
25+
evaluationPeriods: 3,
26+
threshold: 20,
27+
alarmDescription: 'Alert when client error rate exceeds 20%'
28+
});
29+
30+
props?.producer.api.metricServerError().createAlarm(scope, 'HighServerErrorAlarm', {
31+
evaluationPeriods: 3,
32+
threshold: 1,
33+
alarmDescription: 'Alert when server error rate exceeds 1%'
34+
});
35+
36+
props?.producer.api.metricLatency().createAlarm(scope, 'HighLatencyAlarm', {
37+
evaluationPeriods: 3,
38+
threshold: 1000,
39+
alarmDescription: 'Alert when latency exceeds 1 second'
40+
});
41+
42+
// Create metrics on EventBridge Bus
43+
const busInvocations = new Metric({
44+
namespace: 'AWS/Events',
45+
metricName: 'Invocations',
46+
dimensionsMap: {
47+
EventBusName: props?.router.bus.eventBusName!
48+
},
49+
period: cdk.Duration.minutes(5),
50+
statistic: 'Sum'
51+
});
52+
53+
const busFailedInvocations = new Metric({
54+
namespace: 'AWS/Events',
55+
metricName: 'FailedInvocations',
56+
dimensionsMap: {
57+
EventBusName: props?.router.bus.eventBusName!
58+
},
59+
period: cdk.Duration.minutes(5),
60+
statistic: 'Sum'
61+
});
62+
63+
// Create alarms on EventBridge Bus
64+
new Alarm(this, 'EventBusFailedInvocationsAlarm', {
65+
metric: busFailedInvocations,
66+
threshold: 1,
67+
evaluationPeriods: 3,
68+
datapointsToAlarm: 2,
69+
alarmDescription: 'Alert when EventBridge bus has failed invocations',
70+
treatMissingData: TreatMissingData.NOT_BREACHING
71+
});
72+
73+
// For each rule, create metrics and alarms
74+
props?.router.rules?.forEach((rule, index) => {
75+
const ruleInvocations = new Metric({
76+
namespace: 'AWS/Events',
77+
metricName: 'Invocations',
78+
dimensionsMap: {
79+
RuleName: rule.ruleName
80+
},
81+
period: cdk.Duration.minutes(5),
82+
statistic: 'Sum'
83+
});
84+
85+
const ruleFailedInvocations = new Metric({
86+
namespace: 'AWS/Events',
87+
metricName: 'FailedInvocations',
88+
dimensionsMap: {
89+
RuleName: rule.ruleName
90+
},
91+
period: cdk.Duration.minutes(5),
92+
statistic: 'Sum'
93+
});
94+
95+
const ruleThrottledRules = new Metric({
96+
namespace: 'AWS/Events',
97+
metricName: 'ThrottledRules',
98+
dimensionsMap: {
99+
RuleName: rule.ruleName
100+
},
101+
period: cdk.Duration.minutes(5),
102+
statistic: 'Sum'
103+
});
104+
105+
// Rule Failed Invocations Alarm
106+
new Alarm(this, `RuleFailedInvocationsAlarm-${index}`, {
107+
metric: ruleFailedInvocations,
108+
threshold: 1,
109+
evaluationPeriods: 2,
110+
datapointsToAlarm: 2,
111+
alarmDescription: `Alert when EventBridge rule ${rule.ruleName} has failed invocations`,
112+
treatMissingData: TreatMissingData.NOT_BREACHING
113+
});
22114

23-
// Add any helper methods here
115+
// Rule Throttled Events Alarm
116+
new Alarm(this, `RuleThrottledEventsAlarm-${index}`, {
117+
metric: ruleThrottledRules,
118+
threshold: 1,
119+
evaluationPeriods: 1,
120+
alarmDescription: `Alert when EventBridge rule ${rule.ruleName} is being throttled`,
121+
treatMissingData: TreatMissingData.NOT_BREACHING
122+
});
123+
});
124+
125+
// Create alarms for SNS topics
126+
props?.router.topics?.forEach((topic, index) => {
127+
const topicNumberOfNotificationsFailed = topic.metricNumberOfNotificationsFailed();
128+
129+
new Alarm(this, `TopicFailedNotificationsAlarm-${index}`, {
130+
threshold: 1,
131+
evaluationPeriods: 1,
132+
alarmDescription: `Alert when any notifications fail to deliver for topic ${topic.topicName}`,
133+
comparisonOperator: cdk.aws_cloudwatch.ComparisonOperator.GREATER_THAN_THRESHOLD,
134+
metric: topicNumberOfNotificationsFailed
135+
});
136+
});
137+
138+
// Create alarms for Consumer SQS queues
139+
props?.consumers?.forEach((consumer, index) => {
140+
const queueApproximateNumberOfMessagesVisible = consumer.queue.metricApproximateNumberOfMessagesVisible();
141+
142+
new Alarm(this, `QueueApproximateNumberOfMessagesVisibleAlarm-${index}`, {
143+
threshold: 100,
144+
evaluationPeriods: 1,
145+
alarmDescription: `Alert when the approximate number of messages visible in the queue ${consumer.queue.queueName} exceeds 100`,
146+
comparisonOperator: cdk.aws_cloudwatch.ComparisonOperator.GREATER_THAN_THRESHOLD,
147+
metric: queueApproximateNumberOfMessagesVisible
148+
});
149+
150+
const queueApproximateAgeOfOldestMessage = consumer.queue.metricApproximateAgeOfOldestMessage();
151+
new Alarm(this, `QueueApproximateAgeOfOldestMessageAlarm-${index}`, {
152+
threshold: 900,
153+
evaluationPeriods: 1,
154+
alarmDescription: `Alert when the approximate age of the oldest message in the queue ${consumer.queue.queueName} exceeds 900 seconds`,
155+
comparisonOperator: cdk.aws_cloudwatch.ComparisonOperator.GREATER_THAN_THRESHOLD,
156+
metric: queueApproximateAgeOfOldestMessage
157+
});
158+
});
159+
160+
// Create alarms on Dead Letter Queues
161+
props?.deadLetterQueues.forEach((queue, index) => {
162+
const queueApproximateNumberOfMessagesVisible = queue.metricApproximateNumberOfMessagesVisible();
163+
164+
new Alarm(this, `DeadLetterQueueApproximateNumberOfMessagesVisibleAlarm-${index}`, {
165+
threshold: 1,
166+
evaluationPeriods: 1,
167+
alarmDescription: `Alert when the approximate number of messages visible in the dead letter queue ${queue.queueName} exceeds 1`,
168+
comparisonOperator: cdk.aws_cloudwatch.ComparisonOperator.GREATER_THAN_THRESHOLD,
169+
metric: queueApproximateNumberOfMessagesVisible
170+
});
171+
172+
// make an alarm on the age of the oldest message > 1 day
173+
const queueApproximateAgeOfOldestMessage = queue.metricApproximateAgeOfOldestMessage();
174+
new Alarm(this, `DeadLetterQueueApproximateAgeOfOldestMessageAlarm-${index}`, {
175+
threshold: 86400,
176+
evaluationPeriods: 1,
177+
alarmDescription: `Alert when the approximate age of the oldest message in the dead letter queue ${queue.queueName} exceeds 1 day`,
178+
comparisonOperator: cdk.aws_cloudwatch.ComparisonOperator.GREATER_THAN_THRESHOLD,
179+
metric: queueApproximateAgeOfOldestMessage
180+
});
181+
});
182+
}
24183
}

lib/eventProducer.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ export type EventProducerProps = {
1111
}
1212

1313
export class EventProducer extends Construct {
14+
api: RestApi;
15+
1416
constructor(scope: Construct, id: string, props: EventProducerProps) {
1517
super(scope, id);
1618

@@ -57,6 +59,7 @@ export class EventProducer extends Construct {
5759

5860
// add dependency between api gateway and the account/role for CloudWatch
5961
(api.node.defaultChild as CfnRestApi).addDependency(apiGatewayAccount);
62+
this.api = api;
6063

6164
// Create service role for API Gateway granting access to EventBridge
6265
const role = new Role(scope, 'role', {

0 commit comments

Comments
 (0)