@@ -3,11 +3,14 @@ import { Construct } from 'constructs';
33import { EventRouter } from './eventRouter' ;
44import { EventProducer } from './eventProducer' ;
55import { EventConsumer } from './eventConsumer' ;
6+ import { Alarm , Metric , TreatMissingData } from 'aws-cdk-lib/aws-cloudwatch' ;
7+ import { Queue } from 'aws-cdk-lib/aws-sqs' ;
68
/**
 * Inputs consumed by the `EventMonitoring` construct.
 */
export interface EventMonitoringProps {
  /** Router whose event bus, rules and SNS topics receive alarms. */
  router: EventRouter;

  /** Producer whose `api` is monitored for client errors, server errors and latency. */
  producer: EventProducer;

  /** Consumers; each consumer's `queue` gets a backlog-depth and a message-age alarm. */
  consumers: Array<EventConsumer>;

  /** Dead letter queues; each one gets a visible-message alarm and a message-age alarm. */
  deadLetterQueues: Array<Queue>;
}
1215
1316export class EventMonitoring extends Construct {
@@ -17,8 +20,164 @@ export class EventMonitoring extends Construct {
/**
 * Creates CloudWatch alarms for every component supplied through `props`:
 * the producer's API, the router's event bus, each router rule, each router
 * SNS topic, each consumer queue and each dead letter queue.
 *
 * All alarms are created as children of this construct so that several
 * `EventMonitoring` instances can coexist in one scope without ID clashes.
 *
 * @param scope - Parent construct.
 * @param id - Construct identifier, unique within `scope`.
 * @param props - Components to monitor. When omitted, no alarms are created.
 */
constructor(scope: Construct, id: string, props?: EventMonitoringProps) {
  super(scope, id);

  // Without props there is nothing to monitor. Bailing out here also avoids
  // building metrics whose dimension values would be undefined.
  if (props === undefined) {
    return;
  }

  this.addProducerAlarms(props.producer);
  this.addBusAlarms(props.router);
  this.addRuleAlarms(props.router);
  this.addTopicAlarms(props.router);
  // `?? []` keeps the original tolerance for callers that leave consumers unset.
  this.addConsumerQueueAlarms(props.consumers ?? []);
  this.addDeadLetterQueueAlarms(props.deadLetterQueues);
}

/** Builds a 5-minute `Sum` metric in the `AWS/Events` namespace. */
private eventsMetric(metricName: string, dimensionsMap: Record<string, string>): Metric {
  return new Metric({
    namespace: 'AWS/Events',
    metricName,
    dimensionsMap,
    period: cdk.Duration.minutes(5),
    statistic: 'Sum',
  });
}

/** Adds client-error, server-error and latency alarms on the producer's API. */
private addProducerAlarms(producer: EventProducer): void {
  const { api } = producer;

  // NOTE(review): the descriptions below talk about percentages, but the
  // thresholds are compared against whatever statistic these metric helpers
  // default to. Confirm whether that is a rate or an absolute count.
  api.metricClientError().createAlarm(this, 'HighClientErrorAlarm', {
    evaluationPeriods: 3,
    threshold: 20,
    alarmDescription: 'Alert when client error rate exceeds 20%',
  });

  api.metricServerError().createAlarm(this, 'HighServerErrorAlarm', {
    evaluationPeriods: 3,
    threshold: 1,
    alarmDescription: 'Alert when server error rate exceeds 1%',
  });

  api.metricLatency().createAlarm(this, 'HighLatencyAlarm', {
    evaluationPeriods: 3,
    threshold: 1000,
    alarmDescription: 'Alert when latency exceeds 1 second',
  });
}

/** Adds a failed-invocations alarm scoped to the router's event bus. */
private addBusAlarms(router: EventRouter): void {
  const failedInvocations = this.eventsMetric('FailedInvocations', {
    EventBusName: router.bus.eventBusName,
  });

  new Alarm(this, 'EventBusFailedInvocationsAlarm', {
    metric: failedInvocations,
    threshold: 1,
    evaluationPeriods: 3,
    datapointsToAlarm: 2,
    alarmDescription: 'Alert when EventBridge bus has failed invocations',
    treatMissingData: TreatMissingData.NOT_BREACHING,
  });
}

/** Adds failed-invocation and throttling alarms for every rule on the router. */
private addRuleAlarms(router: EventRouter): void {
  router.rules?.forEach((rule, index) => {
    const byRule = { RuleName: rule.ruleName };

    new Alarm(this, `RuleFailedInvocationsAlarm-${index}`, {
      metric: this.eventsMetric('FailedInvocations', byRule),
      threshold: 1,
      evaluationPeriods: 2,
      datapointsToAlarm: 2,
      alarmDescription: `Alert when EventBridge rule ${rule.ruleName} has failed invocations`,
      treatMissingData: TreatMissingData.NOT_BREACHING,
    });

    new Alarm(this, `RuleThrottledEventsAlarm-${index}`, {
      metric: this.eventsMetric('ThrottledRules', byRule),
      threshold: 1,
      evaluationPeriods: 1,
      alarmDescription: `Alert when EventBridge rule ${rule.ruleName} is being throttled`,
      treatMissingData: TreatMissingData.NOT_BREACHING,
    });
  });
}

/** Adds a failed-delivery alarm for every SNS topic on the router. */
private addTopicAlarms(router: EventRouter): void {
  router.topics?.forEach((topic, index) => {
    new Alarm(this, `TopicFailedNotificationsAlarm-${index}`, {
      metric: topic.metricNumberOfNotificationsFailed(),
      threshold: 1,
      evaluationPeriods: 1,
      alarmDescription: `Alert when any notifications fail to deliver for topic ${topic.topicName}`,
      // ">=" so that a single failed notification already trips the alarm,
      // as the description promises; a strict ">" would need two failures.
      comparisonOperator: cdk.aws_cloudwatch.ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
    });
  });
}

/** Adds backlog-depth and message-age alarms for every consumer queue. */
private addConsumerQueueAlarms(consumers: EventConsumer[]): void {
  consumers.forEach((consumer, index) => {
    const { queue } = consumer;

    new Alarm(this, `QueueApproximateNumberOfMessagesVisibleAlarm-${index}`, {
      metric: queue.metricApproximateNumberOfMessagesVisible(),
      threshold: 100,
      evaluationPeriods: 1,
      alarmDescription: `Alert when the approximate number of messages visible in the queue ${queue.queueName} exceeds 100`,
      comparisonOperator: cdk.aws_cloudwatch.ComparisonOperator.GREATER_THAN_THRESHOLD,
    });

    new Alarm(this, `QueueApproximateAgeOfOldestMessageAlarm-${index}`, {
      metric: queue.metricApproximateAgeOfOldestMessage(),
      threshold: 900,
      evaluationPeriods: 1,
      alarmDescription: `Alert when the approximate age of the oldest message in the queue ${queue.queueName} exceeds 900 seconds`,
      comparisonOperator: cdk.aws_cloudwatch.ComparisonOperator.GREATER_THAN_THRESHOLD,
    });
  });
}

/** Adds visible-message and message-age alarms for every dead letter queue. */
private addDeadLetterQueueAlarms(deadLetterQueues: Queue[]): void {
  deadLetterQueues.forEach((queue, index) => {
    // NOTE(review): threshold 1 with a strict ">" means one dead-lettered
    // message does not alarm. That matches the description ("exceeds 1"),
    // but confirm it is the intended sensitivity for a DLQ.
    new Alarm(this, `DeadLetterQueueApproximateNumberOfMessagesVisibleAlarm-${index}`, {
      metric: queue.metricApproximateNumberOfMessagesVisible(),
      threshold: 1,
      evaluationPeriods: 1,
      alarmDescription: `Alert when the approximate number of messages visible in the dead letter queue ${queue.queueName} exceeds 1`,
      comparisonOperator: cdk.aws_cloudwatch.ComparisonOperator.GREATER_THAN_THRESHOLD,
    });

    // 86400 seconds = 1 day.
    new Alarm(this, `DeadLetterQueueApproximateAgeOfOldestMessageAlarm-${index}`, {
      metric: queue.metricApproximateAgeOfOldestMessage(),
      threshold: 86400,
      evaluationPeriods: 1,
      alarmDescription: `Alert when the approximate age of the oldest message in the dead letter queue ${queue.queueName} exceeds 1 day`,
      comparisonOperator: cdk.aws_cloudwatch.ComparisonOperator.GREATER_THAN_THRESHOLD,
    });
  });
}
24183}
0 commit comments