Skip to content

Commit 550546e

Browse files
PedroSoaresNHS, steph-torres-nhs, MohammadIqbalAD-NHS
authored
[PRMT-466] Create alarm for old messages in the queue (#341)
--------- Co-authored-by: steph-torres-nhs <[email protected]> Co-authored-by: Mohammad Iqbal <[email protected]>
1 parent b8d81d5 commit 550546e

File tree

5 files changed

+179
-38
lines changed

5 files changed

+179
-38
lines changed

infrastructure/alarms.tf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,4 +81,4 @@ resource "aws_sns_topic_subscription" "alarm_notifications_sns_topic_subscriptio
8181

8282
data "aws_ssm_parameter" "cloud_security_notification_email_list" {
8383
name = "/prs/${var.environment}/user-input/cloud-security-notification-email-list"
84-
}
84+
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# ---------------------------------------------------------------------------
# Outputs of the SQS module.
# ---------------------------------------------------------------------------

# --- Primary queue identity -------------------------------------------------

output "sqs_name" {
  description = "Name of the queue"
  value       = aws_sqs_queue.sqs_queue.name
}

output "sqs_id" {
  description = "ID of the main SQS queue."
  value       = aws_sqs_queue.sqs_queue.id
}

output "sqs_url" {
  description = "URL of the SQS queue for use with API clients or AWS SDKs."
  value       = aws_sqs_queue.sqs_queue.url
}

output "sqs_arn" {
  description = "Amazon Resource Name (ARN) of the primary SQS queue."
  value       = aws_sqs_queue.sqs_queue.arn
}

# Exposes the same ARN as "sqs_arn", under a name suited to SNS subscriptions.
output "endpoint" {
  description = "The SQS queue ARN e.g. for use when setting the queue as the endpoint of an SNS topic."
  value       = aws_sqs_queue.sqs_queue.arn
}

# --- IAM policy documents (rendered JSON) -----------------------------------

output "sqs_read_policy_document" {
  description = "IAM policy document granting read access to the SQS queue."
  value       = data.aws_iam_policy_document.sqs_read_policy.json
}

output "sqs_write_policy_document" {
  description = "IAM policy document granting write access to the SQS queue."
  value       = data.aws_iam_policy_document.sqs_write_policy.json
}

# --- Dead-letter queue ------------------------------------------------------

# Evaluates to null when var.enable_dlq is false, so consumers must be
# prepared to receive a null name.
output "dlq_name" {
  description = "Name of the dead-letter queue (DLQ), if created."
  value       = var.enable_dlq ? aws_sqs_queue.queue_deadletter[0].name : null
}

infrastructure/modules/sqs/variable.tf

Lines changed: 1 addition & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -90,39 +90,4 @@ variable "environment" {
9090
variable "owner" {
9191
description = "Owner tag used for identifying resource ownership."
9292
type = string
93-
}
94-
95-
output "endpoint" {
96-
description = "The SQS queue ARN e.g. for use when setting the queue as the endpoint of an SNS topic."
97-
value = aws_sqs_queue.sqs_queue.arn
98-
}
99-
100-
output "sqs_arn" {
101-
description = "Amazon Resource Name (ARN) of the primary SQS queue."
102-
value = aws_sqs_queue.sqs_queue.arn
103-
}
104-
105-
output "sqs_id" {
106-
description = "ID of the main SQS queue."
107-
value = aws_sqs_queue.sqs_queue.id
108-
}
109-
110-
output "sqs_url" {
111-
description = "URL of the SQS queue for use with API clients or AWS SDKs."
112-
value = aws_sqs_queue.sqs_queue.url
113-
}
114-
115-
output "sqs_read_policy_document" {
116-
description = "IAM policy document granting read access to the SQS queue."
117-
value = data.aws_iam_policy_document.sqs_read_policy.json
118-
}
119-
120-
output "sqs_write_policy_document" {
121-
description = "IAM policy document granting write access to the SQS queue."
122-
value = data.aws_iam_policy_document.sqs_write_policy.json
123-
}
124-
125-
output "dlq_name" {
126-
description = "Name of the dead-letter queue (DLQ), if created."
127-
value = var.enable_dlq ? aws_sqs_queue.queue_deadletter[0].name : null
128-
}
93+
}

infrastructure/sqs_alarms.tf

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
locals {
  # Alarm key => SQS queue name for every queue that receives an
  # "oldest message" alarm. The key becomes part of the alarm name.
  monitored_queues = {
    # main queues
    "nrl_main"       = module.sqs-nrl-queue.sqs_name
    "stitching_main" = module.sqs-stitching-queue.sqs_name
    "lg_bulk_main"   = module.sqs-lg-bulk-upload-metadata-queue.sqs_name
    "lg_inv_main"    = module.sqs-lg-bulk-upload-invalid-queue.sqs_name
    "mns_main"       = module.sqs-mns-notification-queue[0].sqs_name
    # dead-letter queues
    # NOTE(review): dlq_name is null when a queue module has enable_dlq = false;
    # confirm every queue listed here has its DLQ enabled.
    "nrl_dlq"       = module.sqs-nrl-queue.dlq_name
    "stitching_dlq" = module.sqs-stitching-queue.dlq_name
    "mns_dlq"       = module.sqs-mns-notification-queue[0].dlq_name
  }

  # [message age in days, severity] pairs. One alarm is created per
  # monitored queue for each pair.
  days_until_alarm = [
    [6, "medium"],
    [10, "high"]
  ]

  # Every (queue, threshold) combination, emitted as one flat sequence:
  #   key, queue name, days, severity, key, queue name, days, severity, ...
  # flatten() collapses all nesting levels, which is why the 4-element
  # records are rebuilt in monitored_queue_day_list below.
  flat_list = flatten([
    for alarm_key, queue_name in local.monitored_queues : [
      for tier in local.days_until_alarm : [
        alarm_key,
        queue_name,
        tier[0], # days
        tier[1]  # severity
      ]
    ]
  ])

  # flat_list regrouped into records of [key, queue name, days, severity].
  monitored_queue_day_list = [
    for offset in range(0, length(local.flat_list), 4) :
    slice(local.flat_list, offset, offset + 4)
  ]
}
41+
42+
43+
# SNS topic with e-mail subscriptions for SQS message-age alarms.
# Not created in sandbox workspaces.
# NOTE(review): no alarm visible in this file publishes to this topic;
# confirm whether it is wired up elsewhere.
module "global_sqs_age_alarm_topic" {
  source = "./modules/sns"
  count  = local.is_sandbox ? 0 : 1

  topic_name            = "global-sqs-age-alarm-topic"
  sns_encryption_key_id = module.sns_encryption_key.id

  # Subscribers come from a comma-separated SSM parameter value.
  topic_protocol         = "email"
  is_topic_endpoint_list = true
  topic_endpoint_list = nonsensitive(
    split(",", data.aws_ssm_parameter.cloud_security_notification_email_list.value)
  )

  # Lets CloudWatch alarms in this account (eu-west-2) publish to the topic.
  # NOTE(review): the argument is named delivery_policy but the document has
  # the shape of an access policy; confirm how ./modules/sns applies it.
  delivery_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect    = "Allow"
        Principal = { Service = "cloudwatch.amazonaws.com" }
        Action    = "SNS:Publish"
        Resource  = "*"
        Condition = {
          ArnLike = {
            "aws:SourceArn" = "arn:aws:cloudwatch:eu-west-2:${data.aws_caller_identity.current.account_id}:alarm:*"
          }
        }
      }
    ]
  })
}
71+
72+
73+
# One CloudWatch alarm per entry of local.monitored_queue_day_list, where each
# entry is [key, queue name, days, severity]. No alarms are created in sandbox
# workspaces.
resource "aws_cloudwatch_metric_alarm" "sqs_oldest_message" {
  count = local.is_sandbox ? 0 : length(local.monitored_queue_day_list)

  alarm_name        = "${terraform.workspace}_${local.monitored_queue_day_list[count.index][0]}_oldest_message_alarm_${local.monitored_queue_day_list[count.index][2]}d"
  alarm_description = "Alarm when a message in queue '${local.monitored_queue_day_list[count.index][1]}' is older than '${local.monitored_queue_day_list[count.index][2]}' days."

  # Metric being watched.
  namespace   = "AWS/SQS"
  metric_name = "ApproximateAgeOfOldestMessage"
  dimensions = {
    QueueName = local.monitored_queue_day_list[count.index][1]
  }

  # Evaluate the maximum over a single one-day (86400 s) period.
  statistic          = "Maximum"
  period             = 86400
  evaluation_periods = 1

  # Threshold is the configured number of days converted to seconds.
  comparison_operator = "GreaterThanThreshold"
  threshold           = local.monitored_queue_day_list[count.index][2] * 86400
  treat_missing_data  = "notBreaching"

  # Both state transitions are sent to the Lambda-backed SNS topic.
  alarm_actions = [module.sqs_alarm_lambda_topic.arn]
  ok_actions    = [module.sqs_alarm_lambda_topic.arn]

  tags = {
    Name         = "${terraform.workspace}_${local.monitored_queue_day_list[count.index][0]}_oldest_message_alarm_${local.monitored_queue_day_list[count.index][2]}d"
    severity     = local.monitored_queue_day_list[count.index][3]
    alarm_group  = local.monitored_queue_day_list[count.index][1]
    alarm_metric = "ApproximateAgeOfOldestMessage"
    is_kpi       = "true"
  }
}
103+
104+
# SNS topic that forwards SQS alarm state changes to the im-alerting Lambda.
# It is the target of alarm_actions / ok_actions on the sqs_oldest_message
# alarms.
module "sqs_alarm_lambda_topic" {
  source = "./modules/sns"

  topic_name            = "sqs-alarms-to-lambda-topic"
  sns_encryption_key_id = module.sns_encryption_key.id

  # Single Lambda subscriber.
  topic_protocol = "lambda"
  topic_endpoint = module.im-alerting-lambda.lambda_arn

  # Lets CloudWatch alarms in this account (eu-west-2) publish to the topic.
  # NOTE(review): the argument is named delivery_policy but the document has
  # the shape of an access policy; confirm how ./modules/sns applies it.
  delivery_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect    = "Allow"
        Principal = { Service = "cloudwatch.amazonaws.com" }
        Action    = ["SNS:Publish"]
        Resource  = "*"
        Condition = {
          ArnLike = {
            "aws:SourceArn" = "arn:aws:cloudwatch:eu-west-2:${data.aws_caller_identity.current.account_id}:alarm:*"
          }
        }
      }
    ]
  })
}
130+
131+
# Grants SNS permission to invoke the im-alerting Lambda, restricted to
# messages originating from the SQS alarm topic.
resource "aws_lambda_permission" "sqs_im_alerting" {
  statement_id = "AllowExecutionFromSQSAlarmSNS"

  # Who may call, and from which topic.
  principal  = "sns.amazonaws.com"
  source_arn = module.sqs_alarm_lambda_topic.arn

  # What they may do, and on which function.
  action        = "lambda:InvokeFunction"
  function_name = module.im-alerting-lambda.lambda_arn
}

package-lock.json

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)