|
# Inputs and derived lookup tables for the SQS "oldest message age" alarms.
locals {
  # Short alarm key => queue name, read from the sqs_name / dlq_name outputs of
  # the SQS modules. The key is embedded in every alarm name built from it.
  monitored_queues = {
    # main queues
    nrl_main       = module.sqs-nrl-queue.sqs_name
    stitching_main = module.sqs-stitching-queue.sqs_name
    lg_bulk_main   = module.sqs-lg-bulk-upload-metadata-queue.sqs_name
    lg_inv_main    = module.sqs-lg-bulk-upload-invalid-queue.sqs_name
    # NOTE(review): the [0] index implies this module is declared with
    # count/for_each - confirm it is always instantiated wherever these locals
    # are evaluated.
    mns_main = module.sqs-mns-notification-queue[0].sqs_name
    # dead-letter queues
    nrl_dlq       = module.sqs-nrl-queue.dlq_name
    stitching_dlq = module.sqs-stitching-queue.dlq_name
    mns_dlq       = module.sqs-mns-notification-queue[0].dlq_name
  }

  # [age threshold in days, severity label] pairs; every monitored queue gets
  # one alarm per pair.
  days_until_alarm = [
    [6, "medium"],
    [10, "high"]
  ]

  # flatten() collapses every level of list nesting, so this is a single flat
  # sequence of values in repeating groups of four:
  # key, queue name, days, severity.
  flat_list = flatten([
    for queue_key, queue_name in local.monitored_queues : [
      for rule in local.days_until_alarm : [
        queue_key,
        queue_name,
        rule[0], # day
        rule[1]  # severity
      ]
    ]
  ])

  # Re-group the flat sequence into one 4-element entry per alarm by stepping
  # through it four values at a time.
  monitored_queue_day_list = [
    for offset in range(0, length(local.flat_list), 4) : [
      local.flat_list[offset],     # key
      local.flat_list[offset + 1], # queue name
      local.flat_list[offset + 2], # day
      local.flat_list[offset + 3], # severity
    ]
  ]
}
| 41 | + |
| 42 | + |
# SNS topic "global-sqs-age-alarm-topic", configured with the email protocol.
# Skipped entirely in sandbox workspaces (count = 0).
# NOTE(review): nothing in the visible part of this file publishes to this
# topic - confirm it is consumed elsewhere.
module "global_sqs_age_alarm_topic" {
  count  = local.is_sandbox ? 0 : 1
  source = "./modules/sns"

  topic_name            = "global-sqs-age-alarm-topic"
  sns_encryption_key_id = module.sns_encryption_key.id

  # Endpoints come from a comma-separated SSM parameter value; nonsensitive()
  # strips the sensitive marking from the resulting list.
  topic_protocol         = "email"
  is_topic_endpoint_list = true
  topic_endpoint_list    = nonsensitive(split(",", data.aws_ssm_parameter.cloud_security_notification_email_list.value))

  # Policy document handed to the module: allows the CloudWatch service to
  # SNS:Publish when the source ARN matches an alarm of the current account in
  # eu-west-2.
  delivery_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Principal = {
          Service = "cloudwatch.amazonaws.com"
        }
        Action   = "SNS:Publish"
        Resource = "*"
        Condition = {
          ArnLike = {
            "aws:SourceArn" = "arn:aws:cloudwatch:eu-west-2:${data.aws_caller_identity.current.account_id}:alarm:*"
          }
        }
      }
    ]
  })
}
| 71 | + |
| 72 | + |
# One CloudWatch alarm per entry of local.monitored_queue_day_list; none are
# created in sandbox workspaces.
# Each entry is [key, queue name, days, severity] (indices 0-3).
resource "aws_cloudwatch_metric_alarm" "sqs_oldest_message" {
  count = local.is_sandbox ? 0 : length(local.monitored_queue_day_list)

  alarm_name        = "${terraform.workspace}_${local.monitored_queue_day_list[count.index][0]}_oldest_message_alarm_${local.monitored_queue_day_list[count.index][2]}d"
  alarm_description = "Alarm when a message in queue '${local.monitored_queue_day_list[count.index][1]}' is older than '${local.monitored_queue_day_list[count.index][2]}' days."

  # Metric being watched, scoped to a single queue.
  namespace   = "AWS/SQS"
  metric_name = "ApproximateAgeOfOldestMessage"
  dimensions = {
    QueueName = local.monitored_queue_day_list[count.index][1]
  }

  # Evaluate the maximum over a single one-day period (86400 s).
  statistic          = "Maximum"
  period             = 86400
  evaluation_periods = 1

  # Threshold is the entry's day count converted to seconds.
  comparison_operator = "GreaterThanThreshold"
  threshold           = local.monitored_queue_day_list[count.index][2] * 24 * 60 * 60
  treat_missing_data  = "notBreaching"

  # Both the ALARM and the OK transition notify the same topic.
  alarm_actions = [module.sqs_alarm_lambda_topic.arn]
  ok_actions    = [module.sqs_alarm_lambda_topic.arn]

  tags = {
    Name         = "${terraform.workspace}_${local.monitored_queue_day_list[count.index][0]}_oldest_message_alarm_${local.monitored_queue_day_list[count.index][2]}d"
    severity     = local.monitored_queue_day_list[count.index][3]
    alarm_group  = local.monitored_queue_day_list[count.index][1]
    alarm_metric = "ApproximateAgeOfOldestMessage"
    is_kpi       = "true"
  }
}
| 103 | + |
# SNS topic "sqs-alarms-to-lambda-topic", configured with the lambda protocol
# and the im-alerting Lambda's ARN as its endpoint.
module "sqs_alarm_lambda_topic" {
  source = "./modules/sns"

  topic_name            = "sqs-alarms-to-lambda-topic"
  sns_encryption_key_id = module.sns_encryption_key.id

  topic_protocol = "lambda"
  topic_endpoint = module.im-alerting-lambda.lambda_arn

  # Policy document handed to the module: allows the CloudWatch service to
  # SNS:Publish when the source ARN matches an alarm of the current account in
  # eu-west-2.
  delivery_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Principal = {
          Service = "cloudwatch.amazonaws.com"
        }
        Action   = ["SNS:Publish"]
        Resource = "*"
        Condition = {
          ArnLike = {
            "aws:SourceArn" = "arn:aws:cloudwatch:eu-west-2:${data.aws_caller_identity.current.account_id}:alarm:*"
          }
        }
      }
    ]
  })
}
| 130 | + |
# Grants the SNS service permission to invoke the im-alerting Lambda, limited
# to invocations whose source is the sqs_alarm_lambda_topic topic.
resource "aws_lambda_permission" "sqs_im_alerting" {
  statement_id = "AllowExecutionFromSQSAlarmSNS"

  # Who may call, and from which source.
  principal  = "sns.amazonaws.com"
  source_arn = module.sqs_alarm_lambda_topic.arn

  # What may be called (the function is identified by its ARN).
  action        = "lambda:InvokeFunction"
  function_name = module.im-alerting-lambda.lambda_arn
}
0 commit comments