Skip to content

Commit a419f5c

Browse files
committed
Update alarms
1 parent 1fdd305 commit a419f5c

File tree

2 files changed

+8
-7
lines changed

2 files changed

+8
-7
lines changed

terraform/envs/qa/variables.tf

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -35,10 +35,10 @@ variable "EmailDomain" {
3535

3636
variable "GeneralSNSAlertArn" {
3737
type = string
38-
default = "arn:aws:sns:us-east-1:427040638965:infra-monitor-alerts:6556b66f-de43-49cd-9dee-5796b07b4ae8"
38+
default = "arn:aws:sns:us-east-1:427040638965:infra-monitor-alerts"
3939
}
4040

4141
variable "PrioritySNSAlertArn" {
4242
type = string
43-
default = "arn:aws:sns:us-east-1:427040638965:infra-monitor-alerts:6556b66f-de43-49cd-9dee-5796b07b4ae8"
43+
default = "arn:aws:sns:us-east-1:427040638965:infra-monitor-alerts"
4444
}

terraform/modules/alarms/main.tf

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ resource "aws_cloudwatch_metric_alarm" "app_dlq_messages_alarm" {
1111
alarm_description = "Items are present in the application DLQ, meaning some messages failed to process."
1212
namespace = "AWS/SQS"
1313
metric_name = "ApproximateNumberOfMessagesVisible"
14-
statistic = "Maximum"
14+
statistic = "Sum"
1515
period = 60
1616
evaluation_periods = 1
1717
comparison_operator = "GreaterThanThreshold"
@@ -22,19 +22,20 @@ resource "aws_cloudwatch_metric_alarm" "app_dlq_messages_alarm" {
2222
alarm_actions = [
2323
var.priority_sns_arn
2424
]
25+
treat_missing_data = "notBreaching"
2526
}
2627

2728
resource "aws_cloudwatch_metric_alarm" "app_latency_alarm" {
2829
for_each = var.performance_noreq_lambdas
2930
alarm_name = "${each.value}-latency-high"
30-
alarm_description = "${replace(each.value, var.resource_prefix, "")} Trailing Mean - 95% API gateway latency is > 1.25s for 2 times in 4 minutes."
31+
alarm_description = "${replace(each.value, "${var.resource_prefix}-", "")} Trailing Mean - 95% API gateway latency is > 1.5s for 2 times in 4 minutes."
3132
namespace = "AWS/Lambda"
3233
metric_name = "UrlRequestLatency"
3334
extended_statistic = "tm95"
3435
period = "120"
3536
evaluation_periods = "2"
3637
comparison_operator = "GreaterThanThreshold"
37-
threshold = "1250"
38+
threshold = "1500"
3839
alarm_actions = [
3940
var.standard_sns_arn
4041
]
@@ -46,7 +47,7 @@ resource "aws_cloudwatch_metric_alarm" "app_latency_alarm" {
4647
resource "aws_cloudwatch_metric_alarm" "app_no_requests_alarm" {
4748
for_each = var.performance_noreq_lambdas
4849
alarm_name = "${each.value}-no-requests"
49-
alarm_description = "${replace(each.value, var.resource_prefix, "")}: no requests have been received in the past 5 minutes."
50+
alarm_description = "${replace(each.value, "${var.resource_prefix}-", "")}: no requests have been received in the past 5 minutes."
5051
namespace = "AWS/Lambda"
5152
metric_name = "UrlRequestCount"
5253
statistic = "Sum"
@@ -65,7 +66,7 @@ resource "aws_cloudwatch_metric_alarm" "app_no_requests_alarm" {
6566
resource "aws_cloudwatch_metric_alarm" "app_invocation_error_alarm" {
6667
for_each = var.all_lambdas
6768
alarm_name = "${each.value}-error-invocation"
68-
alarm_description = "${replace(each.value, var.resource_prefix, "")} lambda threw a critical error."
69+
alarm_description = "${replace(each.value, "${var.resource_prefix}-", "")} lambda threw a critical error."
6970
namespace = "AWS/Lambda"
7071
metric_name = "Errors"
7172
statistic = "Sum"

0 commit comments

Comments
 (0)