Skip to content

Commit 18d551d

Browse files
committed
alarms to terraform
1 parent 9041c1e commit 18d551d

File tree

7 files changed

+149
-158
lines changed

7 files changed

+149
-158
lines changed

cloudformation/alerting.yml

Lines changed: 0 additions & 113 deletions
This file was deleted.

cloudformation/logs.yml

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,8 @@
11
AWSTemplateFormatVersion: '2010-09-09'
22
Description: Stack Log Groups
33
Transform: AWS::Serverless-2016-10-31
4-
Parameters:
5-
LambdaFunctionName:
6-
Type: String
7-
AllowedPattern: ^[a-zA-Z0-9]+[a-zA-Z0-9-]+[a-zA-Z0-9]+$
8-
LogRetentionDays:
9-
Type: Number
4+
105
Resources:
11-
AppApiLambdaLogGroup:
12-
Type: AWS::Logs::LogGroup
13-
DeletionPolicy: Retain
14-
UpdateReplacePolicy: Retain
15-
Properties:
16-
LogGroupName:
17-
Fn::Sub: /aws/lambda/${LambdaFunctionName}
18-
RetentionInDays:
19-
Ref: LogRetentionDays
206
AppAuditLog:
217
Type: "AWS::DynamoDB::Table"
228
DeletionPolicy: "Retain"

cloudformation/main.yml

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,6 @@ Parameters:
66
RunEnvironment:
77
Type: String
88
AllowedValues: ["dev", "prod"]
9-
AlertSNSArn:
10-
Description: SNS Queue to send general alarm alerts to (prod only)
11-
Type: String
12-
Default: arn:aws:sns:us-east-1:298118738376:infra-monitor-alerts
13-
PriorityAlertSNSArn:
14-
Description: SNS Queue to send priority alarm alerts to (prod only)
15-
Type: String
16-
Default: arn:aws:sns:us-east-1:298118738376:infra-core-api-priority-alerts
179
ApplicationPrefix:
1810
Type: String
1911
Description: Application prefix, no ending dash
@@ -46,10 +38,8 @@ Conditions:
4638
Mappings:
4739
General:
4840
dev:
49-
LogRetentionDays: 7
5041
SesDomain: "aws.qa.acmuiuc.org"
5142
prod:
52-
LogRetentionDays: 90
5343
SesDomain: "acm.illinois.edu"
5444
ApiGwConfig:
5545
dev:
@@ -103,10 +93,6 @@ Resources:
10393
Type: AWS::Serverless::Application
10494
Properties:
10595
Location: ./logs.yml
106-
Parameters:
107-
LambdaFunctionName: !Sub ${ApplicationPrefix}-lambda
108-
LogRetentionDays:
109-
!FindInMap [General, !Ref RunEnvironment, LogRetentionDays]
11096

11197
AppSQSQueues:
11298
Type: AWS::Serverless::Application
@@ -116,18 +102,6 @@ Resources:
116102
QueueName: !Sub ${ApplicationPrefix}-sqs
117103
MessageTimeout: !Ref SqsMessageTimeout
118104

119-
AppAlarms:
120-
Condition: IsProd
121-
Type: AWS::Serverless::Application
122-
Properties:
123-
Location: ./alerting.yml
124-
Parameters:
125-
AlertSNSArn: !Ref AlertSNSArn
126-
PriorityAlertSNSArn: !Ref PriorityAlertSNSArn
127-
ApplicationPrefix: !Ref ApplicationPrefix
128-
ApplicationFriendlyName: !Ref ApplicationFriendlyName
129-
MainCloudfrontDistributionId: !GetAtt AppFrontendCloudfrontDistribution.Id
130-
131105
LinkryRecordSetv4:
132106
Condition: IsDev
133107
Type: AWS::Route53::RecordSet

terraform/envs/prod/main.tf

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,15 @@ provider "aws" {
1818
}
1919
}
2020

21-
import {
22-
to = aws_cloudwatch_log_group.main_app_logs
23-
id = "/aws/lambda/${var.ProjectId}-lambda"
24-
}
2521
resource "aws_cloudwatch_log_group" "main_app_logs" {
2622
name = "/aws/lambda/${var.ProjectId}-lambda"
2723
retention_in_days = var.LogRetentionDays
2824
}
25+
26+
module "app_alarms" {
27+
source = "../../modules/alarms"
28+
main_cloudfront_distribution_id = var.main_cloudfront_distribution_id
29+
resource_prefix = var.ProjectId
30+
priority_sns_arn = var.PrioritySNSAlertArn # priority alerts topic (was crossed with the general topic)
31+
standard_sns_arn = var.GeneralSNSAlertArn # general/standard alerts topic
32+
}

terraform/envs/prod/variables.tf

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,17 @@ variable "ProjectId" {
88
default = "infra-core-api"
99
}
1010

11+
variable "main_cloudfront_distribution_id" {
12+
type = string
13+
description = "(temporary) ID for the cloudfront distribution that serves the main application"
14+
}
15+
16+
variable "GeneralSNSAlertArn" {
17+
type = string
18+
default = "arn:aws:sns:us-east-1:298118738376:infra-monitor-alerts"
19+
}
20+
21+
variable "PrioritySNSAlertArn" {
22+
type = string
23+
default = "arn:aws:sns:us-east-1:298118738376:infra-core-api-priority-alerts"
24+
}

terraform/modules/alarms/main.tf

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
provider "aws" {
2+
}
3+
4+
resource "aws_cloudwatch_metric_alarm" "app_dlq_messages_alarm" {
5+
alarm_name = "${var.resource_prefix}-sqs-dlq-present"
6+
alarm_description = "Items are present in the application DLQ, meaning some messages failed to process."
7+
namespace = "AWS/SQS"
8+
metric_name = "ApproximateNumberOfMessagesVisible"
9+
statistic = "Maximum"
10+
period = 60
11+
evaluation_periods = 1
12+
comparison_operator = "GreaterThanThreshold"
13+
threshold = 0
14+
dimensions = {
15+
# CloudWatch dimensions: the AWS provider takes a map of
16+
# dimension name => value, not the CloudFormation-style
17+
# list of Name/Value objects.
18+
QueueName = "${var.resource_prefix}-sqs-dlq"
19+
}
20+
alarm_actions = [
21+
var.priority_sns_arn
22+
]
23+
}
24+
25+
resource "aws_cloudwatch_metric_alarm" "app_latency_alarm" {
26+
alarm_name = "${var.resource_prefix}-latency-high"
27+
alarm_description = "Trailing Mean - 95% API gateway latency is > 1.25s for 2 times in 4 minutes."
28+
namespace = "AWS/Lambda"
29+
metric_name = "UrlRequestLatency"
30+
extended_statistic = "tm95"
31+
period = "120"
32+
evaluation_periods = "2"
33+
comparison_operator = "GreaterThanThreshold"
34+
threshold = "1250"
35+
alarm_actions = [
36+
var.standard_sns_arn
37+
]
38+
dimensions = {
39+
# CloudWatch dimensions: the AWS provider takes a map of
40+
# dimension name => value, not the CloudFormation-style
41+
# list of Name/Value objects.
42+
FunctionName = "${var.resource_prefix}-lambda"
43+
}
44+
}
45+
46+
resource "aws_cloudwatch_metric_alarm" "app_no_requests_alarm" {
47+
alarm_name = "${var.resource_prefix}-no-requests"
48+
alarm_description = "No requests have been received in the past 5 minutes."
49+
namespace = "AWS/Lambda"
50+
metric_name = "UrlRequestCount"
51+
statistic = "Sum"
52+
period = "300"
53+
evaluation_periods = "1"
54+
comparison_operator = "LessThanThreshold"
55+
threshold = "1"
56+
alarm_actions = [
57+
var.priority_sns_arn
58+
]
59+
dimensions = {
60+
# CloudWatch dimensions: the AWS provider takes a map of
61+
# dimension name => value, not the CloudFormation-style
62+
# list of Name/Value objects.
63+
FunctionName = "${var.resource_prefix}-lambda"
64+
}
65+
}
66+
67+
resource "aws_cloudwatch_metric_alarm" "app_invocation_error_alarm" {
68+
alarm_name = "${var.resource_prefix}-error-invocation"
69+
alarm_description = "Lambda threw an error, meaning the init of the application itself has encountered an error"
70+
namespace = "AWS/Lambda"
71+
metric_name = "Errors"
72+
statistic = "Sum"
73+
period = "300"
74+
evaluation_periods = "1"
75+
comparison_operator = "GreaterThanThreshold"
76+
threshold = "1"
77+
alarm_actions = [
78+
var.priority_sns_arn
79+
]
80+
dimensions = {
81+
# CloudWatch dimensions: the AWS provider takes a map of
82+
# dimension name => value, not the CloudFormation-style
83+
# list of Name/Value objects.
84+
FunctionName = "${var.resource_prefix}-lambda"
85+
}
86+
}
87+
88+
resource "aws_cloudwatch_metric_alarm" "app5xx_error_alarm" {
89+
alarm_name = "${var.resource_prefix}-cloudfront-5xx-error"
90+
alarm_description = "Main application responses are more than 1% 5xx errors (from Cloudfront)"
91+
namespace = "AWS/CloudFront"
92+
metric_name = "5xxErrorRate"
93+
statistic = "Average"
94+
period = "300"
95+
evaluation_periods = "1"
96+
comparison_operator = "GreaterThanThreshold"
97+
threshold = "1"
98+
alarm_actions = [
99+
var.priority_sns_arn
100+
]
101+
dimensions = {
102+
# CloudWatch dimensions: the AWS provider takes a map of
103+
# dimension name => value, not the CloudFormation-style
104+
# list of Name/Value objects.
105+
DistributionId = var.main_cloudfront_distribution_id
106+
}
107+
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
variable "resource_prefix" {
2+
type = string
3+
description = "Prefix before each resource"
4+
}
5+
6+
variable "priority_sns_arn" {
7+
type = string
8+
description = "Priority SNS alerts ARN"
9+
}
10+
11+
variable "standard_sns_arn" {
12+
type = string
13+
description = "Standard SNS alerts ARN"
14+
}
15+
16+
variable "main_cloudfront_distribution_id" {
17+
type = string
18+
description = "ID for the cloudfront distribution that serves the main application"
19+
}

0 commit comments

Comments
 (0)