Skip to content

Commit 6650a62

Browse files
authored
VED-887: Slack Api Error Alerting (#1066)
* Setting up account-level config for Slack and SNS topic alerting
1 parent c30564a commit 6650a62

20 files changed

+133
-34
lines changed

infrastructure/account/batch_processor_errors_sns_topic.tf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
resource "aws_sns_topic" "batch_processor_errors" {
22
name = "${var.environment}-batch-processor-errors"
3-
kms_master_key_id = aws_kms_key.batch_processor_errors_sns_encryption_key.arn
3+
kms_master_key_id = aws_kms_key.error_alerts_sns_encryption_key.arn
44
}
55

66
resource "aws_sns_topic_policy" "batch_processor_errors_topic_policy" {
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Chatbot Slack channel configuration that wires the fhir_api_errors SNS topic
# to a Slack channel. The configuration name is prefixed with var.environment.
resource "aws_chatbot_slack_channel_configuration" "fhir_api_errors" {
2+
configuration_name = "${var.environment}-fhir-api-errors-slack-channel-config"
3+
iam_role_arn = aws_iam_role.fhir_api_errors_chatbot.arn
4+
# "prod" uses the first channel ID; every other value of var.environment
# falls through to the second one.
# NOTE(review): channel IDs are hard-coded here - consider moving them to variables.
slack_channel_id = var.environment == "prod" ? "C0A3LPKNKEE" : "C0A4F3G8J0G"
5+
# NOTE(review): Slack team (workspace) ID is hard-coded and shared by all environments - confirm this is intended.
slack_team_id = "TJ00QR03U"
6+
# Only the FHIR API errors topic is subscribed to this channel.
sns_topic_arns = [aws_sns_topic.fhir_api_errors.arn]
7+
}
8+
9+
# IAM role referenced by the fhir_api_errors Chatbot Slack channel configuration
# (via iam_role_arn). Only the trust policy is defined in this block.
# NOTE(review): no permissions policy is attached in the visible code - confirm
# whether one is attached elsewhere or whether none is required.
resource "aws_iam_role" "fhir_api_errors_chatbot" {
10+
name = "${var.environment}-fhir-api-errors-chatbot-channel-role"
11+
# Trust policy: allows the chatbot.amazonaws.com service principal to assume this role.
assume_role_policy = jsonencode({
12+
Version = "2012-10-17"
13+
Statement = [
14+
{
15+
Action = "sts:AssumeRole"
16+
Effect = "Allow"
17+
Sid = "AssumeChatbotRole"
18+
Principal = {
19+
Service = "chatbot.amazonaws.com"
20+
}
21+
},
22+
]
23+
})
24+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# SNS topic for FHIR API error alerts, named per environment.
resource "aws_sns_topic" "fhir_api_errors" {
2+
name = "${var.environment}-fhir-api-errors"
3+
# Messages are encrypted with the error_alerts_sns_encryption_key KMS key.
# NOTE(review): publishers presumably need permission to use this key - verify against the key policy.
kms_master_key_id = aws_kms_key.error_alerts_sns_encryption_key.arn
4+
}
5+
6+
# Resource policy for the fhir_api_errors SNS topic. Its single statement allows
# the cloudwatch.amazonaws.com service principal to publish to the topic.
# NOTE(review): the statement has no Condition (e.g. aws:SourceAccount or
# aws:SourceArn), so it is not scoped to a particular account or alarm -
# consider adding one to guard against confused-deputy publishing.
resource "aws_sns_topic_policy" "fhir_api_errors_topic_policy" {
7+
arn = aws_sns_topic.fhir_api_errors.arn
8+
policy = jsonencode({
9+
Version = "2012-10-17",
10+
Statement = [
11+
{
12+
Sid = "AllowCloudWatchToPublish",
13+
Effect = "Allow",
14+
Principal = {
15+
Service = "cloudwatch.amazonaws.com"
16+
},
17+
Action = "SNS:Publish",
18+
# Permission is limited to this one topic.
Resource = aws_sns_topic.fhir_api_errors.arn
19+
}
20+
]
21+
})
22+
}

infrastructure/account/kms.tf

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ resource "aws_kms_alias" "id_sync_sqs_encryption" {
179179
target_key_id = aws_kms_key.id_sync_sqs_encryption.key_id
180180
}
181181

182-
resource "aws_kms_key" "batch_processor_errors_sns_encryption_key" {
182+
resource "aws_kms_key" "error_alerts_sns_encryption_key" {
183183
description = "KMS key for encrypting the batch processor errors SNS Topic messages"
184184
deletion_window_in_days = 7
185185
enable_key_rotation = true
@@ -218,5 +218,10 @@ resource "aws_kms_key" "batch_processor_errors_sns_encryption_key" {
218218

219219
resource "aws_kms_alias" "batch_processor_errors_sns_encryption_key" {
220220
name = "alias/${var.environment}-batch-processor-errors-imms-sns-encryption"
221-
target_key_id = aws_kms_key.batch_processor_errors_sns_encryption_key.key_id
221+
target_key_id = aws_kms_key.error_alerts_sns_encryption_key.key_id
222+
}
223+
224+
resource "aws_kms_alias" "fhir_api_errors_sns_encryption_key" {
225+
name = "alias/${var.environment}-fhir-api-errors-imms-sns-encryption"
226+
target_key_id = aws_kms_key.error_alerts_sns_encryption_key.key_id
222227
}

infrastructure/instance/batch_processor_filter_lambda.tf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,7 @@ resource "aws_lambda_event_source_mapping" "batch_file_created_sqs_to_lambda" {
305305
}
306306

307307
resource "aws_cloudwatch_log_metric_filter" "batch_processor_filter_error_logs" {
308-
count = var.batch_error_notifications_enabled ? 1 : 0
308+
count = var.error_alarm_notifications_enabled ? 1 : 0
309309

310310
name = "${local.short_prefix}-BatchProcessorFilterErrorLogsFilter"
311311
# Ignore errors with the below exception type. This is an expected error which returns items to the queue
@@ -320,7 +320,7 @@ resource "aws_cloudwatch_log_metric_filter" "batch_processor_filter_error_logs"
320320
}
321321

322322
resource "aws_cloudwatch_metric_alarm" "batch_processor_filter_error_alarm" {
323-
count = var.batch_error_notifications_enabled ? 1 : 0
323+
count = var.error_alarm_notifications_enabled ? 1 : 0
324324

325325
alarm_name = "${local.short_prefix}-batch-processor-filter-lambda-error"
326326
comparison_operator = "GreaterThanOrEqualToThreshold"

infrastructure/instance/ecs_batch_processor_config.tf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -372,7 +372,7 @@ resource "aws_cloudwatch_log_group" "pipe_log_group" {
372372
}
373373

374374
resource "aws_cloudwatch_log_metric_filter" "record_processor_task_error_logs" {
375-
count = var.batch_error_notifications_enabled ? 1 : 0
375+
count = var.error_alarm_notifications_enabled ? 1 : 0
376376

377377
name = "${local.short_prefix}-RecordProcessorTaskErrorLogsFilter"
378378
pattern = "%ERROR:%"
@@ -386,7 +386,7 @@ resource "aws_cloudwatch_log_metric_filter" "record_processor_task_error_logs" {
386386
}
387387

388388
resource "aws_cloudwatch_metric_alarm" "record_processor_task_error_alarm" {
389-
count = var.batch_error_notifications_enabled ? 1 : 0
389+
count = var.error_alarm_notifications_enabled ? 1 : 0
390390

391391
alarm_name = "${local.short_prefix}-record-processor-task-error"
392392
comparison_operator = "GreaterThanOrEqualToThreshold"

infrastructure/instance/endpoints.tf

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,14 @@ data "aws_iam_policy_document" "logs_policy_document" {
99
source_policy_documents = [templatefile("${local.policy_path}/log.json", {})]
1010
}
1111
module "get_status" {
12-
source = "./modules/lambda"
13-
prefix = local.prefix
14-
short_prefix = local.short_prefix
15-
function_name = "get_status"
16-
image_uri = module.docker_image.image_uri
17-
policy_json = data.aws_iam_policy_document.logs_policy_document.json
12+
source = "./modules/lambda"
13+
prefix = local.prefix
14+
short_prefix = local.short_prefix
15+
function_name = "get_status"
16+
image_uri = module.docker_image.image_uri
17+
policy_json = data.aws_iam_policy_document.logs_policy_document.json
18+
error_alarm_notifications_enabled = var.error_alarm_notifications_enabled
19+
environment = var.environment
1820
}
1921

2022
locals {
@@ -75,14 +77,16 @@ module "imms_event_endpoint_lambdas" {
7577
source = "./modules/lambda"
7678
count = length(local.imms_endpoints)
7779

78-
prefix = local.prefix
79-
short_prefix = local.short_prefix
80-
function_name = local.imms_endpoints[count.index]
81-
image_uri = module.docker_image.image_uri
82-
policy_json = data.aws_iam_policy_document.imms_policy_document.json
83-
environment_variables = local.imms_lambda_env_vars
84-
vpc_subnet_ids = local.private_subnet_ids
85-
vpc_security_group_ids = [data.aws_security_group.existing_securitygroup.id]
80+
prefix = local.prefix
81+
short_prefix = local.short_prefix
82+
function_name = local.imms_endpoints[count.index]
83+
image_uri = module.docker_image.image_uri
84+
policy_json = data.aws_iam_policy_document.imms_policy_document.json
85+
environment_variables = local.imms_lambda_env_vars
86+
vpc_subnet_ids = local.private_subnet_ids
87+
vpc_security_group_ids = [data.aws_security_group.existing_securitygroup.id]
88+
error_alarm_notifications_enabled = var.error_alarm_notifications_enabled
89+
environment = var.environment
8690
}
8791

8892

infrastructure/instance/environments/dev/int/variables.tfvars

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,6 @@ environment = "dev"
33
immunisation_account_id = "345594581768"
44
dspp_core_account_id = "603871901111"
55
pds_environment = "int"
6-
batch_error_notifications_enabled = true
6+
error_alarm_notifications_enabled = true
77
create_mesh_processor = true
88
has_sub_environment_scope = true

infrastructure/instance/environments/dev/internal-dev/variables.tfvars

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@ environment = "dev"
22
immunisation_account_id = "345594581768"
33
dspp_core_account_id = "603871901111"
44
pds_environment = "int"
5-
batch_error_notifications_enabled = true
5+
error_alarm_notifications_enabled = true
66
create_mesh_processor = false
77
has_sub_environment_scope = true

infrastructure/instance/environments/dev/internal-qa/variables.tfvars

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@ environment = "dev"
22
immunisation_account_id = "345594581768"
33
dspp_core_account_id = "603871901111"
44
pds_environment = "int"
5-
batch_error_notifications_enabled = false
5+
error_alarm_notifications_enabled = false
66
create_mesh_processor = false
77
has_sub_environment_scope = true

0 commit comments

Comments
 (0)