Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion infrastructure/account/batch_processor_errors_sns_topic.tf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
resource "aws_sns_topic" "batch_processor_errors" {
name = "${var.environment}-batch-processor-errors"
kms_master_key_id = aws_kms_key.batch_processor_errors_sns_encryption_key.arn
kms_master_key_id = aws_kms_key.error_alerts_sns_encryption_key.arn
}

resource "aws_sns_topic_policy" "batch_processor_errors_topic_policy" {
Expand Down
24 changes: 24 additions & 0 deletions infrastructure/account/fhir_api_errors_slack_chatbot.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Slack channel configuration that delivers messages published to the
# FHIR API errors SNS topic into a Slack channel.
resource "aws_chatbot_slack_channel_configuration" "fhir_api_errors" {
configuration_name = "${var.environment}-fhir-api-errors-slack-channel-config"
iam_role_arn = aws_iam_role.fhir_api_errors_chatbot.arn
# "prod" gets its own channel ID; every other environment value uses the second ID.
slack_channel_id = var.environment == "prod" ? "C0A3LPKNKEE" : "C0A4F3G8J0G"
slack_team_id = "TJ00QR03U"
sns_topic_arns = [aws_sns_topic.fhir_api_errors.arn]
}

# IAM role referenced by the FHIR API errors Slack channel configuration.
# The trust policy allows the chatbot.amazonaws.com service principal to assume it.
# NOTE(review): no permissions policy is attached to this role in the lines shown
# here — confirm whether one is required for the notifications to render.
resource "aws_iam_role" "fhir_api_errors_chatbot" {
name = "${var.environment}-fhir-api-errors-chatbot-channel-role"
assume_role_policy = jsonencode({
Version = "2012-10-17"
Statement = [
{
Action = "sts:AssumeRole"
Effect = "Allow"
Sid = "AssumeChatbotRole"
Principal = {
Service = "chatbot.amazonaws.com"
}
},
]
})
}
22 changes: 22 additions & 0 deletions infrastructure/account/fhir_api_errors_sns_topic.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# SNS topic that receives FHIR API error alarm notifications.
# Messages are encrypted with the error alerts SNS KMS key.
resource "aws_sns_topic" "fhir_api_errors" {
name = "${var.environment}-fhir-api-errors"
kms_master_key_id = aws_kms_key.error_alerts_sns_encryption_key.arn
}

# Topic policy granting the CloudWatch service principal permission to publish
# to the FHIR API errors topic.
# NOTE(review): the statement has no Condition (e.g. on source account or alarm ARN),
# so it is not scoped to a particular publisher — confirm this is intended.
resource "aws_sns_topic_policy" "fhir_api_errors_topic_policy" {
arn = aws_sns_topic.fhir_api_errors.arn
policy = jsonencode({
Version = "2012-10-17",
Statement = [
{
Sid = "AllowCloudWatchToPublish",
Effect = "Allow",
Principal = {
Service = "cloudwatch.amazonaws.com"
},
Action = "SNS:Publish",
Resource = aws_sns_topic.fhir_api_errors.arn
}
]
})
}
9 changes: 7 additions & 2 deletions infrastructure/account/kms.tf
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ resource "aws_kms_alias" "id_sync_sqs_encryption" {
target_key_id = aws_kms_key.id_sync_sqs_encryption.key_id
}

resource "aws_kms_key" "batch_processor_errors_sns_encryption_key" {
resource "aws_kms_key" "error_alerts_sns_encryption_key" {
description = "KMS key for encrypting the batch processor errors SNS Topic messages"
deletion_window_in_days = 7
enable_key_rotation = true
Expand Down Expand Up @@ -218,5 +218,10 @@ resource "aws_kms_key" "batch_processor_errors_sns_encryption_key" {

resource "aws_kms_alias" "batch_processor_errors_sns_encryption_key" {
name = "alias/${var.environment}-batch-processor-errors-imms-sns-encryption"
target_key_id = aws_kms_key.batch_processor_errors_sns_encryption_key.key_id
target_key_id = aws_kms_key.error_alerts_sns_encryption_key.key_id
}

# FHIR-API-specific alias that points at the error alerts SNS encryption key.
resource "aws_kms_alias" "fhir_api_errors_sns_encryption_key" {
name = "alias/${var.environment}-fhir-api-errors-imms-sns-encryption"
target_key_id = aws_kms_key.error_alerts_sns_encryption_key.key_id
}
1 change: 1 addition & 0 deletions infrastructure/instance/.terraform.lock.hcl

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions infrastructure/instance/batch_processor_filter_lambda.tf
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ resource "aws_lambda_event_source_mapping" "batch_file_created_sqs_to_lambda" {
}

resource "aws_cloudwatch_log_metric_filter" "batch_processor_filter_error_logs" {
count = var.batch_error_notifications_enabled ? 1 : 0
count = var.error_alarm_notifications_enabled ? 1 : 0

name = "${local.short_prefix}-BatchProcessorFilterErrorLogsFilter"
# Ignore errors with the below exception type. This is an expected error which returns items to the queue
Expand All @@ -320,7 +320,7 @@ resource "aws_cloudwatch_log_metric_filter" "batch_processor_filter_error_logs"
}

resource "aws_cloudwatch_metric_alarm" "batch_processor_filter_error_alarm" {
count = var.batch_error_notifications_enabled ? 1 : 0
count = var.error_alarm_notifications_enabled ? 1 : 0

alarm_name = "${local.short_prefix}-batch-processor-filter-lambda-error"
comparison_operator = "GreaterThanOrEqualToThreshold"
Expand Down
4 changes: 2 additions & 2 deletions infrastructure/instance/ecs_batch_processor_config.tf
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,7 @@ resource "aws_cloudwatch_log_group" "pipe_log_group" {
}

resource "aws_cloudwatch_log_metric_filter" "record_processor_task_error_logs" {
count = var.batch_error_notifications_enabled ? 1 : 0
count = var.error_alarm_notifications_enabled ? 1 : 0

name = "${local.short_prefix}-RecordProcessorTaskErrorLogsFilter"
pattern = "%ERROR:%"
Expand All @@ -386,7 +386,7 @@ resource "aws_cloudwatch_log_metric_filter" "record_processor_task_error_logs" {
}

resource "aws_cloudwatch_metric_alarm" "record_processor_task_error_alarm" {
count = var.batch_error_notifications_enabled ? 1 : 0
count = var.error_alarm_notifications_enabled ? 1 : 0

alarm_name = "${local.short_prefix}-record-processor-task-error"
comparison_operator = "GreaterThanOrEqualToThreshold"
Expand Down
36 changes: 22 additions & 14 deletions infrastructure/instance/endpoints.tf
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@ data "aws_iam_policy_document" "logs_policy_document" {
source_policy_documents = [templatefile("${local.policy_path}/log.json", {})]
}
module "get_status" {
source = "./modules/lambda"
prefix = local.prefix
short_prefix = local.short_prefix
function_name = "get_status"
image_uri = module.docker_image.image_uri
policy_json = data.aws_iam_policy_document.logs_policy_document.json
source = "./modules/lambda"
prefix = local.prefix
short_prefix = local.short_prefix
function_name = "get_status"
image_uri = module.docker_image.image_uri
policy_json = data.aws_iam_policy_document.logs_policy_document.json
aws_sns_topic = data.aws_sns_topic.fhir_api_errors.arn
error_alarm_notifications_enabled = var.error_alarm_notifications_enabled
}

locals {
Expand Down Expand Up @@ -57,6 +59,10 @@ data "aws_iam_policy_document" "imms_policy_document" {
]
}

# Looks up the existing FHIR API errors SNS topic by name so its ARN can be
# passed to the Lambda modules as the alarm notification target.
data "aws_sns_topic" "fhir_api_errors" {
name = "${var.environment}-fhir-api-errors"
}

data "aws_iam_policy_document" "imms_data_quality_s3_doc" {
source_policy_documents = [
templatefile("${local.policy_path}/s3_data_quality_access.json", {
Expand All @@ -75,14 +81,16 @@ module "imms_event_endpoint_lambdas" {
source = "./modules/lambda"
count = length(local.imms_endpoints)

prefix = local.prefix
short_prefix = local.short_prefix
function_name = local.imms_endpoints[count.index]
image_uri = module.docker_image.image_uri
policy_json = data.aws_iam_policy_document.imms_policy_document.json
environment_variables = local.imms_lambda_env_vars
vpc_subnet_ids = local.private_subnet_ids
vpc_security_group_ids = [data.aws_security_group.existing_securitygroup.id]
prefix = local.prefix
short_prefix = local.short_prefix
function_name = local.imms_endpoints[count.index]
image_uri = module.docker_image.image_uri
policy_json = data.aws_iam_policy_document.imms_policy_document.json
environment_variables = local.imms_lambda_env_vars
vpc_subnet_ids = local.private_subnet_ids
vpc_security_group_ids = [data.aws_security_group.existing_securitygroup.id]
aws_sns_topic = data.aws_sns_topic.fhir_api_errors.arn
error_alarm_notifications_enabled = var.error_alarm_notifications_enabled
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@ environment = "dev"
immunisation_account_id = "345594581768"
dspp_core_account_id = "603871901111"
pds_environment = "int"
batch_error_notifications_enabled = true
error_alarm_notifications_enabled = true
create_mesh_processor = true
has_sub_environment_scope = true
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ environment = "dev"
immunisation_account_id = "345594581768"
dspp_core_account_id = "603871901111"
pds_environment = "int"
batch_error_notifications_enabled = true
error_alarm_notifications_enabled = true
create_mesh_processor = false
has_sub_environment_scope = true
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ environment = "dev"
immunisation_account_id = "345594581768"
dspp_core_account_id = "603871901111"
pds_environment = "int"
batch_error_notifications_enabled = false
error_alarm_notifications_enabled = false
create_mesh_processor = false
has_sub_environment_scope = true
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ environment = "dev"
immunisation_account_id = "345594581768"
dspp_core_account_id = "603871901111"
pds_environment = "int"
batch_error_notifications_enabled = false
error_alarm_notifications_enabled = false
create_mesh_processor = false
has_sub_environment_scope = true
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ environment = "dev"
immunisation_account_id = "345594581768"
dspp_core_account_id = "603871901111"
pds_environment = "ref"
batch_error_notifications_enabled = true
error_alarm_notifications_enabled = true
create_mesh_processor = false
has_sub_environment_scope = true
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ environment = "preprod"
immunisation_account_id = "084828561157"
dspp_core_account_id = "603871901111"
pds_environment = "int"
batch_error_notifications_enabled = true
error_alarm_notifications_enabled = true
create_mesh_processor = true
has_sub_environment_scope = false
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ environment = "preprod"
immunisation_account_id = "084828561157"
dspp_core_account_id = "603871901111"
pds_environment = "int"
batch_error_notifications_enabled = true
error_alarm_notifications_enabled = true
create_mesh_processor = true
has_sub_environment_scope = false
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ environment = "prod"
immunisation_account_id = "664418956997"
dspp_core_account_id = "232116723729"
pds_environment = "prod"
batch_error_notifications_enabled = true
error_alarm_notifications_enabled = true
create_mesh_processor = true
has_sub_environment_scope = false
dspp_kms_key_alias = "nhsd-dspp-core-prod-extended-attributes-gdp-key"
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ environment = "prod"
immunisation_account_id = "664418956997"
dspp_core_account_id = "232116723729"
pds_environment = "prod"
batch_error_notifications_enabled = true
error_alarm_notifications_enabled = true
create_mesh_processor = true
has_sub_environment_scope = false
dspp_kms_key_alias = "nhsd-dspp-core-prod-extended-attributes-gdp-key"
6 changes: 3 additions & 3 deletions infrastructure/instance/file_name_processor.tf
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ resource "aws_cloudwatch_log_group" "file_name_processor_log_group" {
}

resource "aws_cloudwatch_log_metric_filter" "file_name_processor_error_logs" {
count = var.batch_error_notifications_enabled ? 1 : 0
count = var.error_alarm_notifications_enabled ? 1 : 0

name = "${local.short_prefix}-FilenameProcessorErrorLogsFilter"
pattern = "%\\[ERROR\\]%"
Expand All @@ -390,7 +390,7 @@ resource "aws_cloudwatch_log_metric_filter" "file_name_processor_error_logs" {
}

resource "aws_cloudwatch_metric_alarm" "file_name_processor_error_alarm" {
count = var.batch_error_notifications_enabled ? 1 : 0
count = var.error_alarm_notifications_enabled ? 1 : 0

alarm_name = "${local.short_prefix}-file-name-processor-lambda-error"
comparison_operator = "GreaterThanOrEqualToThreshold"
Expand All @@ -403,4 +403,4 @@ resource "aws_cloudwatch_metric_alarm" "file_name_processor_error_alarm" {
alarm_description = "This sets off an alarm for any error logs found in the file name processor Lambda function"
alarm_actions = [data.aws_sns_topic.batch_processor_errors.arn]
treat_missing_data = "notBreaching"
}
}
32 changes: 31 additions & 1 deletion infrastructure/instance/modules/lambda/lambda.tf
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,37 @@ resource "aws_cloudwatch_log_metric_filter" "max_memory_used_metric" {

metric_transformation {
name = "max-memory-used"
namespace = "${var.short_prefix}_${var.function_name}"
namespace = var.short_prefix
value = "$18"
}
}

# Metric filter over this Lambda's log group that emits a value of 1 for every
# JSON log event whose operation_outcome.status is "500" or "403".
# Only created when error alarm notifications are enabled.
resource "aws_cloudwatch_log_metric_filter" "fhir_api_error_logs" {
count = var.error_alarm_notifications_enabled ? 1 : 0

name = "${var.short_prefix}_${var.function_name}-ErrorLogsFilter"
pattern = "{ $.operation_outcome.status = \"500\" || $.operation_outcome.status = \"403\" }"
log_group_name = module.lambda_function_container_image.lambda_cloudwatch_log_group_name

# Metric name and namespace are both scoped per function via the prefix and function name.
metric_transformation {
name = "${var.short_prefix}_${var.function_name}-ApiErrorLogs"
namespace = "${var.short_prefix}_${var.function_name}-Lambda"
value = "1"
}
}

# Alarm that fires when the FHIR API error log metric filter records at least one
# matching log event (status "500" or "403") within a 120-second period.
# Only created when error alarm notifications are enabled.
resource "aws_cloudwatch_metric_alarm" "fhir_api_error_alarm" {
  count = var.error_alarm_notifications_enabled ? 1 : 0

  alarm_name          = "${var.short_prefix}_${var.function_name}-lambda-error"
  comparison_operator = "GreaterThanOrEqualToThreshold"
  evaluation_periods  = 1
  # Must match the metric_transformation name and namespace published by
  # aws_cloudwatch_log_metric_filter.fhir_api_error_logs; a mismatch means the
  # alarm watches a metric that is never emitted and therefore never fires.
  metric_name       = "${var.short_prefix}_${var.function_name}-ApiErrorLogs"
  namespace         = "${var.short_prefix}_${var.function_name}-Lambda"
  period            = 120
  statistic         = "Sum"
  threshold         = 1
  alarm_description = "This sets off an alarm for any error logs found in fhir api Lambda function"
  # When no SNS topic ARN is supplied the alarm is still created but has no actions.
  alarm_actions = var.aws_sns_topic != null ? [var.aws_sns_topic] : []
  # Periods with no matching log events are treated as healthy rather than missing.
  treat_missing_data = "notBreaching"
}
10 changes: 10 additions & 0 deletions infrastructure/instance/modules/lambda/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,16 @@ variable "function_name" {
type = string
}

# Optional. Defaults to null, in which case the module's error alarm is
# configured with an empty list of alarm actions.
variable "aws_sns_topic" {
description = "SNS topic ARN for CloudWatch alarm notifications"
type = string
default = null
}
# Controls whether the error log metric filter and its alarm are created for this
# Lambda. Declared as bool because it is used directly as a condition in `count`
# expressions and is supplied from a bool variable by the calling configuration.
variable "error_alarm_notifications_enabled" {
  description = "Switch to enable error alarm notifications for this Lambda, so alerting can differ between environments"
  type        = bool
}

variable "image_uri" {
type = string
}
Expand Down
4 changes: 2 additions & 2 deletions infrastructure/instance/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ variable "pds_environment" {
}

# Remember to switch off in PR envs after testing
variable "batch_error_notifications_enabled" {
variable "error_alarm_notifications_enabled" {
default = true
description = "Switch to enable batch processing error notifications to Slack"
description = "Switch to enable error alarm notifications to Slack"
type = bool
}

Expand Down