Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion infrastructure/account/batch_processor_errors_sns_topic.tf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
resource "aws_sns_topic" "batch_processor_errors" {
name = "${var.environment}-batch-processor-errors"
kms_master_key_id = aws_kms_key.batch_processor_errors_sns_encryption_key.arn
kms_master_key_id = aws_kms_key.error_alerts_sns_encryption_key.arn
}

resource "aws_sns_topic_policy" "batch_processor_errors_topic_policy" {
Expand Down
24 changes: 24 additions & 0 deletions infrastructure/account/fhir_api_errors_slack_chatbot.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# AWS Chatbot Slack channel configuration that subscribes a Slack channel to
# the FHIR API errors SNS topic.
resource "aws_chatbot_slack_channel_configuration" "fhir_api_errors" {
  configuration_name = "${var.environment}-fhir-api-errors-slack-channel-config"

  # Slack workspace and destination channel. The "prod" environment posts to
  # its own channel; every other environment uses the second channel ID.
  slack_team_id    = "TJ00QR03U"
  slack_channel_id = var.environment == "prod" ? "C0A3LPKNKEE" : "C0A4F3G8J0G"

  # IAM role handed to the Chatbot configuration, and the topic it listens to.
  iam_role_arn   = aws_iam_role.fhir_api_errors_chatbot.arn
  sns_topic_arns = [aws_sns_topic.fhir_api_errors.arn]
}

# IAM role referenced by the FHIR API errors Chatbot channel configuration.
# Its trust policy allows only the AWS Chatbot service principal to assume it.
# No permissions policy is attached in this block.
resource "aws_iam_role" "fhir_api_errors_chatbot" {
  name = "${var.environment}-fhir-api-errors-chatbot-channel-role"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Sid       = "AssumeChatbotRole"
        Effect    = "Allow"
        Action    = "sts:AssumeRole"
        Principal = { Service = "chatbot.amazonaws.com" }
      }
    ]
  })
}
22 changes: 22 additions & 0 deletions infrastructure/account/fhir_api_errors_sns_topic.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# SNS topic that receives FHIR API error notifications for this environment.
# Messages are encrypted at rest with the error-alerts KMS key.
resource "aws_sns_topic" "fhir_api_errors" {
  kms_master_key_id = aws_kms_key.error_alerts_sns_encryption_key.arn
  name              = "${var.environment}-fhir-api-errors"
}

# Resource policy for the FHIR API errors topic: allows CloudWatch alarms to
# publish to it.
resource "aws_sns_topic_policy" "fhir_api_errors_topic_policy" {
  arn = aws_sns_topic.fhir_api_errors.arn

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Sid       = "AllowCloudWatchToPublish"
        Effect    = "Allow"
        Principal = { Service = "cloudwatch.amazonaws.com" }
        Action    = "SNS:Publish"
        Resource  = aws_sns_topic.fhir_api_errors.arn

        # Confused-deputy guard: without a source condition, the CloudWatch
        # service principal could publish on behalf of an alarm in ANY AWS
        # account. Restrict publishing to alarms owned by the same account
        # that owns this topic.
        Condition = {
          ArnLike = {
            "aws:SourceArn" = "arn:aws:cloudwatch:*:${aws_sns_topic.fhir_api_errors.owner}:alarm:*"
          }
        }
      }
    ]
  })
}
9 changes: 7 additions & 2 deletions infrastructure/account/kms.tf
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ resource "aws_kms_alias" "id_sync_sqs_encryption" {
target_key_id = aws_kms_key.id_sync_sqs_encryption.key_id
}

resource "aws_kms_key" "batch_processor_errors_sns_encryption_key" {
resource "aws_kms_key" "error_alerts_sns_encryption_key" {
description = "KMS key for encrypting the batch processor errors SNS Topic messages"
deletion_window_in_days = 7
enable_key_rotation = true
Expand Down Expand Up @@ -218,5 +218,10 @@ resource "aws_kms_key" "batch_processor_errors_sns_encryption_key" {

resource "aws_kms_alias" "batch_processor_errors_sns_encryption_key" {
name = "alias/${var.environment}-batch-processor-errors-imms-sns-encryption"
target_key_id = aws_kms_key.batch_processor_errors_sns_encryption_key.key_id
target_key_id = aws_kms_key.error_alerts_sns_encryption_key.key_id
}

# Alias for the FHIR API errors topic that points at the error-alerts SNS
# encryption key.
resource "aws_kms_alias" "fhir_api_errors_sns_encryption_key" {
  target_key_id = aws_kms_key.error_alerts_sns_encryption_key.key_id
  name          = "alias/${var.environment}-fhir-api-errors-imms-sns-encryption"
}
4 changes: 2 additions & 2 deletions infrastructure/instance/batch_processor_filter_lambda.tf
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ resource "aws_lambda_event_source_mapping" "batch_file_created_sqs_to_lambda" {
}

resource "aws_cloudwatch_log_metric_filter" "batch_processor_filter_error_logs" {
count = var.batch_error_notifications_enabled ? 1 : 0
count = var.error_alarm_notifications_enabled ? 1 : 0

name = "${local.short_prefix}-BatchProcessorFilterErrorLogsFilter"
# Ignore errors with the below exception type. This is an expected error which returns items to the queue
Expand All @@ -320,7 +320,7 @@ resource "aws_cloudwatch_log_metric_filter" "batch_processor_filter_error_logs"
}

resource "aws_cloudwatch_metric_alarm" "batch_processor_filter_error_alarm" {
count = var.batch_error_notifications_enabled ? 1 : 0
count = var.error_alarm_notifications_enabled ? 1 : 0

alarm_name = "${local.short_prefix}-batch-processor-filter-lambda-error"
comparison_operator = "GreaterThanOrEqualToThreshold"
Expand Down
4 changes: 2 additions & 2 deletions infrastructure/instance/ecs_batch_processor_config.tf
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,7 @@ resource "aws_cloudwatch_log_group" "pipe_log_group" {
}

resource "aws_cloudwatch_log_metric_filter" "record_processor_task_error_logs" {
count = var.batch_error_notifications_enabled ? 1 : 0
count = var.error_alarm_notifications_enabled ? 1 : 0

name = "${local.short_prefix}-RecordProcessorTaskErrorLogsFilter"
pattern = "%ERROR:%"
Expand All @@ -386,7 +386,7 @@ resource "aws_cloudwatch_log_metric_filter" "record_processor_task_error_logs" {
}

resource "aws_cloudwatch_metric_alarm" "record_processor_task_error_alarm" {
count = var.batch_error_notifications_enabled ? 1 : 0
count = var.error_alarm_notifications_enabled ? 1 : 0

alarm_name = "${local.short_prefix}-record-processor-task-error"
comparison_operator = "GreaterThanOrEqualToThreshold"
Expand Down
32 changes: 18 additions & 14 deletions infrastructure/instance/endpoints.tf
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@ data "aws_iam_policy_document" "logs_policy_document" {
source_policy_documents = [templatefile("${local.policy_path}/log.json", {})]
}
module "get_status" {
source = "./modules/lambda"
prefix = local.prefix
short_prefix = local.short_prefix
function_name = "get_status"
image_uri = module.docker_image.image_uri
policy_json = data.aws_iam_policy_document.logs_policy_document.json
source = "./modules/lambda"
prefix = local.prefix
short_prefix = local.short_prefix
function_name = "get_status"
image_uri = module.docker_image.image_uri
policy_json = data.aws_iam_policy_document.logs_policy_document.json
error_alarm_notifications_enabled = var.error_alarm_notifications_enabled
environment = var.environment
}

locals {
Expand Down Expand Up @@ -75,14 +77,16 @@ module "imms_event_endpoint_lambdas" {
source = "./modules/lambda"
count = length(local.imms_endpoints)

prefix = local.prefix
short_prefix = local.short_prefix
function_name = local.imms_endpoints[count.index]
image_uri = module.docker_image.image_uri
policy_json = data.aws_iam_policy_document.imms_policy_document.json
environment_variables = local.imms_lambda_env_vars
vpc_subnet_ids = local.private_subnet_ids
vpc_security_group_ids = [data.aws_security_group.existing_securitygroup.id]
prefix = local.prefix
short_prefix = local.short_prefix
function_name = local.imms_endpoints[count.index]
image_uri = module.docker_image.image_uri
policy_json = data.aws_iam_policy_document.imms_policy_document.json
environment_variables = local.imms_lambda_env_vars
vpc_subnet_ids = local.private_subnet_ids
vpc_security_group_ids = [data.aws_security_group.existing_securitygroup.id]
error_alarm_notifications_enabled = var.error_alarm_notifications_enabled
environment = var.environment
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@ environment = "dev"
immunisation_account_id = "345594581768"
dspp_core_account_id = "603871901111"
pds_environment = "int"
batch_error_notifications_enabled = true
error_alarm_notifications_enabled = true
create_mesh_processor = true
has_sub_environment_scope = true
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ environment = "dev"
immunisation_account_id = "345594581768"
dspp_core_account_id = "603871901111"
pds_environment = "int"
batch_error_notifications_enabled = true
error_alarm_notifications_enabled = true
create_mesh_processor = false
has_sub_environment_scope = true
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ environment = "dev"
immunisation_account_id = "345594581768"
dspp_core_account_id = "603871901111"
pds_environment = "int"
batch_error_notifications_enabled = false
error_alarm_notifications_enabled = false
create_mesh_processor = false
has_sub_environment_scope = true
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ environment = "dev"
immunisation_account_id = "345594581768"
dspp_core_account_id = "603871901111"
pds_environment = "int"
batch_error_notifications_enabled = false
error_alarm_notifications_enabled = false
create_mesh_processor = false
has_sub_environment_scope = true
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ environment = "dev"
immunisation_account_id = "345594581768"
dspp_core_account_id = "603871901111"
pds_environment = "ref"
batch_error_notifications_enabled = true
error_alarm_notifications_enabled = true
create_mesh_processor = false
has_sub_environment_scope = true
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ environment = "preprod"
immunisation_account_id = "084828561157"
dspp_core_account_id = "603871901111"
pds_environment = "int"
batch_error_notifications_enabled = true
error_alarm_notifications_enabled = true
create_mesh_processor = true
has_sub_environment_scope = false
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ environment = "preprod"
immunisation_account_id = "084828561157"
dspp_core_account_id = "603871901111"
pds_environment = "int"
batch_error_notifications_enabled = true
error_alarm_notifications_enabled = true
create_mesh_processor = true
has_sub_environment_scope = false
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ environment = "prod"
immunisation_account_id = "664418956997"
dspp_core_account_id = "232116723729"
pds_environment = "prod"
batch_error_notifications_enabled = true
error_alarm_notifications_enabled = true
create_mesh_processor = true
has_sub_environment_scope = false
dspp_kms_key_alias = "nhsd-dspp-core-prod-extended-attributes-gdp-key"
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ environment = "prod"
immunisation_account_id = "664418956997"
dspp_core_account_id = "232116723729"
pds_environment = "prod"
batch_error_notifications_enabled = true
error_alarm_notifications_enabled = true
create_mesh_processor = true
has_sub_environment_scope = false
dspp_kms_key_alias = "nhsd-dspp-core-prod-extended-attributes-gdp-key"
4 changes: 2 additions & 2 deletions infrastructure/instance/file_name_processor.tf
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ resource "aws_cloudwatch_log_group" "file_name_processor_log_group" {
}

resource "aws_cloudwatch_log_metric_filter" "file_name_processor_error_logs" {
count = var.batch_error_notifications_enabled ? 1 : 0
count = var.error_alarm_notifications_enabled ? 1 : 0

name = "${local.short_prefix}-FilenameProcessorErrorLogsFilter"
pattern = "%\\[ERROR\\]%"
Expand All @@ -390,7 +390,7 @@ resource "aws_cloudwatch_log_metric_filter" "file_name_processor_error_logs" {
}

resource "aws_cloudwatch_metric_alarm" "file_name_processor_error_alarm" {
count = var.batch_error_notifications_enabled ? 1 : 0
count = var.error_alarm_notifications_enabled ? 1 : 0

alarm_name = "${local.short_prefix}-file-name-processor-lambda-error"
comparison_operator = "GreaterThanOrEqualToThreshold"
Expand Down
34 changes: 34 additions & 0 deletions infrastructure/instance/modules/lambda/lambda.tf
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,37 @@ resource "aws_cloudwatch_log_metric_filter" "max_memory_used_metric" {
value = "$18"
}
}

# Metric filter on this Lambda's log group. Each JSON log event whose
# operation_outcome.status is "500" or "403" adds 1 to a per-function metric.
# Only created when error alarm notifications are enabled.
resource "aws_cloudwatch_log_metric_filter" "fhir_api_error_logs" {
  count = var.error_alarm_notifications_enabled ? 1 : 0

  name           = "${var.short_prefix}_${var.function_name}-ErrorLogsFilter"
  log_group_name = module.lambda_function_container_image.lambda_cloudwatch_log_group_name
  pattern        = "{ $.operation_outcome.status = \"500\" || $.operation_outcome.status = \"403\" }"

  # Metric name and namespace must match the ones the error alarm watches.
  metric_transformation {
    namespace = "${var.short_prefix}_${var.function_name}-Lambda"
    name      = "${var.short_prefix}_${var.function_name}-ApiErrorLogs"
    value     = "1"
  }
}

# Existing FHIR API errors SNS topic, looked up by name.
# The lookup is gated by the same switch as the alarm so that module instances
# with notifications disabled do not require the topic to exist and do not
# perform a needless read at plan time.
data "aws_sns_topic" "fhir_api_errors" {
  count = var.error_alarm_notifications_enabled ? 1 : 0

  name = "${var.environment}-fhir-api-errors"
}

# Alarm on the per-function ApiErrorLogs metric: fires when the sum over one
# 120-second period is at least 1, and notifies the FHIR API errors topic.
# Periods with no data points are treated as healthy.
resource "aws_cloudwatch_metric_alarm" "fhir_api_error_alarm" {
  count = var.error_alarm_notifications_enabled ? 1 : 0

  alarm_name        = "${var.short_prefix}_${var.function_name}-lambda-error"
  alarm_description = "This sets off an alarm for any error logs found in fhir api Lambda function"

  # Metric identity - must match the metric emitted by the log metric filter.
  namespace   = "${var.short_prefix}_${var.function_name}-Lambda"
  metric_name = "${var.short_prefix}_${var.function_name}-ApiErrorLogs"

  # Evaluation: Sum >= 1 within a single 120-second period.
  statistic           = "Sum"
  period              = 120
  evaluation_periods  = 1
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = 1
  treat_missing_data  = "notBreaching"

  # Index [0] is safe: the data source shares this resource's count condition.
  alarm_actions = [data.aws_sns_topic.fhir_api_errors[0].arn]
}
10 changes: 10 additions & 0 deletions infrastructure/instance/modules/lambda/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ variable "function_name" {
type = string
}

# Boolean switch used in `count` conditions inside this module. Declared as
# bool so that non-boolean values are rejected at plan time instead of relying
# on implicit string-to-bool conversion.
variable "error_alarm_notifications_enabled" {
  description = "Switch to enable error alarm notifications to Slack"
  type        = bool
}

variable "image_uri" {
type = string
}
Expand All @@ -32,3 +37,8 @@ variable "vpc_subnet_ids" {
type = list(string)
default = null
}

# Environment name; within this module it is used to build the name of the
# FHIR API errors SNS topic that is looked up.
variable "environment" {
  type        = string
  description = "The deployment environment (e.g., dev, int, internal-qa, prod)"
}
4 changes: 2 additions & 2 deletions infrastructure/instance/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ variable "pds_environment" {
}

# Remember to switch off in PR envs after testing
variable "batch_error_notifications_enabled" {
variable "error_alarm_notifications_enabled" {
default = true
description = "Switch to enable batch processing error notifications to Slack"
description = "Switch to enable error alarm notifications to Slack"
type = bool
}

Expand Down
Loading