Skip to content

Commit dc2d4db

Browse files
[PRM-134-v2] experiment with more bands for alarms
1 parent f5ad2fa commit dc2d4db

File tree

2 files changed

+38
-3
lines changed

2 files changed

+38
-3
lines changed

infrastructure/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,7 @@
221221
| [aws_cloudwatch_metric_alarm.api_gateway_alarm_5XX](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_metric_alarm) | resource |
222222
| [aws_cloudwatch_metric_alarm.edge_presign_lambda_error](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_metric_alarm) | resource |
223223
| [aws_cloudwatch_metric_alarm.error_alarm_count_high](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_metric_alarm) | resource |
224+
| [aws_cloudwatch_metric_alarm.error_alarm_count_low](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_metric_alarm) | resource |
224225
| [aws_cloudwatch_metric_alarm.error_alarm_count_medium](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_metric_alarm) | resource |
225226
| [aws_cloudwatch_metric_alarm.msn_dlq_new_message](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_metric_alarm) | resource |
226227
| [aws_cloudwatch_metric_alarm.nrl_dlq_new_messages](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_metric_alarm) | resource |

infrastructure/lambda-search-patient.tf

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ module "search-patient-details-gateway" {
3232
# }
3333

3434

35-
resource "aws_cloudwatch_metric_alarm" "error_alarm_count_medium" {
36-
alarm_name = "search_patient_error_count_medium"
35+
resource "aws_cloudwatch_metric_alarm" "error_alarm_count_low" {
36+
alarm_name = "search_patient_error_count_low"
3737
alarm_description = "Triggers when search patient lambda error count is between 1 and 3 within 2mins"
3838
comparison_operator = "GreaterThanThreshold"
3939
threshold = 0
@@ -66,14 +66,48 @@ resource "aws_cloudwatch_metric_alarm" "error_alarm_count_medium" {
6666
}
6767
}
6868

69+
70+
# Medium-severity band of the search patient lambda error alarms.
#
# The "error" metric-math query returns 1 when the 2-minute sum of AWS/Lambda
# Errors (m1) is between 4 and 6 inclusive, and 0 otherwise. The alarm compares
# that 0/1 indicator against `threshold` using GreaterThanThreshold.
#
# NOTE(review): the high-band alarm in this file uses GreaterThanThreshold with
# threshold = 7, so a count of exactly 7 appears to match neither this band
# (4-6) nor the high band (> 7) - confirm the intended boundary.
resource "aws_cloudwatch_metric_alarm" "error_alarm_count_medium" {
  alarm_name          = "search_patient_error_count_medium"
  alarm_description   = "Triggers when search patient lambda error count is between 4 and 6 within 2mins"
  comparison_operator = "GreaterThanThreshold"

  # The evaluated expression only ever yields 0 or 1, so the threshold must be
  # 0 (fire when the indicator is 1). A threshold of 4 can never be exceeded
  # and would leave this alarm permanently silent.
  threshold          = 0
  evaluation_periods = 1

  # Notify the same topic on both the ALARM and OK transitions.
  alarm_actions = [module.search_patient_alarm_topic.arn]
  ok_actions    = [module.search_patient_alarm_topic.arn]

  tags = {
    alerting_type = "KPI"
    alarm_group   = module.search-patient-details-lambda.function_name
  }

  # Band indicator: the only query with return_data = true, i.e. the value
  # the alarm actually evaluates.
  metric_query {
    id          = "error"
    label       = "error count for search patient, medium if between 4 and 6"
    return_data = true
    expression  = "IF(m1 >= 4 AND m1 <= 6, 1, 0)"
  }

  # Raw input to the expression above: sum of lambda errors per 120 s period.
  metric_query {
    id = "m1"

    metric {
      metric_name = "Errors"
      namespace   = "AWS/Lambda"
      period      = 120
      stat        = "Sum"
      dimensions = {
        FunctionName = module.search-patient-details-lambda.function_name
      }
    }
  }
}
69103
resource "aws_cloudwatch_metric_alarm" "error_alarm_count_high" {
70104
alarm_name = "search_patient_error_count_high"
71105
alarm_description = "Triggers when search patient lambda error count is above 3 within 2mins"
72106
comparison_operator = "GreaterThanThreshold"
73107
evaluation_periods = 1
74108
alarm_actions = [module.search_patient_alarm_topic.arn]
75109
ok_actions = [module.search_patient_alarm_topic.arn]
76-
threshold = 3
110+
threshold = 7
77111
period = 120
78112
dimensions = {
79113
FunctionName = module.search-patient-details-lambda.function_name

0 commit comments

Comments
 (0)