Skip to content

Commit 39a4a49

Browse files
authored
Merge pull request #1712 from guardian/aa/unhealthyInstancesAlarm
feat: Enable unhealthy instance alarm for registration and report
2 parents 0e9f053 + 4502f60 commit 39a4a49

File tree

4 files changed

+494
-7
lines changed

4 files changed

+494
-7
lines changed

cdk/lib/__snapshots__/registration.test.ts.snap

Lines changed: 240 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ exports[`The Registration stack matches the snapshot for CODE 1`] = `
2626
"GuAccessLoggingBucketParameter",
2727
"GuApplicationTargetGroup",
2828
"GuHttpsApplicationListener",
29+
"GuUnhealthyInstancesAlarm",
2930
"GuParameter",
3031
"GuCname",
3132
],
@@ -973,6 +974,125 @@ exports[`The Registration stack matches the snapshot for CODE 1`] = `
973974
},
974975
"Type": "AWS::ElasticLoadBalancingV2::TargetGroup",
975976
},
977+
"UnhealthyInstancesAlarmRegistration921377C3": {
978+
"Properties": {
979+
"ActionsEnabled": true,
980+
"AlarmActions": [
981+
{
982+
"Fn::Join": [
983+
"",
984+
[
985+
"arn:aws:sns:eu-west-1:",
986+
{
987+
"Ref": "AWS::AccountId",
988+
},
989+
":mobile-server-side",
990+
],
991+
],
992+
},
993+
],
994+
"AlarmDescription": "registration's instances have failed healthchecks several times over the last 1 hour.
995+
This typically results in the AutoScaling Group cycling instances and can lead to problems with deployment,
996+
scaling or handling traffic spikes.
997+
998+
Check registration's application logs or ssh onto an unhealthy instance in order to debug these problems.",
999+
"AlarmName": "Unhealthy instances for registration in CODE",
1000+
"ComparisonOperator": "GreaterThanOrEqualToThreshold",
1001+
"DatapointsToAlarm": 30,
1002+
"Dimensions": [
1003+
{
1004+
"Name": "LoadBalancer",
1005+
"Value": {
1006+
"Fn::Join": [
1007+
"",
1008+
[
1009+
{
1010+
"Fn::Select": [
1011+
1,
1012+
{
1013+
"Fn::Split": [
1014+
"/",
1015+
{
1016+
"Ref": "ListenerRegistration1652F707",
1017+
},
1018+
],
1019+
},
1020+
],
1021+
},
1022+
"/",
1023+
{
1024+
"Fn::Select": [
1025+
2,
1026+
{
1027+
"Fn::Split": [
1028+
"/",
1029+
{
1030+
"Ref": "ListenerRegistration1652F707",
1031+
},
1032+
],
1033+
},
1034+
],
1035+
},
1036+
"/",
1037+
{
1038+
"Fn::Select": [
1039+
3,
1040+
{
1041+
"Fn::Split": [
1042+
"/",
1043+
{
1044+
"Ref": "ListenerRegistration1652F707",
1045+
},
1046+
],
1047+
},
1048+
],
1049+
},
1050+
],
1051+
],
1052+
},
1053+
},
1054+
{
1055+
"Name": "TargetGroup",
1056+
"Value": {
1057+
"Fn::GetAtt": [
1058+
"TargetGroupRegistrationF8D1F3E9",
1059+
"TargetGroupFullName",
1060+
],
1061+
},
1062+
},
1063+
],
1064+
"EvaluationPeriods": 60,
1065+
"MetricName": "UnHealthyHostCount",
1066+
"Namespace": "AWS/ApplicationELB",
1067+
"Period": 60,
1068+
"Statistic": "Maximum",
1069+
"Tags": [
1070+
{
1071+
"Key": "App",
1072+
"Value": "registration",
1073+
},
1074+
{
1075+
"Key": "gu:cdk:version",
1076+
"Value": "TEST",
1077+
},
1078+
{
1079+
"Key": "gu:repo",
1080+
"Value": "guardian/mobile-n10n",
1081+
},
1082+
{
1083+
"Key": "Stack",
1084+
"Value": "mobile-notifications",
1085+
},
1086+
{
1087+
"Key": "Stage",
1088+
"Value": "CODE",
1089+
},
1090+
],
1091+
"Threshold": 1,
1092+
"TreatMissingData": "notBreaching",
1093+
},
1094+
"Type": "AWS::CloudWatch::Alarm",
1095+
},
9761096
"mobilenotificationsCODEregistration504D3B6F": {
9771097
"DependsOn": [
9781098
"InstanceRoleRegistrationF217DF71",
@@ -1161,6 +1281,7 @@ exports[`The Registration stack matches the snapshot for PROD 1`] = `
11611281
"GuAccessLoggingBucketParameter",
11621282
"GuApplicationTargetGroup",
11631283
"GuHttpsApplicationListener",
1284+
"GuUnhealthyInstancesAlarm",
11641285
"GuParameter",
11651286
"GuCname",
11661287
],
@@ -2108,6 +2229,125 @@ exports[`The Registration stack matches the snapshot for PROD 1`] = `
21082229
},
21092230
"Type": "AWS::ElasticLoadBalancingV2::TargetGroup",
21102231
},
2232+
"UnhealthyInstancesAlarmRegistration921377C3": {
2233+
"Properties": {
2234+
"ActionsEnabled": true,
2235+
"AlarmActions": [
2236+
{
2237+
"Fn::Join": [
2238+
"",
2239+
[
2240+
"arn:aws:sns:eu-west-1:",
2241+
{
2242+
"Ref": "AWS::AccountId",
2243+
},
2244+
":mobile-server-side",
2245+
],
2246+
],
2247+
},
2248+
],
2249+
"AlarmDescription": "registration's instances have failed healthchecks several times over the last 1 hour.
2250+
This typically results in the AutoScaling Group cycling instances and can lead to problems with deployment,
2251+
scaling or handling traffic spikes.
2252+
2253+
Check registration's application logs or ssh onto an unhealthy instance in order to debug these problems.",
2254+
"AlarmName": "Unhealthy instances for registration in PROD",
2255+
"ComparisonOperator": "GreaterThanOrEqualToThreshold",
2256+
"DatapointsToAlarm": 30,
2257+
"Dimensions": [
2258+
{
2259+
"Name": "LoadBalancer",
2260+
"Value": {
2261+
"Fn::Join": [
2262+
"",
2263+
[
2264+
{
2265+
"Fn::Select": [
2266+
1,
2267+
{
2268+
"Fn::Split": [
2269+
"/",
2270+
{
2271+
"Ref": "ListenerRegistration1652F707",
2272+
},
2273+
],
2274+
},
2275+
],
2276+
},
2277+
"/",
2278+
{
2279+
"Fn::Select": [
2280+
2,
2281+
{
2282+
"Fn::Split": [
2283+
"/",
2284+
{
2285+
"Ref": "ListenerRegistration1652F707",
2286+
},
2287+
],
2288+
},
2289+
],
2290+
},
2291+
"/",
2292+
{
2293+
"Fn::Select": [
2294+
3,
2295+
{
2296+
"Fn::Split": [
2297+
"/",
2298+
{
2299+
"Ref": "ListenerRegistration1652F707",
2300+
},
2301+
],
2302+
},
2303+
],
2304+
},
2305+
],
2306+
],
2307+
},
2308+
},
2309+
{
2310+
"Name": "TargetGroup",
2311+
"Value": {
2312+
"Fn::GetAtt": [
2313+
"TargetGroupRegistrationF8D1F3E9",
2314+
"TargetGroupFullName",
2315+
],
2316+
},
2317+
},
2318+
],
2319+
"EvaluationPeriods": 60,
2320+
"MetricName": "UnHealthyHostCount",
2321+
"Namespace": "AWS/ApplicationELB",
2322+
"Period": 60,
2323+
"Statistic": "Maximum",
2324+
"Tags": [
2325+
{
2326+
"Key": "App",
2327+
"Value": "registration",
2328+
},
2329+
{
2330+
"Key": "gu:cdk:version",
2331+
"Value": "TEST",
2332+
},
2333+
{
2334+
"Key": "gu:repo",
2335+
"Value": "guardian/mobile-n10n",
2336+
},
2337+
{
2338+
"Key": "Stack",
2339+
"Value": "mobile-notifications",
2340+
},
2341+
{
2342+
"Key": "Stage",
2343+
"Value": "PROD",
2344+
},
2345+
],
2346+
"Threshold": 1,
2347+
"TreatMissingData": "notBreaching",
2348+
},
2349+
"Type": "AWS::CloudWatch::Alarm",
2350+
},
21112351
"mobilenotificationsPRODregistration32DCD2DE": {
21122352
"DependsOn": [
21132353
"InstanceRoleRegistrationF217DF71",

0 commit comments

Comments
 (0)