Skip to content

Commit 40131ce

Browse files
miloszwatrobaMilosz Watroba
authored and committed
feat(loadbalancing): add fail-open monitoring (#537)
Fixes #532.

This pull request adds fail-open monitoring to our Load Balancing monitors. To give context about fail-open behavior ([ref](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/target-group-health-checks.html)):

> If a target group contains only unhealthy registered targets, the load balancer routes requests to all those targets, regardless of their health status. This means that if all targets fail health checks at the same time in all enabled Availability Zones, the load balancer fails open. The effect of the fail-open is to allow traffic to all targets in all enabled Availability Zones, regardless of their health status, based on the load balancing algorithm.

Adding this metric will give better visibility into whether the Load Balancer's fail-open routing was used during incidents.

The metrics added as part of this pull request are in line with the AWS documentation:

* `UnhealthyRoutingRequestCount` for ApplicationLoadBalancer with `LoadBalancer` and `TargetGroup` dimensions ([ref](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-cloudwatch-metrics.html))
* `UnhealthyRoutingFlowCount` for NetworkLoadBalancer with `LoadBalancer` dimension ([ref](https://docs.aws.amazon.com/elasticloadbalancing/latest/network/load-balancer-cloudwatch-metrics.html))

These metrics are reported conditionally, only when they have nonzero values. Thus, I added a `FILL(metric, 0)` metric math expression to correctly represent the values on the dashboards.

Tested with NetworkLoadBalancer, as that's the setup I have on my account - the ApplicationLoadBalancer values are based only on the documentation.

---

_By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license_

Co-authored-by: Milosz Watroba <[email protected]>
1 parent 318c312 commit 40131ce

File tree

11 files changed

+1083
-48
lines changed

11 files changed

+1083
-48
lines changed

API.md

Lines changed: 43 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

lib/monitoring/aws-ecs-patterns/Ec2ServiceMonitoring.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@ export class Ec2ServiceMonitoring extends Monitoring {
181181
readonly ephemeralStorageUsageMetric: MetricWithAlarmSupport;
182182
readonly activeTcpFlowCountMetric?: MetricWithAlarmSupport;
183183
readonly newTcpFlowCountMetric?: MetricWithAlarmSupport;
184+
readonly unhealthyRoutingFlowCountMetric?: MetricWithAlarmSupport;
184185
readonly processedBytesMetric?: MetricWithAlarmSupport;
185186

186187
private hasLoadBalancer: boolean;
@@ -219,6 +220,8 @@ export class Ec2ServiceMonitoring extends Monitoring {
219220
this.loadBalancerMetricFactory.metricActiveConnectionCount();
220221
this.newTcpFlowCountMetric =
221222
this.loadBalancerMetricFactory.metricNewConnectionCount();
223+
this.unhealthyRoutingFlowCountMetric =
224+
this.loadBalancerMetricFactory.metricUnhealthyRoutingCount();
222225
this.processedBytesMetric =
223226
this.loadBalancerMetricFactory.metricProcessedBytesMin();
224227
}
@@ -451,6 +454,10 @@ export class Ec2ServiceMonitoring extends Monitoring {
451454
left.push(this.newTcpFlowCountMetric);
452455
}
453456

457+
if (this.unhealthyRoutingFlowCountMetric) {
458+
left.push(this.unhealthyRoutingFlowCountMetric);
459+
}
460+
454461
if (this.processedBytesMetric) {
455462
right.push(this.processedBytesMetric);
456463
}

lib/monitoring/aws-ecs-patterns/FargateServiceMonitoring.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@ export class FargateServiceMonitoring extends Monitoring {
181181
readonly ephemeralStorageUsageMetric: MetricWithAlarmSupport;
182182
readonly activeTcpFlowCountMetric?: MetricWithAlarmSupport;
183183
readonly newTcpFlowCountMetric?: MetricWithAlarmSupport;
184+
readonly unhealthyRoutingFlowCountMetric?: MetricWithAlarmSupport;
184185
readonly processedBytesMetric?: MetricWithAlarmSupport;
185186

186187
private hasLoadBalancer: boolean;
@@ -222,6 +223,8 @@ export class FargateServiceMonitoring extends Monitoring {
222223
this.loadBalancerMetricFactory.metricActiveConnectionCount();
223224
this.newTcpFlowCountMetric =
224225
this.loadBalancerMetricFactory.metricNewConnectionCount();
226+
this.unhealthyRoutingFlowCountMetric =
227+
this.loadBalancerMetricFactory.metricUnhealthyRoutingCount();
225228
this.processedBytesMetric =
226229
this.loadBalancerMetricFactory.metricProcessedBytesMin();
227230
}
@@ -455,6 +458,10 @@ export class FargateServiceMonitoring extends Monitoring {
455458
left.push(this.newTcpFlowCountMetric);
456459
}
457460

461+
if (this.unhealthyRoutingFlowCountMetric) {
462+
left.push(this.unhealthyRoutingFlowCountMetric);
463+
}
464+
458465
if (this.processedBytesMetric) {
459466
right.push(this.processedBytesMetric);
460467
}

lib/monitoring/aws-loadbalancing/ApplicationLoadBalancerMetricFactory.ts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,4 +107,21 @@ export class ApplicationLoadBalancerMetricFactory
107107
})
108108
);
109109
}
110+
111+
/**
 * Builds the fail-open ("unhealthy routing") metric for the application
 * target group.
 *
 * Reads the `UnhealthyRoutingRequestCount` metric with the SUM statistic and
 * wraps it in a `FILL(..., 0)` metric math expression labelled
 * "Unhealthy routing (fail open)".
 *
 * @returns the metric math expression created by the metric factory
 */
metricUnhealthyRoutingCount() {
112+
const unhealthyRoutingRequestCount = this.metricFactory.adaptMetric(
113+
this.applicationTargetGroup.metrics.custom(
114+
"UnhealthyRoutingRequestCount",
115+
{
116+
statistic: MetricStatistic.SUM,
117+
}
118+
)
119+
);
120+
121+
// Per the accompanying commit description, this metric is reported only when
// nonzero; FILL(..., 0) substitutes 0 for the missing datapoints so the
// dashboard shows a continuous line.
return this.metricFactory.createMetricMath(
122+
"FILL(unhealthyRoutingRequestCount, 0)",
123+
{ unhealthyRoutingRequestCount },
124+
"Unhealthy routing (fail open)"
125+
);
126+
}
110127
}

lib/monitoring/aws-loadbalancing/LoadBalancerMetricFactory.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,5 +104,7 @@ export interface ILoadBalancerMetricFactory {
104104

105105
metricNewConnectionCount(): MetricWithAlarmSupport;
106106

107+
metricUnhealthyRoutingCount(): MetricWithAlarmSupport;
108+
107109
metricProcessedBytesMin(): MetricWithAlarmSupport;
108110
}

lib/monitoring/aws-loadbalancing/NetworkLoadBalancerMetricFactory.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,4 +107,18 @@ export class NetworkLoadBalancerMetricFactory
107107
})
108108
);
109109
}
110+
111+
/**
 * Builds the fail-open ("unhealthy routing") metric for the network load
 * balancer.
 *
 * Reads the `UnhealthyRoutingFlowCount` metric with the SUM statistic and
 * wraps it in a `FILL(..., 0)` metric math expression labelled
 * "Unhealthy routing (fail open)".
 *
 * @returns the metric math expression created by the metric factory
 */
metricUnhealthyRoutingCount() {
112+
const unhealthyRoutingFlowCount = this.metricFactory.adaptMetric(
113+
this.networkLoadBalancer.metrics.custom("UnhealthyRoutingFlowCount", {
114+
statistic: MetricStatistic.SUM,
115+
})
116+
);
117+
118+
// Per the accompanying commit description, this metric is reported only when
// nonzero; FILL(..., 0) substitutes 0 for the missing datapoints so the
// dashboard shows a continuous line.
return this.metricFactory.createMetricMath(
119+
"FILL(unhealthyRoutingFlowCount, 0)",
120+
{ unhealthyRoutingFlowCount },
121+
"Unhealthy routing (fail open)"
122+
);
123+
}
110124
}

lib/monitoring/aws-loadbalancing/NetworkLoadBalancerMonitoring.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ export class NetworkLoadBalancerMonitoring extends Monitoring {
6262
protected readonly healthyTaskPercentMetric: MetricWithAlarmSupport;
6363
protected readonly activeTcpFlowCountMetric: MetricWithAlarmSupport;
6464
protected readonly newTcpFlowCountMetric: MetricWithAlarmSupport;
65+
protected readonly unhealthyRoutingFlowCountMetric: MetricWithAlarmSupport;
6566
protected readonly processedBytesMetric: MetricWithAlarmSupport;
6667

6768
constructor(
@@ -89,6 +90,8 @@ export class NetworkLoadBalancerMonitoring extends Monitoring {
8990
this.activeTcpFlowCountMetric =
9091
this.metricFactory.metricActiveConnectionCount();
9192
this.newTcpFlowCountMetric = this.metricFactory.metricNewConnectionCount();
93+
this.unhealthyRoutingFlowCountMetric =
94+
this.metricFactory.metricUnhealthyRoutingCount();
9295
this.processedBytesMetric = this.metricFactory.metricProcessedBytesMin();
9396

9497
const alarmFactory = this.createAlarmFactory(
@@ -184,7 +187,11 @@ export class NetworkLoadBalancerMonitoring extends Monitoring {
184187
width,
185188
height,
186189
title: "TCP Flows",
187-
left: [this.activeTcpFlowCountMetric, this.newTcpFlowCountMetric],
190+
left: [
191+
this.activeTcpFlowCountMetric,
192+
this.newTcpFlowCountMetric,
193+
this.unhealthyRoutingFlowCountMetric,
194+
],
188195
leftYAxis: CountAxisFromZero,
189196
right: [this.processedBytesMetric],
190197
rightYAxis: SizeAxisBytesFromZero,

0 commit comments

Comments
 (0)