Skip to content

Commit e443b40

Browse files
author
Eugene Cheung
authored
feat(ecs): add ability to alarm on ephemeral storage usage (#525)
Closes #399 --- _By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license_
1 parent fd545df commit e443b40

File tree

9 files changed

+2191
-200
lines changed

9 files changed

+2191
-200
lines changed

API.md

Lines changed: 291 additions & 50 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

lib/monitoring/aws-ecs-patterns/BaseServiceMetricFactory.ts

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,4 +68,37 @@ export class BaseServiceMetricFactory {
6868
EcsContainerInsightsNamespace
6969
);
7070
}
71+
72+
metricEphemeralStorageReserved() {
73+
return this.metricFactory.createMetric(
74+
"EphemeralStorageReserved",
75+
MetricStatistic.MAX,
76+
"Ephemeral Storage Reserved",
77+
this.dimensionsMap,
78+
undefined,
79+
EcsContainerInsightsNamespace
80+
);
81+
}
82+
83+
metricEphemeralStorageUtilized() {
84+
return this.metricFactory.createMetric(
85+
"EphemeralStorageUtilized",
86+
MetricStatistic.MAX,
87+
"Ephemeral Storage Utilized",
88+
this.dimensionsMap,
89+
undefined,
90+
EcsContainerInsightsNamespace
91+
);
92+
}
93+
94+
metricEphemeralStorageUsageInPercent() {
95+
const total = this.metricEphemeralStorageReserved();
96+
const used = this.metricEphemeralStorageUtilized();
97+
98+
return this.metricFactory.createMetricMath(
99+
"100 * (used/total)",
100+
{ used, total },
101+
"Ephemeral Storage Usage"
102+
);
103+
}
71104
}

lib/monitoring/aws-ecs-patterns/Ec2ServiceMonitoring.ts

Lines changed: 45 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -54,19 +54,24 @@ import {
5454

5555
export interface BaseEc2ServiceAlarms {
5656
/**
57-
* minimum number of tasks, as specified in your auto scaling config
57+
* Minimum number of tasks, as specified in your auto scaling config.
5858
*/
5959
readonly minAutoScalingTaskCount?: number;
6060
/**
61-
* maximum number of tasks, as specified in your auto scaling config
61+
* Maximum number of tasks, as specified in your auto scaling config.
6262
*/
6363
readonly maxAutoScalingTaskCount?: number;
64+
readonly addCpuUsageAlarm?: Record<string, UsageThreshold>;
65+
readonly addMemoryUsageAlarm?: Record<string, UsageThreshold>;
66+
6467
/**
65-
* Container Insights needs to be enabled for the cluster for this alarm
68+
* Container Insights needs to be enabled for the cluster for this alarm.
6669
*/
6770
readonly addRunningTaskCountAlarm?: Record<string, RunningTaskCountThreshold>;
68-
readonly addCpuUsageAlarm?: Record<string, UsageThreshold>;
69-
readonly addMemoryUsageAlarm?: Record<string, UsageThreshold>;
71+
/**
72+
* Container Insights needs to be enabled for the cluster for this alarm.
73+
*/
74+
readonly addEphermalStorageUsageAlarm?: Record<string, UsageThreshold>;
7075
}
7176

7277
/**
@@ -170,9 +175,10 @@ export class Ec2ServiceMonitoring extends Monitoring {
170175
readonly healthyTaskCountMetric?: MetricWithAlarmSupport;
171176
readonly unhealthyTaskCountMetric?: MetricWithAlarmSupport;
172177
readonly healthyTaskPercentMetric?: MetricWithAlarmSupport;
173-
readonly runningTaskCountMetric: MetricWithAlarmSupport;
174178
readonly cpuUtilisationMetric: MetricWithAlarmSupport;
175179
readonly memoryUtilisationMetric: MetricWithAlarmSupport;
180+
readonly runningTaskCountMetric: MetricWithAlarmSupport;
181+
readonly ephemeralStorageUsageMetric: MetricWithAlarmSupport;
176182
readonly activeTcpFlowCountMetric?: MetricWithAlarmSupport;
177183
readonly newTcpFlowCountMetric?: MetricWithAlarmSupport;
178184
readonly processedBytesMetric?: MetricWithAlarmSupport;
@@ -216,12 +222,14 @@ export class Ec2ServiceMonitoring extends Monitoring {
216222
this.processedBytesMetric =
217223
this.loadBalancerMetricFactory.metricProcessedBytesMin();
218224
}
219-
this.runningTaskCountMetric =
220-
this.baseServiceMetricFactory.metricRunningTaskCount();
221225
this.cpuUtilisationMetric =
222226
this.baseServiceMetricFactory.metricClusterCpuUtilisationInPercent();
223227
this.memoryUtilisationMetric =
224228
this.baseServiceMetricFactory.metricClusterMemoryUtilisationInPercent();
229+
this.runningTaskCountMetric =
230+
this.baseServiceMetricFactory.metricRunningTaskCount();
231+
this.ephemeralStorageUsageMetric =
232+
this.baseServiceMetricFactory.metricEphemeralStorageUsageInPercent();
225233

226234
const alarmFactory = this.createAlarmFactory(
227235
namingStrategy.resolveAlarmFriendlyName()
@@ -288,17 +296,6 @@ export class Ec2ServiceMonitoring extends Monitoring {
288296
this.addAlarm(createdAlarm);
289297
}
290298
}
291-
292-
for (const disambiguator in props.addRunningTaskCountAlarm) {
293-
const alarmProps = props.addRunningTaskCountAlarm[disambiguator];
294-
const createdAlarm = this.taskHealthAlarmFactory.addRunningTaskCountAlarm(
295-
this.runningTaskCountMetric,
296-
alarmProps,
297-
disambiguator
298-
);
299-
this.taskHealthAnnotations.push(createdAlarm.annotation);
300-
this.addAlarm(createdAlarm);
301-
}
302299
for (const disambiguator in props.addCpuUsageAlarm) {
303300
const alarmProps = props.addCpuUsageAlarm[disambiguator];
304301
const createdAlarm = this.usageAlarmFactory.addMaxCpuUsagePercentAlarm(
@@ -320,6 +317,26 @@ export class Ec2ServiceMonitoring extends Monitoring {
320317
this.addAlarm(createdAlarm);
321318
}
322319

320+
for (const disambiguator in props.addRunningTaskCountAlarm) {
321+
const alarmProps = props.addRunningTaskCountAlarm[disambiguator];
322+
const createdAlarm = this.taskHealthAlarmFactory.addRunningTaskCountAlarm(
323+
this.runningTaskCountMetric,
324+
alarmProps,
325+
disambiguator
326+
);
327+
this.taskHealthAnnotations.push(createdAlarm.annotation);
328+
this.addAlarm(createdAlarm);
329+
}
330+
for (const disambiguator in props.addEphermalStorageUsageAlarm) {
331+
const alarmProps = props.addEphermalStorageUsageAlarm[disambiguator];
332+
const createdAlarm = this.usageAlarmFactory.addMaxDiskUsagePercentAlarm(
333+
this.ephemeralStorageUsageMetric,
334+
alarmProps,
335+
disambiguator
336+
);
337+
this.addAlarm(createdAlarm);
338+
}
339+
323340
if (this.hasLoadBalancer) {
324341
for (const disambiguator in props.addMinProcessedBytesAlarm) {
325342
const alarmProps = props.addMinProcessedBytesAlarm[disambiguator];
@@ -362,7 +379,7 @@ export class Ec2ServiceMonitoring extends Monitoring {
362379

363380
if (this.hasLoadBalancer) {
364381
return baseWidget.concat([
365-
this.createTpcFlowsWidget(QuarterWidth, DefaultGraphWidgetHeight),
382+
this.createTcpFlowsWidget(QuarterWidth, DefaultGraphWidgetHeight),
366383
this.createTaskHealthWidget(QuarterWidth, DefaultGraphWidgetHeight),
367384
]);
368385
} else {
@@ -422,7 +439,7 @@ export class Ec2ServiceMonitoring extends Monitoring {
422439
});
423440
}
424441

425-
createTpcFlowsWidget(width: number, height: number) {
442+
createTcpFlowsWidget(width: number, height: number) {
426443
const left: IMetric[] = [];
427444
const right: IMetric[] = [];
428445

@@ -448,4 +465,11 @@ export class Ec2ServiceMonitoring extends Monitoring {
448465
rightYAxis: SizeAxisBytesFromZero,
449466
});
450467
}
468+
469+
/**
470+
* @deprecated use {@link createTcpFlowsWidget} instead.
471+
*/
472+
createTpcFlowsWidget(width: number, height: number) {
473+
return this.createTcpFlowsWidget(width, height);
474+
}
451475
}

lib/monitoring/aws-ecs-patterns/FargateServiceMonitoring.ts

Lines changed: 45 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -54,19 +54,24 @@ import {
5454

5555
export interface BaseFargateServiceAlarms {
5656
/**
57-
* minimum number of tasks, as specified in your auto scaling config
57+
* Minimum number of tasks, as specified in your auto scaling config.
5858
*/
5959
readonly minAutoScalingTaskCount?: number;
6060
/**
61-
* maximum number of tasks, as specified in your auto scaling config
61+
* Maximum number of tasks, as specified in your auto scaling config.
6262
*/
6363
readonly maxAutoScalingTaskCount?: number;
64+
readonly addCpuUsageAlarm?: Record<string, UsageThreshold>;
65+
readonly addMemoryUsageAlarm?: Record<string, UsageThreshold>;
66+
6467
/**
65-
* Container Insights needs to be enabled for the cluster for this alarm
68+
* Container Insights needs to be enabled for the cluster for this alarm.
6669
*/
6770
readonly addRunningTaskCountAlarm?: Record<string, RunningTaskCountThreshold>;
68-
readonly addCpuUsageAlarm?: Record<string, UsageThreshold>;
69-
readonly addMemoryUsageAlarm?: Record<string, UsageThreshold>;
71+
/**
72+
* Container Insights needs to be enabled for the cluster for this alarm.
73+
*/
74+
readonly addEphermalStorageUsageAlarm?: Record<string, UsageThreshold>;
7075
}
7176

7277
/**
@@ -170,9 +175,10 @@ export class FargateServiceMonitoring extends Monitoring {
170175
readonly healthyTaskCountMetric?: MetricWithAlarmSupport;
171176
readonly unhealthyTaskCountMetric?: MetricWithAlarmSupport;
172177
readonly healthyTaskPercentMetric?: MetricWithAlarmSupport;
173-
readonly runningTaskCountMetric: MetricWithAlarmSupport;
174178
readonly cpuUtilisationMetric: MetricWithAlarmSupport;
175179
readonly memoryUtilisationMetric: MetricWithAlarmSupport;
180+
readonly runningTaskCountMetric: MetricWithAlarmSupport;
181+
readonly ephemeralStorageUsageMetric: MetricWithAlarmSupport;
176182
readonly activeTcpFlowCountMetric?: MetricWithAlarmSupport;
177183
readonly newTcpFlowCountMetric?: MetricWithAlarmSupport;
178184
readonly processedBytesMetric?: MetricWithAlarmSupport;
@@ -219,12 +225,14 @@ export class FargateServiceMonitoring extends Monitoring {
219225
this.processedBytesMetric =
220226
this.loadBalancerMetricFactory.metricProcessedBytesMin();
221227
}
222-
this.runningTaskCountMetric =
223-
this.baseServiceMetricFactory.metricRunningTaskCount();
224228
this.cpuUtilisationMetric =
225229
this.baseServiceMetricFactory.metricClusterCpuUtilisationInPercent();
226230
this.memoryUtilisationMetric =
227231
this.baseServiceMetricFactory.metricClusterMemoryUtilisationInPercent();
232+
this.runningTaskCountMetric =
233+
this.baseServiceMetricFactory.metricRunningTaskCount();
234+
this.ephemeralStorageUsageMetric =
235+
this.baseServiceMetricFactory.metricEphemeralStorageUsageInPercent();
228236

229237
const alarmFactory = this.createAlarmFactory(
230238
namingStrategy.resolveAlarmFriendlyName()
@@ -292,16 +300,6 @@ export class FargateServiceMonitoring extends Monitoring {
292300
}
293301
}
294302

295-
for (const disambiguator in props.addRunningTaskCountAlarm) {
296-
const alarmProps = props.addRunningTaskCountAlarm[disambiguator];
297-
const createdAlarm = this.taskHealthAlarmFactory.addRunningTaskCountAlarm(
298-
this.runningTaskCountMetric,
299-
alarmProps,
300-
disambiguator
301-
);
302-
this.taskHealthAnnotations.push(createdAlarm.annotation);
303-
this.addAlarm(createdAlarm);
304-
}
305303
for (const disambiguator in props.addCpuUsageAlarm) {
306304
const alarmProps = props.addCpuUsageAlarm[disambiguator];
307305
const createdAlarm = this.usageAlarmFactory.addMaxCpuUsagePercentAlarm(
@@ -323,6 +321,26 @@ export class FargateServiceMonitoring extends Monitoring {
323321
this.addAlarm(createdAlarm);
324322
}
325323

324+
for (const disambiguator in props.addRunningTaskCountAlarm) {
325+
const alarmProps = props.addRunningTaskCountAlarm[disambiguator];
326+
const createdAlarm = this.taskHealthAlarmFactory.addRunningTaskCountAlarm(
327+
this.runningTaskCountMetric,
328+
alarmProps,
329+
disambiguator
330+
);
331+
this.taskHealthAnnotations.push(createdAlarm.annotation);
332+
this.addAlarm(createdAlarm);
333+
}
334+
for (const disambiguator in props.addEphermalStorageUsageAlarm) {
335+
const alarmProps = props.addEphermalStorageUsageAlarm[disambiguator];
336+
const createdAlarm = this.usageAlarmFactory.addMaxDiskUsagePercentAlarm(
337+
this.ephemeralStorageUsageMetric,
338+
alarmProps,
339+
disambiguator
340+
);
341+
this.addAlarm(createdAlarm);
342+
}
343+
326344
if (this.hasLoadBalancer) {
327345
for (const disambiguator in props.addMinProcessedBytesAlarm) {
328346
const alarmProps = props.addMinProcessedBytesAlarm[disambiguator];
@@ -365,7 +383,7 @@ export class FargateServiceMonitoring extends Monitoring {
365383

366384
if (this.hasLoadBalancer) {
367385
return baseWidget.concat([
368-
this.createTpcFlowsWidget(QuarterWidth, DefaultGraphWidgetHeight),
386+
this.createTcpFlowsWidget(QuarterWidth, DefaultGraphWidgetHeight),
369387
this.createTaskHealthWidget(QuarterWidth, DefaultGraphWidgetHeight),
370388
]);
371389
} else {
@@ -425,7 +443,7 @@ export class FargateServiceMonitoring extends Monitoring {
425443
});
426444
}
427445

428-
createTpcFlowsWidget(width: number, height: number) {
446+
createTcpFlowsWidget(width: number, height: number) {
429447
const left: IMetric[] = [];
430448
const right: IMetric[] = [];
431449

@@ -451,4 +469,11 @@ export class FargateServiceMonitoring extends Monitoring {
451469
rightYAxis: SizeAxisBytesFromZero,
452470
});
453471
}
472+
473+
/**
474+
* @deprecated use {@link createTcpFlowsWidget} instead.
475+
*/
476+
createTpcFlowsWidget(width: number, height: number) {
477+
return this.createTcpFlowsWidget(width, height);
478+
}
454479
}

test/monitoring/aws-ecs-patterns/Ec2ServiceMonitoring.test.ts

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,6 @@ import { TestMonitoringScope } from "../TestMonitoringScope";
9999
minHealthyTaskPercent: 75,
100100
},
101101
},
102-
addRunningTaskCountAlarm: {
103-
Warning: {
104-
maxRunningTasks: 5,
105-
},
106-
},
107102
addCpuUsageAlarm: {
108103
Warning: {
109104
maxUsagePercent: 80,
@@ -114,6 +109,16 @@ import { TestMonitoringScope } from "../TestMonitoringScope";
114109
maxUsagePercent: 80,
115110
},
116111
},
112+
addRunningTaskCountAlarm: {
113+
Warning: {
114+
maxRunningTasks: 5,
115+
},
116+
},
117+
addEphermalStorageUsageAlarm: {
118+
Warning: {
119+
maxUsagePercent: 90,
120+
},
121+
},
117122
addMinProcessedBytesAlarm: {
118123
Warning: {
119124
minProcessedBytes: 1024,
@@ -129,7 +134,7 @@ import { TestMonitoringScope } from "../TestMonitoringScope";
129134
});
130135

131136
addMonitoringDashboardsToStack(stack, monitoring);
132-
expect(numAlarmsCreated).toStrictEqual(7);
137+
expect(numAlarmsCreated).toStrictEqual(8);
133138
expect(Template.fromStack(stack)).toMatchSnapshot();
134139
});
135140

@@ -226,11 +231,6 @@ import { TestMonitoringScope } from "../TestMonitoringScope";
226231
maxUnhealthyTasks: 3,
227232
},
228233
},
229-
addRunningTaskCountAlarm: {
230-
Warning: {
231-
maxRunningTasks: 5,
232-
},
233-
},
234234
addCpuUsageAlarm: {
235235
Warning: {
236236
maxUsagePercent: 80,
@@ -241,6 +241,16 @@ import { TestMonitoringScope } from "../TestMonitoringScope";
241241
maxUsagePercent: 80,
242242
},
243243
},
244+
addRunningTaskCountAlarm: {
245+
Warning: {
246+
maxRunningTasks: 5,
247+
},
248+
},
249+
addEphermalStorageUsageAlarm: {
250+
Warning: {
251+
maxUsagePercent: 90,
252+
},
253+
},
244254
addMinProcessedBytesAlarm: {
245255
Warning: {
246256
minProcessedBytes: 1024,
@@ -256,7 +266,7 @@ import { TestMonitoringScope } from "../TestMonitoringScope";
256266
});
257267

258268
addMonitoringDashboardsToStack(stack, monitoring);
259-
expect(numAlarmsCreated).toStrictEqual(7);
269+
expect(numAlarmsCreated).toStrictEqual(8);
260270
expect(Template.fromStack(stack)).toMatchSnapshot();
261271
});
262272
}

0 commit comments

Comments
 (0)