Skip to content

Commit d273f84

Browse files
authored
feat(api-gateway): support monitoring slowest requests with trimmed mean (#444)
Closes #443 --- _By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license_
1 parent 241a2dd commit d273f84

File tree

9 files changed

+1407
-43
lines changed

9 files changed

+1407
-43
lines changed

API.md

Lines changed: 328 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

lib/common/metric/MetricStatistic.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ export enum MetricStatistic {
6565
* trimmed mean; calculates the average after removing the 0.01% of data points with the highest values
6666
*/
6767
TM9999 = "tm99.99",
68+
6869
/**
6970
* trimmed mean; calculates the average after removing the 1% lowest data points and the 1% highest data points
7071
*/
@@ -94,6 +95,23 @@ export enum MetricStatistic {
9495
*/
9596
TM70_BOTH = "TM(30%:70%)",
9697

98+
/**
99+
* trimmed mean; calculates the average after removing the 95% lowest data points
100+
*/
101+
TM95_TOP = "TM(95%:100%)",
102+
/**
103+
* trimmed mean; calculates the average after removing the 99% lowest data points
104+
*/
105+
TM99_TOP = "TM(99%:100%)",
106+
/**
107+
* trimmed mean; calculates the average after removing the 99.9% lowest data points
108+
*/
109+
TM999_TOP = "TM(99.9%:100%)",
110+
/**
111+
* trimmed mean; calculates the average after removing the 99.99% lowest data points
112+
*/
113+
TM9999_TOP = "TM(99.99%:100%)",
114+
97115
/**
98116
* winsorized mean; calculates the average while treating the 50% of the highest values to be equal to the value at the 50th percentile
99117
*/

lib/common/monitoring/alarms/LatencyAlarmFactory.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ export enum LatencyType {
2323
TM99 = "TM99",
2424
TM999 = "TM999",
2525
TM9999 = "TM9999",
26+
TM95_TOP = "TM(95%:100%)",
27+
TM99_TOP = "TM(99%:100%)",
28+
TM999_TOP = "TM(99.9%:100%)",
29+
TM9999_TOP = "TM(99.99%:100%)",
2630
AVERAGE = "Average",
2731
}
2832

@@ -58,6 +62,14 @@ export function getLatencyTypeStatistic(latencyType: LatencyType) {
5862
return MetricStatistic.TM999;
5963
case LatencyType.TM9999:
6064
return MetricStatistic.TM9999;
65+
case LatencyType.TM95_TOP:
66+
return MetricStatistic.TM95_TOP;
67+
case LatencyType.TM99_TOP:
68+
return MetricStatistic.TM99_TOP;
69+
case LatencyType.TM999_TOP:
70+
return MetricStatistic.TM999_TOP;
71+
case LatencyType.TM9999_TOP:
72+
return MetricStatistic.TM9999_TOP;
6173
case LatencyType.AVERAGE:
6274
return MetricStatistic.AVERAGE;
6375
default:
@@ -98,6 +110,10 @@ export function getLatencyTypeLabel(latencyType: LatencyType) {
98110
return latencyType.replace("999", "99.9") + averageSuffix;
99111
case LatencyType.P9999:
100112
case LatencyType.TM9999:
113+
case LatencyType.TM95_TOP:
114+
case LatencyType.TM99_TOP:
115+
case LatencyType.TM999_TOP:
116+
case LatencyType.TM9999_TOP:
101117
// we need proper decimal here
102118
return latencyType.replace("9999", "99.99") + averageSuffix;
103119
case LatencyType.AVERAGE:

lib/monitoring/aws-apigateway/ApiGatewayMonitoring.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ export interface ApiGatewayMonitoringOptions extends BaseMonitoringProps {
6464
readonly addLatencyTM99Alarm?: Record<string, LatencyThreshold>;
6565
readonly addLatencyTM999Alarm?: Record<string, LatencyThreshold>;
6666
readonly addLatencyTM9999Alarm?: Record<string, LatencyThreshold>;
67+
readonly addLatencyTM95OutlierAlarm?: Record<string, LatencyThreshold>;
68+
readonly addLatencyTM99OutlierAlarm?: Record<string, LatencyThreshold>;
69+
readonly addLatencyTM999OutlierAlarm?: Record<string, LatencyThreshold>;
70+
readonly addLatencyTM9999OutlierAlarm?: Record<string, LatencyThreshold>;
6771
readonly addLatencyAverageAlarm?: Record<string, LatencyThreshold>;
6872

6973
readonly addLowTpsAlarm?: Record<string, LowTpsThreshold>;
@@ -175,6 +179,10 @@ export class ApiGatewayMonitoring extends Monitoring {
175179
[LatencyType.TM99]: props.addLatencyTM99Alarm,
176180
[LatencyType.TM999]: props.addLatencyTM999Alarm,
177181
[LatencyType.TM9999]: props.addLatencyTM9999Alarm,
182+
[LatencyType.TM95_TOP]: props.addLatencyTM95OutlierAlarm,
183+
[LatencyType.TM99_TOP]: props.addLatencyTM99OutlierAlarm,
184+
[LatencyType.TM999_TOP]: props.addLatencyTM999OutlierAlarm,
185+
[LatencyType.TM9999_TOP]: props.addLatencyTM9999OutlierAlarm,
178186
[LatencyType.AVERAGE]: props.addLatencyAverageAlarm,
179187
};
180188

lib/monitoring/aws-apigatewayv2/ApiGatewayV2HttpApiMonitoring.ts

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,13 +58,20 @@ export interface ApiGatewayV2MonitoringOptions extends BaseMonitoringProps {
5858
readonly addLatencyP999Alarm?: Record<string, LatencyThreshold>;
5959
readonly addLatencyP9999Alarm?: Record<string, LatencyThreshold>;
6060
readonly addLatencyP100Alarm?: Record<string, LatencyThreshold>;
61+
6162
readonly addLatencyTM50Alarm?: Record<string, LatencyThreshold>;
6263
readonly addLatencyTM70Alarm?: Record<string, LatencyThreshold>;
6364
readonly addLatencyTM90Alarm?: Record<string, LatencyThreshold>;
6465
readonly addLatencyTM95Alarm?: Record<string, LatencyThreshold>;
6566
readonly addLatencyTM99Alarm?: Record<string, LatencyThreshold>;
6667
readonly addLatencyTM999Alarm?: Record<string, LatencyThreshold>;
6768
readonly addLatencyTM9999Alarm?: Record<string, LatencyThreshold>;
69+
70+
readonly addLatencyTM95OutlierAlarm?: Record<string, LatencyThreshold>;
71+
readonly addLatencyTM99OutlierAlarm?: Record<string, LatencyThreshold>;
72+
readonly addLatencyTM999OutlierAlarm?: Record<string, LatencyThreshold>;
73+
readonly addLatencyTM9999OutlierAlarm?: Record<string, LatencyThreshold>;
74+
6875
readonly addLatencyAverageAlarm?: Record<string, LatencyThreshold>;
6976

7077
readonly addIntegrationLatencyP50Alarm?: Record<string, LatencyThreshold>;
@@ -75,13 +82,32 @@ export interface ApiGatewayV2MonitoringOptions extends BaseMonitoringProps {
7582
readonly addIntegrationLatencyP999Alarm?: Record<string, LatencyThreshold>;
7683
readonly addIntegrationLatencyP9999Alarm?: Record<string, LatencyThreshold>;
7784
readonly addIntegrationLatencyP100Alarm?: Record<string, LatencyThreshold>;
85+
7886
readonly addIntegrationLatencyTM50Alarm?: Record<string, LatencyThreshold>;
7987
readonly addIntegrationLatencyTM70Alarm?: Record<string, LatencyThreshold>;
8088
readonly addIntegrationLatencyTM90Alarm?: Record<string, LatencyThreshold>;
8189
readonly addIntegrationLatencyTM95Alarm?: Record<string, LatencyThreshold>;
8290
readonly addIntegrationLatencyTM99Alarm?: Record<string, LatencyThreshold>;
8391
readonly addIntegrationLatencyTM999Alarm?: Record<string, LatencyThreshold>;
8492
readonly addIntegrationLatencyTM9999Alarm?: Record<string, LatencyThreshold>;
93+
94+
readonly addIntegrationLatencyTM95OutlierAlarm?: Record<
95+
string,
96+
LatencyThreshold
97+
>;
98+
readonly addIntegrationLatencyTM99OutlierAlarm?: Record<
99+
string,
100+
LatencyThreshold
101+
>;
102+
readonly addIntegrationLatencyTM999OutlierAlarm?: Record<
103+
string,
104+
LatencyThreshold
105+
>;
106+
readonly addIntegrationLatencyTM9999OutlierAlarm?: Record<
107+
string,
108+
LatencyThreshold
109+
>;
110+
85111
readonly addIntegrationLatencyAverageAlarm?: Record<string, LatencyThreshold>;
86112

87113
readonly addLowTpsAlarm?: Record<string, LowTpsThreshold>;
@@ -200,6 +226,10 @@ export class ApiGatewayV2HttpApiMonitoring extends Monitoring {
200226
[LatencyType.TM99]: props.addLatencyTM99Alarm,
201227
[LatencyType.TM999]: props.addLatencyTM999Alarm,
202228
[LatencyType.TM9999]: props.addLatencyTM9999Alarm,
229+
[LatencyType.TM95_TOP]: props.addLatencyTM95OutlierAlarm,
230+
[LatencyType.TM99_TOP]: props.addLatencyTM99OutlierAlarm,
231+
[LatencyType.TM999_TOP]: props.addLatencyTM999OutlierAlarm,
232+
[LatencyType.TM9999_TOP]: props.addLatencyTM9999OutlierAlarm,
203233
[LatencyType.AVERAGE]: props.addLatencyAverageAlarm,
204234
};
205235

@@ -219,6 +249,10 @@ export class ApiGatewayV2HttpApiMonitoring extends Monitoring {
219249
[LatencyType.TM99]: props.addIntegrationLatencyTM99Alarm,
220250
[LatencyType.TM999]: props.addIntegrationLatencyTM999Alarm,
221251
[LatencyType.TM9999]: props.addIntegrationLatencyTM9999Alarm,
252+
[LatencyType.TM95_TOP]: props.addIntegrationLatencyTM95OutlierAlarm,
253+
[LatencyType.TM99_TOP]: props.addIntegrationLatencyTM99OutlierAlarm,
254+
[LatencyType.TM999_TOP]: props.addIntegrationLatencyTM999OutlierAlarm,
255+
[LatencyType.TM9999_TOP]: props.addIntegrationLatencyTM9999OutlierAlarm,
222256
[LatencyType.AVERAGE]: props.addIntegrationLatencyAverageAlarm,
223257
};
224258

test/monitoring/aws-apigateway/ApiGatewayMonitoring.test.ts

Lines changed: 50 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,30 @@ test("snapshot test: all alarms", () => {
150150
datapointsToAlarm: 29999,
151151
},
152152
},
153+
addLatencyTM95OutlierAlarm: {
154+
Warning: {
155+
maxLatency: Duration.millis(29999),
156+
datapointsToAlarm: 29999,
157+
},
158+
},
159+
addLatencyTM99OutlierAlarm: {
160+
Warning: {
161+
maxLatency: Duration.millis(29999),
162+
datapointsToAlarm: 29999,
163+
},
164+
},
165+
addLatencyTM999OutlierAlarm: {
166+
Warning: {
167+
maxLatency: Duration.millis(29999),
168+
datapointsToAlarm: 29999,
169+
},
170+
},
171+
addLatencyTM9999OutlierAlarm: {
172+
Warning: {
173+
maxLatency: Duration.millis(29999),
174+
datapointsToAlarm: 29999,
175+
},
176+
},
153177
addLatencyAverageAlarm: {
154178
Warning: {
155179
maxLatency: Duration.millis(20),
@@ -170,7 +194,7 @@ test("snapshot test: all alarms", () => {
170194
});
171195

172196
addMonitoringDashboardsToStack(stack, monitoring);
173-
expect(numAlarmsCreated).toStrictEqual(22);
197+
expect(numAlarmsCreated).toStrictEqual(26);
174198
expect(Template.fromStack(stack)).toMatchSnapshot();
175199
});
176200

@@ -304,6 +328,30 @@ test("snapshot test: all alarms using interface", () => {
304328
datapointsToAlarm: 29999,
305329
},
306330
},
331+
addLatencyTM95OutlierAlarm: {
332+
Warning: {
333+
maxLatency: Duration.millis(29999),
334+
datapointsToAlarm: 29999,
335+
},
336+
},
337+
addLatencyTM99OutlierAlarm: {
338+
Warning: {
339+
maxLatency: Duration.millis(29999),
340+
datapointsToAlarm: 29999,
341+
},
342+
},
343+
addLatencyTM999OutlierAlarm: {
344+
Warning: {
345+
maxLatency: Duration.millis(29999),
346+
datapointsToAlarm: 29999,
347+
},
348+
},
349+
addLatencyTM9999OutlierAlarm: {
350+
Warning: {
351+
maxLatency: Duration.millis(29999),
352+
datapointsToAlarm: 29999,
353+
},
354+
},
307355
addLatencyAverageAlarm: {
308356
Warning: {
309357
maxLatency: Duration.millis(20),
@@ -324,6 +372,6 @@ test("snapshot test: all alarms using interface", () => {
324372
});
325373

326374
addMonitoringDashboardsToStack(stack, monitoring);
327-
expect(numAlarmsCreated).toStrictEqual(22);
375+
expect(numAlarmsCreated).toStrictEqual(26);
328376
expect(Template.fromStack(stack)).toMatchSnapshot();
329377
});

0 commit comments

Comments
 (0)