Skip to content

Commit af29cd1

Browse files
authored
Improve logging and monitoring of AB testing notification lambda (#15084)
* improve logging and monitoring of notification lambda * use human-readable function name for lambda * specify alarm conditions, improve readability of policy allowing lambda to send emails using SES
1 parent b5f02cf commit af29cd1

File tree

5 files changed

+72
-70
lines changed

5 files changed

+72
-70
lines changed

ab-testing/cdk/lib/__snapshots__/notificationLambda.test.ts.snap

Lines changed: 30 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,10 @@ exports[`The AB testing notification lambda stack > matches the CODE snapshot 1`
265265
}
266266
},
267267
{
268-
"Action": "ses:SendEmail",
268+
"Action": [
269+
"ses:SendEmail",
270+
"ses:SendRawEmail"
271+
],
269272
"Effect": "Allow",
270273
"Resource": {
271274
"Fn::Join": [
@@ -309,14 +312,15 @@ exports[`The AB testing notification lambda stack > matches the CODE snapshot 1`
309312
},
310313
"Environment": {
311314
"Variables": {
315+
"STAGE": "CODE",
312316
"EMAIL_DOMAIN": {
313317
"Ref": "EmailIdentityAbtestingnotificationlambdaE053C648"
314318
},
315319
"STACK": "frontend",
316-
"STAGE": "CODE",
317320
"APP": "ab-testing-notification-lambda"
318321
}
319322
},
323+
"FunctionName": "ab-testing-notification-lambda-CODE",
320324
"Handler": "index.handler",
321325
"LoggingConfig": {
322326
"LogFormat": "JSON"
@@ -382,30 +386,10 @@ exports[`The AB testing notification lambda stack > matches the CODE snapshot 1`
382386
]
383387
}
384388
],
385-
"AlarmDescription": {
386-
"Fn::Join": [
387-
"",
388-
[
389-
{
390-
"Ref": "AbTestingNotificationLambda36F153E8"
391-
},
392-
" exceeded 1% error rate"
393-
]
394-
]
395-
},
396-
"AlarmName": {
397-
"Fn::Join": [
398-
"",
399-
[
400-
"High error percentage from ",
401-
{
402-
"Ref": "AbTestingNotificationLambda36F153E8"
403-
},
404-
" lambda in CODE"
405-
]
406-
]
407-
},
389+
"AlarmDescription": "Something went wrong notifying test owners of upcoming AB test expiries in ab-testing-notification-lambda-CODE. Please check the logs",
390+
"AlarmName": "AB Testing Notification Failures",
408391
"ComparisonOperator": "GreaterThanThreshold",
392+
"DatapointsToAlarm": 1,
409393
"EvaluationPeriods": 1,
410394
"Metrics": [
411395
{
@@ -483,7 +467,7 @@ exports[`The AB testing notification lambda stack > matches the CODE snapshot 1`
483467
"Value": "CODE"
484468
}
485469
],
486-
"Threshold": 1,
470+
"Threshold": 0,
487471
"TreatMissingData": "notBreaching"
488472
}
489473
}
@@ -630,6 +614,16 @@ exports[`The AB testing notification lambda stack > matches the PROD snapshot 1`
630614
]
631615
}
632616
},
617+
"ABTestingNotificationErrors7CB687D5": {
618+
"Type": "AWS::SNS::Subscription",
619+
"Properties": {
620+
"Endpoint": "[email protected]",
621+
"Protocol": "email",
622+
"TopicArn": {
623+
"Ref": "AbTestingNotificationSnsTopicB3559144"
624+
}
625+
}
626+
},
633627
"AbTestingNotificationLambdaServiceRole529D80CF": {
634628
"Type": "AWS::IAM::Role",
635629
"Properties": {
@@ -765,7 +759,10 @@ exports[`The AB testing notification lambda stack > matches the PROD snapshot 1`
765759
}
766760
},
767761
{
768-
"Action": "ses:SendEmail",
762+
"Action": [
763+
"ses:SendEmail",
764+
"ses:SendRawEmail"
765+
],
769766
"Effect": "Allow",
770767
"Resource": {
771768
"Fn::Join": [
@@ -809,14 +806,15 @@ exports[`The AB testing notification lambda stack > matches the PROD snapshot 1`
809806
},
810807
"Environment": {
811808
"Variables": {
809+
"STAGE": "PROD",
812810
"EMAIL_DOMAIN": {
813811
"Ref": "EmailIdentityAbtestingnotificationlambdaE053C648"
814812
},
815813
"STACK": "frontend",
816-
"STAGE": "PROD",
817814
"APP": "ab-testing-notification-lambda"
818815
}
819816
},
817+
"FunctionName": "ab-testing-notification-lambda-PROD",
820818
"Handler": "index.handler",
821819
"LoggingConfig": {
822820
"LogFormat": "JSON"
@@ -942,30 +940,10 @@ exports[`The AB testing notification lambda stack > matches the PROD snapshot 1`
942940
]
943941
}
944942
],
945-
"AlarmDescription": {
946-
"Fn::Join": [
947-
"",
948-
[
949-
{
950-
"Ref": "AbTestingNotificationLambda36F153E8"
951-
},
952-
" exceeded 1% error rate"
953-
]
954-
]
955-
},
956-
"AlarmName": {
957-
"Fn::Join": [
958-
"",
959-
[
960-
"High error percentage from ",
961-
{
962-
"Ref": "AbTestingNotificationLambda36F153E8"
963-
},
964-
" lambda in PROD"
965-
]
966-
]
967-
},
943+
"AlarmDescription": "Something went wrong notifying test owners of upcoming AB test expiries in ab-testing-notification-lambda-PROD. Please check the logs",
944+
"AlarmName": "AB Testing Notification Failures",
968945
"ComparisonOperator": "GreaterThanThreshold",
946+
"DatapointsToAlarm": 1,
969947
"EvaluationPeriods": 1,
970948
"Metrics": [
971949
{
@@ -1043,7 +1021,7 @@ exports[`The AB testing notification lambda stack > matches the PROD snapshot 1`
10431021
"Value": "PROD"
10441022
}
10451023
],
1046-
"Threshold": 1,
1024+
"Threshold": 0,
10471025
"TreatMissingData": "notBreaching"
10481026
}
10491027
}

ab-testing/cdk/lib/notificationLambda.ts

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,12 @@ import {
44
type GuStackProps,
55
} from "@guardian/cdk/lib/constructs/core/stack.js";
66
import { GuEmailIdentity } from "@guardian/cdk/lib/constructs/ses/index.js";
7-
import type { App } from "aws-cdk-lib";
7+
import { type App, Duration } from "aws-cdk-lib";
88
import { Schedule } from "aws-cdk-lib/aws-events";
9-
import { Effect, PolicyStatement } from "aws-cdk-lib/aws-iam";
109
import { Runtime } from "aws-cdk-lib/aws-lambda";
11-
import { Topic } from "aws-cdk-lib/aws-sns";
10+
import { Subscription, SubscriptionProtocol, Topic } from "aws-cdk-lib/aws-sns";
1211

13-
const lambdaFunctionName = "ab-testing-notification-lambda";
12+
const appName = "ab-testing-notification-lambda";
1413

1514
const getEmailDomain = (stage: GuStackProps["stage"]) => {
1615
switch (stage) {
@@ -33,37 +32,48 @@ export class AbTestingNotificationLambda extends GuStack {
3332
};
3433

3534
const emailIdentity = new GuEmailIdentity(this, "EmailIdentity", {
36-
app: lambdaFunctionName,
35+
app: appName,
3736
domainName: getEmailDomain(props.stage),
3837
});
3938

4039
const snsTopic = new Topic(this, "AbTestingNotificationSnsTopic");
4140

41+
// Notify teams in PROD if this lambda errors
42+
if (props.stage === "PROD") {
43+
new Subscription(this, "ABTestingNotificationErrors", {
44+
topic: snsTopic,
45+
// TODO: Change this to [email protected] after testing for a while
46+
endpoint: "[email protected]",
47+
protocol: SubscriptionProtocol.EMAIL,
48+
});
49+
}
50+
4251
const lambda = new GuScheduledLambda(
4352
this,
4453
"AbTestingNotificationLambda",
4554
{
46-
app: lambdaFunctionName,
55+
functionName: `${appName}-${props.stage}`,
56+
app: appName,
4757
fileName: "lambda.zip",
4858
handler: "index.handler",
4959
rules: this.stage === "PROD" ? [runDailyRule] : [],
5060
monitoringConfiguration: {
5161
snsTopicName: snsTopic.topicName,
52-
toleratedErrorPercentage: 1,
62+
toleratedErrorPercentage: 0,
63+
alarmName: "AB Testing Notification Failures",
64+
alarmDescription: `Something went wrong notifying test owners of upcoming AB test expiries in ${appName}-${props.stage}. Please check the logs`,
65+
lengthOfEvaluationPeriod: Duration.minutes(1),
66+
numberOfEvaluationPeriodsAboveThresholdBeforeAlarm: 1,
67+
datapointsToAlarm: 1,
5368
},
5469
runtime: Runtime.NODEJS_22_X,
5570
environment: {
71+
STAGE: props.stage,
5672
EMAIL_DOMAIN: emailIdentity.emailIdentityName,
5773
},
5874
},
5975
);
6076

61-
lambda.addToRolePolicy(
62-
new PolicyStatement({
63-
effect: Effect.ALLOW,
64-
actions: ["ses:SendEmail"],
65-
resources: [emailIdentity.emailIdentityArn],
66-
}),
67-
);
77+
emailIdentity.grantSendEmail(lambda);
6878
}
6979
}

ab-testing/notification-lambda/src/index.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,14 @@ export const handler = async (): Promise<void> => {
66
const expiringAbTestsByOwner =
77
getExpiringAbTestsGroupedByOwner(activeABtests);
88

9+
// Check if any test owners have expiring tests
10+
// Early return if there are no results
11+
if (!Object.keys(expiringAbTestsByOwner).length) {
12+
console.log("No owners found with expiring tests");
13+
Promise.resolve();
14+
}
15+
16+
// Sending emails to owners with expiring tests
917
await Promise.all(
1018
Object.entries(expiringAbTestsByOwner).map(([owner, abTests]) =>
1119
sendEmail(owner, abTests),

ab-testing/notification-lambda/src/lib/email.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,10 +150,15 @@ export const sendEmail = async (
150150
)
151151
.then(() =>
152152
console.log(
153-
`Sent AB test expiry reminder email to ${recipient}`,
153+
`Sent AB test expiry reminder email to ${recipient} for tests
154+
${Object.values(expiringAbTests)
155+
.flat()
156+
.map((test) => test.name)
157+
.join(", ")}`,
154158
),
155159
);
156160
} catch (error) {
157-
console.log(`Error sending email to ${recipient}`, error);
161+
console.error(`Error sending email to ${recipient}`, error);
162+
throw error;
158163
}
159164
};

ab-testing/notification-lambda/src/run.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,11 @@
1111

1212
import { handler } from "./index.ts";
1313

14+
process.env.STAGE = `LOCAL`;
1415
process.env.EMAIL_DOMAIN = `abtesting.code.dev-gutools.co.uk`;
1516

16-
console.log(`Running lambda handler locally\n`);
17-
console.log(`EMAIL_DOMAIN=${process.env.EMAIL_DOMAIN}\n`);
17+
console.debug(`Running lambda handler locally\n`);
18+
console.debug(`EMAIL_DOMAIN=${process.env.EMAIL_DOMAIN}\n`);
1819

1920
void (async () => {
2021
try {

0 commit comments

Comments
 (0)