Skip to content

Commit cecd255

Browse files
committed
setup archival lambda as node lambda
1 parent a419f5c commit cecd255

File tree

11 files changed

+1417
-429
lines changed

11 files changed

+1417
-429
lines changed

package.json

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,14 @@
55
"type": "module",
66
"workspaces": [
77
"src/api",
8-
"src/ui"
8+
"src/ui",
9+
"src/archival"
910
],
1011
"packageManager": "[email protected]",
1112
"scripts": {
1213
"postinstall": "npm run setup",
1314
"setup": "git config blame.ignoreRevsFile .git-blame-ignore-revs",
14-
"build": "concurrently --names 'api,ui' 'yarn workspace infra-core-api run build' 'yarn workspace infra-core-ui run build'",
15+
"build": "concurrently --names 'api,ui,archival' 'yarn workspace infra-core-api run build' 'yarn workspace infra-core-ui run build' 'yarn workspace infra-core-archival run build'",
1516
"postbuild": "node src/api/createLambdaPackage.js && yarn lockfile-manage",
1617
"dev": "cross-env DISABLE_AUDIT_LOG=true concurrently --names 'api,ui' 'yarn workspace infra-core-api run dev' 'yarn workspace infra-core-ui run dev'",
1718
"lockfile-manage": "synp --with-workspace --source-file yarn.lock",
@@ -38,6 +39,7 @@
3839
"@eslint/eslintrc": "^3.3.1",
3940
"@eslint/js": "^9.33.0",
4041
"@playwright/test": "^1.54.2",
42+
"@smithy/types": "^4.3.2",
4143
"@tsconfig/node22": "^22.0.1",
4244
"@types/ioredis-mock": "^8.2.5",
4345
"@types/node": "^24.3.0",
@@ -92,4 +94,4 @@
9294
"pdfjs-dist": "^4.8.69",
9395
"form-data": "^4.0.4"
9496
}
95-
}
97+
}

src/api/package.json

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,14 @@
1616
},
1717
"dependencies": {
1818
"@aws-sdk/client-cloudfront-keyvaluestore": "^3.859.0",
19-
"@aws-sdk/client-dynamodb": "^3.797.0",
20-
"@aws-sdk/client-lambda": "^3.835.0",
19+
"@aws-sdk/client-dynamodb": "^3.859.0",
20+
"@aws-sdk/client-lambda": "^3.859.0",
2121
"@aws-sdk/client-secrets-manager": "^3.859.0",
22-
"@aws-sdk/client-ses": "^3.797.0",
23-
"@aws-sdk/client-sqs": "^3.797.0",
24-
"@aws-sdk/client-sts": "^3.797.0",
25-
"@aws-sdk/signature-v4-crt": "^3.796.0",
26-
"@aws-sdk/util-dynamodb": "^3.797.0",
22+
"@aws-sdk/client-ses": "^3.859.0",
23+
"@aws-sdk/client-sqs": "^3.859.0",
24+
"@aws-sdk/client-sts": "^3.859.0",
25+
"@aws-sdk/signature-v4-crt": "^3.859.0",
26+
"@aws-sdk/util-dynamodb": "^3.859.0",
2727
"@azure/msal-node": "^3.5.1",
2828
"@fastify/auth": "^5.0.1",
2929
"@fastify/aws-lambda": "^6.0.0",

src/archival/build.js

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/* eslint-disable no-console */
2+
import esbuild from "esbuild";
3+
4+
// Prelude injected at the top of the bundle. It recreates the CommonJS
// globals (`require`, `__filename`, `__dirname`) that bundled dependencies
// may expect but that do not exist in an ES module.
const cjsCompatBanner = [
  "import path from 'path';",
  "import { fileURLToPath } from 'url';",
  "import { createRequire as topLevelCreateRequire } from 'module';",
  "const require = topLevelCreateRequire(import.meta.url);",
  "const __filename = fileURLToPath(import.meta.url);",
  "const __dirname = path.dirname(__filename);",
].join("\n");

// Non-JS assets are emitted as separate files next to the bundle.
const fileLoaders = {
  ".png": "file",
  ".pkpass": "file",
  ".json": "file",
};

// Full esbuild configuration for the archival Lambda bundle.
const archivalBuildOptions = {
  bundle: true,
  format: "esm",
  minify: true,
  outExtension: { ".js": ".mjs" },
  loader: fileLoaders,
  target: "es2022",
  sourcemap: true,
  platform: "node",
  // AWS SDK packages are marked external, so they stay as runtime imports
  // instead of being bundled.
  external: ["@aws-sdk/*"],
  banner: { js: cjsCompatBanner },
  entryPoints: ["archival/dynamoStream.js"],
  outdir: "../../dist/archival/",
};

// Run the build; any failure is reported and turned into a non-zero exit code
// so the calling script stops.
try {
  await esbuild.build(archivalBuildOptions);
  console.log("Archival lambda build completed successfully!");
} catch (error) {
  console.error("Archival lambda build failed:", error);
  process.exit(1);
}

src/archival/dynamoStream.ts

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
/* eslint-disable no-console */
2+
import {
3+
FirehoseClient,
4+
PutRecordBatchCommand,
5+
} from "@aws-sdk/client-firehose";
6+
import { unmarshall } from "@aws-sdk/util-dynamodb";
7+
import type { DynamoDBStreamEvent, Context } from "aws-lambda";
8+
import { AttributeValue } from "@aws-sdk/client-dynamodb";
9+
10+
// Firehose client constructed once at module load and shared by every
// invocation that runs in this module instance.
const firehoseClient = new FirehoseClient({});

// Name of the Firehose delivery stream that receives archived records.
const { FIREHOSE_STREAM_NAME } = process.env;

// Fail at module load when the stream name is missing or empty: without it
// no record could ever be delivered.
if (FIREHOSE_STREAM_NAME === undefined || FIREHOSE_STREAM_NAME === "") {
  console.error("The 'FIREHOSE_STREAM_NAME' environment variable is not set.");
  throw new Error("'FIREHOSE_STREAM_NAME' is not set.");
}
18+
19+
/**
 * Formats a date as a UTC ISO 8601 timestamp with second precision,
 * e.g. `2024-01-02T03:04:05Z`.
 *
 * The fractional-seconds part is removed by pattern instead of by a fixed
 * character offset. `Date.prototype.toISOString` switches to the expanded
 * year form (`+010000-01-01T00:00:00.000Z`) for years outside 0000-9999, and
 * a fixed-length slice would cut into the time component of such values.
 *
 * @param date - The instant to format.
 * @returns The ISO 8601 UTC timestamp without milliseconds.
 * @throws RangeError if `date` is an invalid date (raised by `toISOString`).
 */
const toUtcIsoStringWithoutMillis = (date: Date): string => {
  // `toISOString` always ends in `.sssZ`; drop only the `.sss` fraction.
  return date.toISOString().replace(/\.\d{3}Z$/, "Z");
};
22+
23+
/**
24+
* Defines a map where keys are DynamoDB table names and values are functions
25+
* that extract a meaningful timestamp from a record. The function should
26+
* return a value parseable by the `Date` constructor (e.g., ISO 8601 string or epoch milliseconds).
27+
*/
28+
const ARCHIVE_TIMESTAMP_MAPPER: Record<
29+
string,
30+
(record: Record<string, any>) => string | number
31+
> = {
32+
"infra-core-api-room-requests-status": (record) =>
33+
record["createdAt#status"].split("#")[0],
34+
"infra-core-api-events": (record) => record.createdAt,
35+
"infra-core-api-audit-log": (record) => record.createdAt * 1000, // Convert Unix seconds to milliseconds
36+
};
37+
38+
/** One entry of a Firehose `PutRecordBatch` request. */
type FirehoseRecord = { Data: Buffer };

/** `PutRecordBatch` accepts at most 500 records per call. */
const FIREHOSE_MAX_RECORDS_PER_BATCH = 500;

/** `PutRecordBatch` accepts at most 4 MiB of record data per call. */
const FIREHOSE_MAX_BYTES_PER_BATCH = 4 * 1024 * 1024;

/** How many times a batch is offered to Firehose before the invocation fails. */
const FIREHOSE_MAX_PUT_ATTEMPTS = 3;

/**
 * Splits records into batches that respect both the record-count and the
 * total-size limits of a single `PutRecordBatch` call. Record order is kept.
 */
const chunkRecordsForFirehose = (
  records: FirehoseRecord[],
): FirehoseRecord[][] => {
  const batches: FirehoseRecord[][] = [];
  let current: FirehoseRecord[] = [];
  let currentBytes = 0;

  for (const record of records) {
    const size = record.Data.byteLength;
    const batchIsFull =
      current.length >= FIREHOSE_MAX_RECORDS_PER_BATCH ||
      currentBytes + size > FIREHOSE_MAX_BYTES_PER_BATCH;
    // Never emit an empty batch, even if a single record is oversized.
    if (current.length > 0 && batchIsFull) {
      batches.push(current);
      current = [];
      currentBytes = 0;
    }
    current.push(record);
    currentBytes += size;
  }

  if (current.length > 0) {
    batches.push(current);
  }
  return batches;
};

/**
 * Sends one batch to Firehose and re-sends only the entries Firehose rejected.
 * Throws when entries are still rejected after the final attempt, so that the
 * caller can fail the invocation instead of silently dropping data.
 */
const putBatchWithRetry = async (batch: FirehoseRecord[]): Promise<void> => {
  let pending = batch;

  for (let attempt = 1; attempt <= FIREHOSE_MAX_PUT_ATTEMPTS; attempt++) {
    const response = await firehoseClient.send(
      new PutRecordBatchCommand({
        DeliveryStreamName: FIREHOSE_STREAM_NAME,
        Records: pending,
      }),
    );

    if (!response.FailedPutCount) {
      return;
    }

    console.error(
      `Failed to put ${response.FailedPutCount} records to Firehose (attempt ${attempt} of ${FIREHOSE_MAX_PUT_ATTEMPTS}).`,
    );

    // `RequestResponses` is index-aligned with the request; entries carrying
    // an `ErrorCode` were rejected and must be sent again.
    const results = response.RequestResponses ?? [];
    const rejected = pending.filter((_, index) =>
      Boolean(results[index]?.ErrorCode),
    );
    // If the response carries no per-record detail, re-send everything.
    pending = rejected.length > 0 ? rejected : pending;

    if (attempt < FIREHOSE_MAX_PUT_ATTEMPTS) {
      // Short, growing pause before the next attempt.
      await new Promise<void>((resolve) => setTimeout(resolve, 100 * attempt));
    }
  }

  throw new Error(
    `Firehose rejected ${pending.length} record(s) after ${FIREHOSE_MAX_PUT_ATTEMPTS} attempts.`,
  );
};

/**
 * DynamoDB Streams handler that archives TTL-expired items to Firehose.
 *
 * Only `REMOVE` events whose `userIdentity.principalId` is
 * `dynamodb.amazonaws.com` are archived. Each archived item is the
 * unmarshalled `OldImage` plus two metadata fields:
 * `__infra_archive_resource` (the source table name) and
 * `__infra_archive_timestamp` (a per-table timestamp when an extractor is
 * registered in `ARCHIVE_TIMESTAMP_MAPPER`, otherwise the processing time).
 *
 * @param event - The DynamoDB stream event batch.
 * @param _context - Lambda invocation context (unused).
 * @returns An object with `statusCode: 200` and a JSON-encoded summary string.
 * @throws Re-throws any Firehose send error, and throws when Firehose keeps
 *   rejecting records after the retry budget is used up. A thrown error fails
 *   the invocation so the event batch can be retried instead of losing records.
 */
export const handler = async (
  event: DynamoDBStreamEvent,
  _context: Context,
): Promise<any> => {
  const firehoseRecordsToSend: FirehoseRecord[] = [];

  for (const record of event.Records) {
    // 1. Filter for TTL deletions: only `REMOVE` events issued by the
    //    DynamoDB service principal are archived.
    if (
      record.eventName !== "REMOVE" ||
      record.userIdentity?.principalId !== "dynamodb.amazonaws.com"
    ) {
      continue;
    }

    // 2. Extract the table name from the event source ARN.
    //    ARN format: arn:aws:dynamodb:region:account-id:table/TABLE_NAME/stream/...
    const tableName = record.eventSourceARN?.split("/")[1];
    if (!tableName) {
      console.warn(
        `Could not parse table name from ARN: ${record.eventSourceARN}`,
      );
      continue; // Skip this record if the ARN is malformed
    }

    // 3. The content of the expired item is in `OldImage`.
    const oldImage = record.dynamodb?.OldImage;
    if (!oldImage) {
      continue; // Skip if there's no data to archive
    }

    // `unmarshall` converts the DynamoDB attribute format to a plain object.
    const deserializedData = unmarshall(
      oldImage as { [key: string]: AttributeValue },
    );

    // 4. Build the payload: original data plus archive metadata.
    const payload: Record<string, any> = {
      ...deserializedData,
      __infra_archive_resource: tableName,
      __infra_archive_timestamp: toUtcIsoStringWithoutMillis(new Date()), // Default timestamp is 'now'
    };

    // 5. Apply the table-specific timestamp, if one is registered. An
    //    own-property check keeps inherited keys such as "constructor" from
    //    being mistaken for a registered table.
    const extractTimestamp = Object.prototype.hasOwnProperty.call(
      ARCHIVE_TIMESTAMP_MAPPER,
      tableName,
    )
      ? ARCHIVE_TIMESTAMP_MAPPER[tableName]
      : undefined;
    if (extractTimestamp) {
      try {
        payload.__infra_archive_timestamp = toUtcIsoStringWithoutMillis(
          new Date(extractTimestamp(deserializedData)),
        );
      } catch (e) {
        const error = e instanceof Error ? e.message : String(e);
        console.error(
          `Failed to extract timestamp for record from ${tableName}: ${error}. Using 'now' as timestamp.`,
        );
      }
    }

    firehoseRecordsToSend.push({
      Data: Buffer.from(JSON.stringify(payload)),
    });
  }

  // 6. Send the collected records to Firehose.
  if (firehoseRecordsToSend.length > 0) {
    console.info(
      `Found ${firehoseRecordsToSend.length} TTL-expired records to archive.`,
    );

    for (const batch of chunkRecordsForFirehose(firehoseRecordsToSend)) {
      try {
        await putBatchWithRetry(batch);
      } catch (e) {
        const error = e instanceof Error ? e.message : String(e);
        console.error(`Error sending batch to Firehose: ${error}`);
        // Re-throwing the exception will cause Lambda to retry the entire event batch.
        throw e;
      }
    }
  } else {
    console.info("No TTL-expired records found in this event.");
  }

  return {
    statusCode: 200,
    body: JSON.stringify(
      `Successfully processed ${firehoseRecordsToSend.length} records.`,
    ),
  };
};

src/archival/package.json

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
{
2+
"name": "infra-core-archival",
3+
"version": "1.0.0",
4+
"description": "Archives DynamoDB TTL-deleted items to Firehose",
5+
"type": "module",
6+
"main": "index.js",
7+
"author": "ACM@UIUC",
8+
"license": "BSD-3-Clause",
9+
"scripts": {
10+
"build": "tsc && node build.js",
11+
"prettier": "prettier --check *.ts **/*.ts",
12+
"lint": "eslint . --ext .ts --cache",
13+
"prettier:write": "prettier --write *.ts **/*.ts"
14+
},
15+
"devDependencies": {
16+
"@types/aws-lambda": "^8.10.138",
17+
"@types/node": "^24.3.0",
18+
"typescript": "^5.9.2",
19+
"esbuild": "^0.25.3"
20+
},
21+
"dependencies": {
22+
"@aws-sdk/client-dynamodb": "^3.585.0",
23+
"@aws-sdk/client-firehose": "^3.585.0",
24+
"@aws-sdk/util-dynamodb": "^3.585.0"
25+
}
26+
}

src/archival/tsconfig.json

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"extends": "@tsconfig/node22/tsconfig.json",
3+
"compilerOptions": {
4+
"module": "Node16",
5+
"rootDir": "../",
6+
"outDir": "../../dist",
7+
"baseUrl": "../"
8+
},
9+
"ts-node": {
10+
"esm": true
11+
},
12+
"include": ["../api/**/*.ts", "../common/**/*.ts"],
13+
"exclude": ["../../node_modules", "../../dist"]
14+
}

0 commit comments

Comments
 (0)