Skip to content

Commit f9e115e

Browse files
harrim91jamesthompson26-nhs
authored and committed
CCM-12833: Without AgentMD - DO NOT MERGE
1 parent d58dfc1 commit f9e115e

File tree

11 files changed

+1162
-44
lines changed

11 files changed

+1162
-44
lines changed
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
module "lambda_data_export_csv_writer" {
2+
source = "https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.22/terraform-lambda.zip"
3+
4+
project = var.project
5+
environment = var.environment
6+
component = var.component
7+
aws_account_id = var.aws_account_id
8+
region = var.region
9+
10+
kms_key_arn = var.kms_key_arn
11+
12+
function_name = "data-export-csv-writer"
13+
function_module_name = "data-export-csv-writer"
14+
handler_function_name = "handler"
15+
description = "Lambda that consumes SQS messages and writes the 'data' object to S3 as CSV"
16+
memory = 512
17+
timeout = 30
18+
runtime = "nodejs20.x"
19+
log_retention_in_days = var.log_retention_in_days
20+
function_s3_bucket = var.function_s3_bucket
21+
function_code_base_path = local.lambdas_dir
22+
function_code_dir = "data-export-csv-writer/dist"
23+
24+
lambda_env_vars = {
25+
BUCKET_NAME = module.s3bucket_data_export.id
26+
KEY_PREFIX = "exports/"
27+
NODE_OPTIONS = "--enable-source-maps"
28+
}
29+
30+
iam_policy_document = {
31+
body = data.aws_iam_policy_document.lambda_data_export_csv_writer.json
32+
}
33+
34+
send_to_firehose = var.send_to_firehose
35+
log_destination_arn = var.log_destination_arn
36+
log_subscription_role_arn = var.log_subscription_role_arn
37+
}
38+
39+
resource "aws_lambda_event_source_mapping" "lambda_data_export_csv_writer" {
40+
event_source_arn = module.sqs_data_export.sqs_queue_arn
41+
function_name = module.lambda_data_export_csv_writer.function_name
42+
batch_size = 10
43+
maximum_batching_window_in_seconds = 0
44+
function_response_types = [
45+
"ReportBatchItemFailures"
46+
]
47+
48+
scaling_config {
49+
maximum_concurrency = 5
50+
}
51+
}
52+
53+
data "aws_iam_policy_document" "lambda_data_export_csv_writer" {
54+
statement {
55+
sid = "AllowSQSDLQ"
56+
effect = "Allow"
57+
actions = [
58+
"sqs:SendMessage",
59+
]
60+
resources = [
61+
module.sqs_data_export.sqs_dlq_arn,
62+
]
63+
}
64+
65+
statement {
66+
sid = "AllowSQSConsume"
67+
effect = "Allow"
68+
actions = [
69+
"sqs:ReceiveMessage",
70+
"sqs:DeleteMessage",
71+
"sqs:GetQueueAttributes",
72+
"sqs:ChangeMessageVisibility",
73+
]
74+
resources = [
75+
module.sqs_data_export.sqs_queue_arn,
76+
]
77+
}
78+
79+
statement {
80+
sid = "AllowS3Write"
81+
effect = "Allow"
82+
actions = [
83+
"s3:PutObject",
84+
"s3:AbortMultipartUpload",
85+
"s3:ListMultipartUploadParts"
86+
]
87+
resources = [
88+
"${module.s3bucket_data_export.arn}/*"
89+
]
90+
}
91+
92+
statement {
93+
sid = "AllowKMS"
94+
effect = "Allow"
95+
actions = [
96+
"kms:Decrypt",
97+
"kms:DescribeKey",
98+
"kms:Encrypt",
99+
"kms:GenerateDataKey*",
100+
"kms:ReEncrypt*",
101+
]
102+
resources = [
103+
var.kms_key_arn,
104+
]
105+
}
106+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
module "s3bucket_data_export" {
2+
source = "https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.20/terraform-s3bucket.zip"
3+
4+
name = "data-export"
5+
6+
aws_account_id = var.aws_account_id
7+
region = var.region
8+
project = var.project
9+
environment = var.environment
10+
component = var.component
11+
12+
kms_key_arn = var.kms_key_arn
13+
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
module "sqs_data_export" {
2+
source = "https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.20/terraform-sqs.zip"
3+
4+
aws_account_id = var.aws_account_id
5+
component = var.component
6+
environment = var.environment
7+
project = var.project
8+
region = var.region
9+
name = "data-export"
10+
11+
sqs_kms_key_arn = var.kms_key_arn
12+
13+
visibility_timeout_seconds = 60
14+
15+
create_dlq = true
16+
}
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# Data Export CSV Writer Lambda
2+
3+
Consumes messages from the `data-export` SQS queue and writes the `data` object contained in each message body to a CSV file in the `data-export` S3 bucket.
4+
5+
## Message Format
6+
7+
Each SQS message is expected to have a JSON body with a top-level `data` object, for example:
8+
9+
```json
10+
{
11+
"eventType": "example.created",
12+
"data": {"id": 123, "name": "Example"},
13+
"meta": {"correlationId": "..."}
14+
}
15+
```
16+
17+
Only the `data` object is extracted; other fields are ignored.
18+
19+
## CSV Output
20+
21+
For a batch of messages the lambda produces one CSV file whose headers are the union of all keys across the collected `data` objects (first-seen order). A file name like:
22+
23+
```
24+
exports/2025-11-06T12-00-00-000Z-abcdef12.csv
25+
```
26+
27+
is written to the S3 bucket defined by the `BUCKET_NAME` environment variable (provisioned as `data-export`).
28+
29+
## Environment Variables
30+
31+
| Name | Description |
32+
|-------------|-------------------------------------------|
33+
| BUCKET_NAME | Target S3 bucket for CSV files |
34+
| KEY_PREFIX | Optional key prefix (empty string if unset; Terraform provisions `exports/`) |
35+
36+
## Building
37+
38+
From the repository root after installing workspaces:
39+
40+
```bash
41+
npm install
42+
npm run lambda-build --workspace lambdas/data-export-csv-writer
43+
```
44+
45+
## Testing
46+
47+
```bash
48+
npm run test:unit --workspace lambdas/data-export-csv-writer
49+
```
50+
51+
## Deployment
52+
53+
Terraform defines:
54+
* SQS queue & DLQ: `module.sqs_data_export`
55+
* S3 bucket: `module.s3bucket_data_export`
56+
* Lambda + Event Source Mapping: `module.lambda_data_export_csv_writer` + `aws_lambda_event_source_mapping.lambda_data_export_csv_writer`
57+
58+
Granting S3 write & SQS consume permissions is handled in the IAM policy document in `module_lambda_data_export_csv_writer.tf`.
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#!/bin/bash
2+
3+
set -euo pipefail
4+
5+
rm -rf dist
6+
7+
npx esbuild \
8+
--bundle \
9+
--minify \
10+
--sourcemap \
11+
--target=es2020 \
12+
--platform=node \
13+
--loader:.node=file \
14+
--entry-names=[name] \
15+
--outdir=dist \
16+
src/data-export-csv-writer.ts
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"name": "nhs-notify-data-export-csv-writer",
3+
"private": true,
4+
"version": "0.0.1",
5+
"dependencies": {
6+
"@aws-sdk/client-s3": "3.911.0"
7+
},
8+
"devDependencies": {
9+
"@swc/core": "^1.11.13",
10+
"@swc/jest": "^0.2.37",
11+
"@tsconfig/node20": "^20.1.5",
12+
"@types/aws-lambda": "^8.10.148",
13+
"@types/jest": "^29.5.14",
14+
"esbuild": "^0.25.9",
15+
"jest": "^29.7.0",
16+
"typescript": "^5.8.2"
17+
},
18+
"scripts": {
19+
"lambda-build": "./build.sh",
20+
"test:unit": "jest",
21+
"typecheck": "tsc --noEmit"
22+
}
23+
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import { objectsToCsv } from '../data-export-csv-writer';
2+
3+
describe('objectsToCsv', () => {
4+
it('creates a csv with union headers and escaped values', () => {
5+
const csv = objectsToCsv([
6+
{ a: 1, b: 'hello' },
7+
{ b: 'he,llo', c: '"quoted"' }
8+
]);
9+
10+
expect(csv.split('\n')[0]).toBe('a,b,c');
11+
expect(csv).toContain('1,hello,');
12+
// he,llo must be quoted
13+
expect(csv).toMatch(/"he,llo"/);
14+
// quotes escaped
15+
expect(csv).toMatch(/""quoted""/);
16+
});
17+
});
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
import { SQSHandler } from 'aws-lambda';
2+
import { S3Client, PutObjectCommand } from '@aws-sdk/client-s3';
3+
4+
const s3 = new S3Client({});
5+
6+
// Convert array of objects to CSV string.
7+
export const objectsToCsv = (rows: Record<string, unknown>[]): string => {
8+
if (rows.length === 0) return '';
9+
10+
// Collect union of keys preserving first-seen order.
11+
const headerSet: string[] = [];
12+
for (const row of rows) {
13+
for (const key of Object.keys(row)) {
14+
if (!headerSet.includes(key)) headerSet.push(key);
15+
}
16+
}
17+
18+
const escape = (val: unknown): string => {
19+
if (val === null || val === undefined) return '';
20+
const str = typeof val === 'string' ? val : JSON.stringify(val);
21+
const needsQuotes = /[",\n]/.test(str);
22+
const escaped = str.replace(/"/g, '""');
23+
return needsQuotes ? `"${escaped}"` : escaped;
24+
};
25+
26+
const lines = [headerSet.join(',')];
27+
for (const row of rows) {
28+
lines.push(headerSet.map((h) => escape((row as any)[h])).join(','));
29+
}
30+
return lines.join('\n');
31+
};
32+
33+
export const handler: SQSHandler = async (event) => {
34+
const bucket = process.env.BUCKET_NAME;
35+
if (!bucket) {
36+
throw new Error('BUCKET_NAME env var not set');
37+
}
38+
const prefix = process.env.KEY_PREFIX ?? '';
39+
40+
const dataRows: Record<string, unknown>[] = [];
41+
42+
for (const record of event.Records) {
43+
try {
44+
const payload: any = JSON.parse(record.body);
45+
if (payload && typeof payload === 'object' && payload.data && typeof payload.data === 'object') {
46+
dataRows.push(payload.data as Record<string, unknown>);
47+
}
48+
} catch (err) {
49+
console.warn('Skipping record - invalid JSON', err);
50+
}
51+
}
52+
53+
if (dataRows.length === 0) return;
54+
55+
const csv = objectsToCsv(dataRows);
56+
const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
57+
const random = Math.random().toString(36).substring(2, 10);
58+
const key = `${prefix}${timestamp}-${random}.csv`;
59+
60+
await s3.send(
61+
new PutObjectCommand({
62+
Bucket: bucket,
63+
Key: key,
64+
Body: csv,
65+
ContentType: 'text/csv'
66+
})
67+
);
68+
};
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{
2+
"compilerOptions": {
3+
"baseUrl": ".",
4+
"resolveJsonModule": true
5+
},
6+
"extends": "@tsconfig/node20/tsconfig.json",
7+
"include": [
8+
"src/**/*"
9+
]
10+
}

0 commit comments

Comments
 (0)