Skip to content

Commit 9800ff2

Browse files
authored
Merge pull request #438 from NHSDigital/release/2024-12-05
Release 2024-12-05
2 parents 5de2a4e + 17f55c4 commit 9800ff2

File tree

128 files changed

+10167
-1000
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

128 files changed

+10167
-1000
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ repos:
44
rev: v1.4.0
55
hooks:
66
- id: detect-secrets
7-
exclude: ".pre-commit-config.yaml|infrastructure/localstack/provider.tf|src/etl/sds/tests/changelog"
7+
exclude: ".pre-commit-config.yaml|infrastructure/localstack/provider.tf|src/etl/sds/tests/changelog|src/etl/sds/worker/bulk/transform_bulk/tests|src/etl/sds/worker/bulk/tests/stage_data"
88

99
- repo: https://github.com/prettier/pre-commit
1010
rev: 57f39166b5a5a504d6808b87ab98d41ebf095b46

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# Changelog
22

3+
## 2024-12-05
4+
- [PI-631] Generate Product Ids
5+
- [PI-691] Allow devs to clear terminal after each Feature Test
6+
- [PI-618] Bulk ETL
7+
38
## 2024-12-02
49
- [PI-572] Create an AS Device
510
- [PI-582] AS Additional Interactions smoke test

README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
4. [Workflow](#workflow)
2222
5. [Swagger](#swagger)
2323
6. [ETL](#etl)
24+
7. [Administration](#administration)
2425

2526
---
2627

@@ -429,6 +430,18 @@ and
429430
make etl--clear-state WORKSPACE=dev SET_CHANGELOG_NUMBER=540210
430431
```
431432

433+
## Administration
434+
435+
### Generating Ids
436+
437+
To generate a persistent list of Ids across environments, run the following command (this example generates 100 ids):
438+
439+
```
440+
make admin--generate-ids--product SET_GENERATOR_COUNT=100
441+
```
442+
443+
Any previously generated ids will not be overwritten.
444+
432445
### Documentation
433446

434447
We have several locations for the Swagger to keep things as visible as possible

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2024.12.02
1+
2024.12.05

changelog/2024-12-05.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
- [PI-631] Generate Product Ids
2+
- [PI-691] Allow devs to clear terminal after each Feature Test
3+
- [PI-618] Bulk ETL

infrastructure/terraform/per_workspace/modules/etl/sds/etl-diagram--bulk-transform-and-load.asl.json

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,43 @@
2626
"BackoffRate": 2
2727
}
2828
],
29+
"Next": "transform-result"
30+
},
31+
"transform-result": {
32+
"Type": "Choice",
33+
"Choices": [
34+
{
35+
"Variable": "$.error_message",
36+
"IsNull": true,
37+
"Next": "load-fanout"
38+
}
39+
],
40+
"Default": "pass"
41+
},
42+
"pass": {
43+
"Type": "Pass",
44+
"End": true
45+
},
46+
"load-fanout": {
47+
"Type": "Task",
48+
"Resource": "arn:aws:states:::lambda:invoke",
49+
"OutputPath": "$.Payload",
50+
"Parameters": {
51+
"FunctionName": "${load_fanout_worker_arn}:$LATEST"
52+
},
53+
"Retry": [
54+
{
55+
"ErrorEquals": [
56+
"Lambda.ServiceException",
57+
"Lambda.AWSLambdaException",
58+
"Lambda.SdkClientException",
59+
"Lambda.TooManyRequestsException"
60+
],
61+
"IntervalSeconds": 1,
62+
"MaxAttempts": 3,
63+
"BackoffRate": 2
64+
}
65+
],
2966
"Next": "Map"
3067
},
3168
"Map": {

infrastructure/terraform/per_workspace/modules/etl/sds/etl-diagram.asl.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@
7575
"OutputPath": "$.Payload",
7676
"Parameters": {
7777
"Payload": {},
78-
"FunctionName": "${extract_worker_arn}:$LATEST"
78+
"FunctionName": "${extract_worker_bulk_arn}:$LATEST"
7979
},
8080
"Retry": [
8181
{
@@ -151,7 +151,7 @@
151151
"OutputPath": "$.Payload",
152152
"Parameters": {
153153
"Payload": {},
154-
"FunctionName": "${extract_worker_arn}:$LATEST"
154+
"FunctionName": "${extract_worker_update_arn}:$LATEST"
155155
},
156156
"Retry": [
157157
{

infrastructure/terraform/per_workspace/modules/etl/sds/main.tf

Lines changed: 104 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,11 @@ module "sds_layer" {
4646
}
4747
}
4848

49-
module "worker_extract" {
50-
source = "./worker/"
49+
module "worker_extract_bulk" {
50+
source = "./worker"
5151

52-
etl_stage = "extract"
52+
etl_stage = "extract_bulk"
53+
etl_type = "bulk"
5354
etl_name = local.etl_name
5455
assume_account = var.assume_account
5556
workspace_prefix = var.workspace_prefix
@@ -80,10 +81,47 @@ module "worker_extract" {
8081

8182
}
8283

84+
module "worker_extract_update" {
85+
source = "./worker"
86+
87+
etl_stage = "extract_update"
88+
etl_type = "update"
89+
etl_name = local.etl_name
90+
assume_account = var.assume_account
91+
workspace_prefix = var.workspace_prefix
92+
python_version = var.python_version
93+
etl_bucket_name = module.bucket.s3_bucket_id
94+
layers = [var.event_layer_arn, var.third_party_core_layer_arn, module.etl_layer.lambda_layer_arn, module.sds_layer.lambda_layer_arn, var.domain_layer_arn]
95+
96+
policy_json = <<-EOT
97+
{
98+
"Version": "2012-10-17",
99+
"Statement": [
100+
{
101+
"Action": [
102+
"s3:PutObject",
103+
"s3:AbortMultipartUpload",
104+
"s3:GetBucketLocation",
105+
"s3:GetObject",
106+
"s3:ListBucket",
107+
"s3:ListBucketMultipartUploads",
108+
"s3:PutObjectVersionTagging"
109+
],
110+
"Effect": "Allow",
111+
"Resource": ["${module.bucket.s3_bucket_arn}", "${module.bucket.s3_bucket_arn}/*"]
112+
}
113+
]
114+
}
115+
EOT
116+
117+
}
118+
119+
83120
module "worker_transform_bulk" {
84-
source = "./worker/"
121+
source = "./worker"
85122

86123
etl_stage = "transform_bulk"
124+
etl_type = "bulk"
87125
etl_name = local.etl_name
88126
assume_account = var.assume_account
89127
workspace_prefix = var.workspace_prefix
@@ -111,6 +149,13 @@ module "worker_transform_bulk" {
111149
"Effect": "Allow",
112150
"Resource": ["${module.bucket.s3_bucket_arn}", "${module.bucket.s3_bucket_arn}/*"]
113151
},
152+
{
153+
"Action": [
154+
"dynamodb:Query"
155+
],
156+
"Effect": "Allow",
157+
"Resource": ["${var.table_arn}", "${var.table_arn}/*"]
158+
},
114159
{
115160
"Action": [
116161
"kms:Decrypt"
@@ -125,9 +170,10 @@ module "worker_transform_bulk" {
125170
}
126171

127172
module "worker_transform_update" {
128-
source = "./worker/"
173+
source = "./worker"
129174

130175
etl_stage = "transform_update"
176+
etl_type = "update"
131177
etl_name = local.etl_name
132178
assume_account = var.assume_account
133179
workspace_prefix = var.workspace_prefix
@@ -174,12 +220,55 @@ module "worker_transform_update" {
174220
EOT
175221

176222
}
223+
module "worker_load_bulk_fanout" {
224+
source = "./worker"
225+
226+
etl_stage = "load_bulk_fanout"
227+
etl_type = "bulk"
228+
etl_name = local.etl_name
229+
assume_account = var.assume_account
230+
workspace_prefix = var.workspace_prefix
231+
python_version = var.python_version
232+
etl_bucket_name = module.bucket.s3_bucket_id
233+
layers = [var.event_layer_arn, var.third_party_core_layer_arn, module.etl_layer.lambda_layer_arn, module.sds_layer.lambda_layer_arn, var.domain_layer_arn]
234+
235+
policy_json = <<-EOT
236+
{
237+
"Version": "2012-10-17",
238+
"Statement": [
239+
{
240+
"Action": [
241+
"s3:PutObject",
242+
"s3:AbortMultipartUpload",
243+
"s3:GetBucketLocation",
244+
"s3:GetObject",
245+
"s3:ListBucket",
246+
"s3:ListBucketMultipartUploads",
247+
"s3:PutObjectVersionTagging"
248+
],
249+
"Effect": "Allow",
250+
"Resource": ["${module.bucket.s3_bucket_arn}", "${module.bucket.s3_bucket_arn}/*"]
251+
},
252+
{
253+
"Action": [
254+
"kms:Decrypt"
255+
],
256+
"Effect": "Allow",
257+
"Resource": ["*"]
258+
}
259+
]
260+
}
261+
EOT
262+
263+
}
264+
177265

178266

179267
module "worker_load_bulk" {
180-
source = "./worker/"
268+
source = "./worker"
181269

182270
etl_stage = "load_bulk"
271+
etl_type = "bulk"
183272
etl_name = local.etl_name
184273
assume_account = var.assume_account
185274
workspace_prefix = var.workspace_prefix
@@ -227,9 +316,10 @@ module "worker_load_bulk" {
227316
}
228317

229318
module "worker_load_update" {
230-
source = "./worker/"
319+
source = "./worker"
231320

232321
etl_stage = "load_update"
322+
etl_type = "update"
233323
etl_name = local.etl_name
234324
assume_account = var.assume_account
235325
workspace_prefix = var.workspace_prefix
@@ -279,9 +369,10 @@ module "worker_load_update" {
279369
}
280370

281371
module "worker_load_bulk_reduce" {
282-
source = "./worker/"
372+
source = "./worker"
283373

284374
etl_stage = "load_bulk_reduce"
375+
etl_type = "bulk"
285376
etl_name = local.etl_name
286377
assume_account = var.assume_account
287378
workspace_prefix = var.workspace_prefix
@@ -321,6 +412,7 @@ module "bulk_transform_and_load_step_function" {
321412
"${path.module}/etl-diagram--bulk-transform-and-load.asl.json",
322413
{
323414
transform_worker_arn = module.worker_transform_bulk.arn
415+
load_fanout_worker_arn = module.worker_load_bulk_fanout.arn
324416
load_worker_arn = module.worker_load_bulk.arn
325417
load_reduce_worker_arn = module.worker_load_bulk_reduce.arn
326418
bulk_load_chunksize = var.bulk_load_chunksize
@@ -332,9 +424,11 @@ module "bulk_transform_and_load_step_function" {
332424
lambda = {
333425
lambda = [
334426
module.worker_transform_bulk.arn,
427+
module.worker_load_bulk_fanout.arn,
335428
module.worker_load_bulk.arn,
336429
module.worker_load_bulk_reduce.arn,
337430
"${module.worker_transform_bulk.arn}:*",
431+
"${module.worker_load_bulk_fanout.arn}:*",
338432
"${module.worker_load_bulk.arn}:*",
339433
"${module.worker_load_bulk_reduce.arn}:*"
340434

@@ -404,7 +498,8 @@ resource "aws_sfn_state_machine" "state_machine" {
404498
definition = templatefile(
405499
"${path.module}/etl-diagram.asl.json",
406500
{
407-
extract_worker_arn = module.worker_extract.arn
501+
extract_worker_bulk_arn = module.worker_extract_bulk.arn
502+
extract_worker_update_arn = module.worker_extract_update.arn
408503
notify_arn = module.notify.arn
409504
etl_bucket = module.bucket.s3_bucket_id
410505
changelog_key = var.changelog_key

infrastructure/terraform/per_workspace/modules/etl/sds/output.tf

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,7 @@ output "etl_state_lock_enforcer" {
2929
output "manual_trigger_arn" {
3030
value = module.trigger_manual.arn
3131
}
32+
33+
output "bulk_load_lambda_arn" {
34+
value = module.worker_load_bulk.arn
35+
}

infrastructure/terraform/per_workspace/modules/etl/sds/step_function_role.tf

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,15 @@ locals {
1010
lambda = {
1111
actions = ["lambda:InvokeFunction"]
1212
resources = [
13-
module.worker_extract.arn,
13+
module.worker_extract_bulk.arn,
14+
module.worker_extract_update.arn,
1415
module.worker_transform_bulk.arn,
1516
module.worker_transform_update.arn,
1617
module.worker_load_bulk.arn,
1718
module.worker_load_update.arn,
1819
module.notify.arn,
19-
"${module.worker_extract.arn}:*",
20+
"${module.worker_extract_bulk.arn}:*",
21+
"${module.worker_extract_update.arn}:*",
2022
"${module.worker_transform_bulk.arn}:*",
2123
"${module.worker_transform_update.arn}:*",
2224
"${module.worker_load_bulk.arn}:*",

0 commit comments

Comments
 (0)