
Commit 111e0ae

JamesW1-NHS and Akol125 authored
VED-902: Extended Attributes (#1011)
* Refactor extended attributes
* check extended attributes and move file
* refactor extended attribute journeys
* rolling back test
* add test for file validation
* refactor filenameprocessor file path into two
* add test for move outside bucket
* add upserts for EA files
* ruff
* fix upsert
* test for no condition_expression
* ACCOUNT_ID in tf
* pathname
* pathname II
* cherry-pick #1013
* resolve e2e batch
* unit tests
* fix to e2e_batch
* cleanup trace
* change buckeowner and destination bucket name
* duplicate batch fix
* duplicate batch test fix
* duplicate batch fix
* change bucket name to dps destination
* revert error message
* bugfix - delete_file()
* bugfix - delete_file() II
* comment
* resolve comments
* fix shared utils
* remove prefix
* remove file validation patching
* remove file validation patching
* ruff
* EXTENDED_ATTRIBUTES_VACC_TYPE
* EXPECTED_BUCKET_OWNER_ACCOUNT
* formatting
* test_unexpected_bucket_name_with_extended_attributes_file
* test_lambda_handler_extended_attributes_failure
* remove copy check, some comments
* remove copy check, some comments II
* improved test_unexpected_bucket_name_and_filename_validation_fails
* expiry_timestamp: int
* ruff
* test_lambda_handler_extended_attributes_failure

---------

Co-authored-by: Akol125 <[email protected]>
1 parent 6260184 commit 111e0ae

File tree

10 files changed: +302 additions, -53 deletions


lambdas/filenameprocessor/src/audit_table.py

Lines changed: 12 additions & 5 deletions
@@ -15,6 +15,7 @@ def upsert_audit_table(
     queue_name: str,
     file_status: str,
     error_details: Optional[str] = None,
+    condition_expression: Optional[str] = None,
 ) -> None:
     """
     Updates the audit table with the file details
@@ -33,11 +34,17 @@ def upsert_audit_table(
 
     try:
         # Add to the audit table (regardless of whether it is a duplicate)
-        dynamodb_client.put_item(
-            TableName=AUDIT_TABLE_NAME,
-            Item=audit_item,
-            ConditionExpression="attribute_not_exists(message_id)",  # Prevents accidental overwrites
-        )
+        if not condition_expression:
+            dynamodb_client.put_item(
+                TableName=AUDIT_TABLE_NAME,
+                Item=audit_item,
+            )
+        else:
+            dynamodb_client.put_item(
+                TableName=AUDIT_TABLE_NAME,
+                Item=audit_item,
+                ConditionExpression=condition_expression,
+            )
         logger.info(
             "%s file, with message id %s, successfully added to audit table",
             file_key,
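Note on the DynamoDB semantics behind this change: a PutItem without a ConditionExpression is an overwrite (upsert), while the attribute_not_exists(message_id) guard makes a repeat write fail with ConditionalCheckFailedException. A minimal standalone sketch of that contract, assuming an illustrative table "example-audit-table" exists (the item shape is also illustrative, not taken from this repo):

import boto3
from botocore.exceptions import ClientError

dynamodb_client = boto3.client("dynamodb")
audit_item = {"message_id": {"S": "abc-123"}, "file_status": {"S": "Queued"}}  # illustrative item

# Unconditional put: silently replaces any existing item with the same key.
dynamodb_client.put_item(TableName="example-audit-table", Item=audit_item)
dynamodb_client.put_item(TableName="example-audit-table", Item=audit_item)  # no error, item replaced

# Conditional put: rejected once an item with this message_id already exists.
try:
    dynamodb_client.put_item(
        TableName="example-audit-table",
        Item=audit_item,
        ConditionExpression="attribute_not_exists(message_id)",
    )
except ClientError as error:
    # This is the error the duplicate-detection path relies on.
    assert error.response["Error"]["Code"] == "ConditionalCheckFailedException"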

lambdas/filenameprocessor/src/constants.py

Lines changed: 2 additions & 1 deletion
@@ -12,7 +12,7 @@
 
 SOURCE_BUCKET_NAME = os.getenv("SOURCE_BUCKET_NAME")
 
-# We have used an internal temporary bucket here and an acutal dps bucket will replace this
+# We have used an internal temporary bucket here and an actual dps bucket will replace this
 DPS_DESTINATION_BUCKET_NAME = os.getenv("ACK_BUCKET_NAME")
 EXPECTED_BUCKET_OWNER_ACCOUNT = os.getenv("ACCOUNT_ID")
 AUDIT_TABLE_NAME = os.getenv("AUDIT_TABLE_NAME")
@@ -22,6 +22,7 @@
 VACCINE_TYPE_TO_DISEASES_HASH_KEY = "vacc_to_diseases"
 ODS_CODE_TO_SUPPLIER_SYSTEM_HASH_KEY = "ods_code_to_supplier"
 EXTENDED_ATTRIBUTES_FILE_PREFIX = "Vaccination_Extended_Attributes"
+EXTENDED_ATTRIBUTES_VACC_TYPE = "COVID"
 DPS_DESTINATION_PREFIX = "dps_destination/"
 
 ERROR_TYPE_TO_STATUS_CODE_MAP = {
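The two new constants give the processor what it needs to recognise an extended-attributes upload and to label its audit rows. A small hedged sketch of how they combine; the helper names and the dispatch shown here are illustrative assumptions, the real routing lives in file_name_processor.py below:

import os

EXTENDED_ATTRIBUTES_FILE_PREFIX = "Vaccination_Extended_Attributes"
EXTENDED_ATTRIBUTES_VACC_TYPE = "COVID"
EXPECTED_BUCKET_OWNER_ACCOUNT = os.getenv("ACCOUNT_ID")  # resolved from the environment, as in constants.py


def is_extended_attributes_file(file_key: str) -> bool:
    """Extended-attributes uploads are identified purely by their file-key prefix."""
    return file_key.startswith(EXTENDED_ATTRIBUTES_FILE_PREFIX)


def build_extended_attribute_identifier(organization_code: str) -> str:
    """e.g. 'X8E5B' -> 'X8E5B_COVID', passed where the audit table expects a queue name."""
    return f"{organization_code}_{EXTENDED_ATTRIBUTES_VACC_TYPE}"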

lambdas/filenameprocessor/src/file_name_processor.py

Lines changed: 45 additions & 16 deletions
@@ -10,15 +10,20 @@
 from uuid import uuid4
 
 from audit_table import upsert_audit_table
-from common.aws_s3_utils import move_file, move_file_to_external_bucket
+from common.aws_s3_utils import (
+    copy_file_to_external_bucket,
+    delete_file,
+    move_file,
+)
 from common.clients import STREAM_NAME, get_s3_client, logger
 from common.log_decorator import logging_decorator
 from common.models.errors import UnhandledAuditTableError
 from constants import (
     DPS_DESTINATION_BUCKET_NAME,
-    DPS_DESTINATION_PREFIX,
     ERROR_TYPE_TO_STATUS_CODE_MAP,
+    EXPECTED_BUCKET_OWNER_ACCOUNT,
     EXTENDED_ATTRIBUTES_FILE_PREFIX,
+    EXTENDED_ATTRIBUTES_VACC_TYPE,
     SOURCE_BUCKET_NAME,
     FileNotProcessedReason,
     FileStatus,
@@ -56,8 +61,6 @@ def handle_record(record) -> dict:
             "error": str(error),
         }
 
-    expiry_timestamp = "unknown"
-
     if bucket_name != SOURCE_BUCKET_NAME:
         return handle_unexpected_bucket_name(bucket_name, file_key)
 
@@ -69,10 +72,6 @@ def handle_record(record) -> dict:
         message = "Processing not required. Event was for a file moved to /archive or /processing"
         return {"statusCode": 200, "message": message, "file_key": file_key}
 
-    # Set default values for file-specific variables
-    message_id = "Message id was not created"
-    created_at_formatted_string = "created_at_time not identified"
-
     message_id = str(uuid4())
     s3_response = get_s3_client().get_object(Bucket=bucket_name, Key=file_key)
     created_at_formatted_string, expiry_timestamp = get_creation_and_expiry_times(s3_response)
@@ -108,7 +107,8 @@ def handle_unexpected_bucket_name(bucket_name: str, file_key: str) -> dict:
     config and overarching design"""
     try:
         if file_key.startswith(EXTENDED_ATTRIBUTES_FILE_PREFIX):
-            extended_attribute_identifier = validate_extended_attributes_file_key(file_key)
+            organization_code = validate_extended_attributes_file_key(file_key)
+            extended_attribute_identifier = f"{organization_code}_{EXTENDED_ATTRIBUTES_VACC_TYPE}"
             logger.error(
                 "Unable to process file %s due to unexpected bucket name %s",
                 file_key,
@@ -157,7 +157,7 @@ def handle_unexpected_bucket_name(bucket_name: str, file_key: str) -> dict:
 
 
 def handle_batch_file(
-    file_key: str, bucket_name: str, message_id: str, created_at_formatted_string: str, expiry_timestamp: str
+    file_key: str, bucket_name: str, message_id: str, created_at_formatted_string: str, expiry_timestamp: int
 ) -> dict:
     """
     Processes a single record for batch file.
@@ -177,6 +177,7 @@ def handle_batch_file(
         expiry_timestamp,
         queue_name,
         FileStatus.QUEUED,
+        condition_expression="attribute_not_exists(message_id)",  # Prevents accidental overwrites
     )
     make_and_send_sqs_message(
         file_key,
@@ -239,17 +240,17 @@ def handle_batch_file(
 
 
 def handle_extended_attributes_file(
-    file_key: str, bucket_name: str, message_id: str, created_at_formatted_string: str, expiry_timestamp: str
+    file_key: str, bucket_name: str, message_id: str, created_at_formatted_string: str, expiry_timestamp: int
 ) -> dict:
     """
     Processes a single record for extended attributes file.
     Returns a dictionary containing information to be included in the logs.
     """
+
+    extended_attribute_identifier = None
     try:
-        extended_attribute_identifier = validate_extended_attributes_file_key(file_key)
-        move_file_to_external_bucket(
-            bucket_name, file_key, DPS_DESTINATION_BUCKET_NAME, f"{DPS_DESTINATION_PREFIX}{file_key}"
-        )
+        organization_code = validate_extended_attributes_file_key(file_key)
+        extended_attribute_identifier = f"{organization_code}_{EXTENDED_ATTRIBUTES_VACC_TYPE}"
 
         upsert_audit_table(
             message_id,
@@ -259,6 +260,28 @@ def handle_extended_attributes_file(
             extended_attribute_identifier,
             FileStatus.PROCESSING,
         )
+
+        # TODO: agree the prefix with DPS
+        dest_file_key = f"dps_destination/{file_key}"
+        copy_file_to_external_bucket(
+            bucket_name,
+            file_key,
+            DPS_DESTINATION_BUCKET_NAME,
+            dest_file_key,
+            EXPECTED_BUCKET_OWNER_ACCOUNT,
+            EXPECTED_BUCKET_OWNER_ACCOUNT,
+        )
+        delete_file(bucket_name, dest_file_key, EXPECTED_BUCKET_OWNER_ACCOUNT)
+
+        upsert_audit_table(
+            message_id,
+            file_key,
+            created_at_formatted_string,
+            expiry_timestamp,
+            extended_attribute_identifier,
+            FileStatus.PROCESSED,
+        )
+
         return {
             "statusCode": 200,
             "message": "Extended Attributes file successfully processed",
@@ -276,7 +299,13 @@ def handle_extended_attributes_file(
         logger.error("Error processing file '%s': %s", file_key, str(error))
 
         file_status = get_file_status_for_error(error)
-        extended_attribute_identifier = validate_extended_attributes_file_key(file_key)
+
+        # NB if we got InvalidFileKeyError we won't have a valid queue name
+        if not extended_attribute_identifier:
+            extended_attribute_identifier = "unknown"
+
+        # Move file to archive
+        move_file(bucket_name, file_key, f"archive/{file_key}")
 
         upsert_audit_table(
             message_id,
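copy_file_to_external_bucket and delete_file come from common/aws_s3_utils.py, which this diff doesn't show. A plausible sketch of what they might look like, assuming they wrap boto3's copy_object and delete_object and that the two EXPECTED_BUCKET_OWNER_ACCOUNT arguments map to the destination and source owner checks respectively (parameter order here is an assumption):

import boto3

s3_client = boto3.client("s3")


def copy_file_to_external_bucket(
    source_bucket: str,
    source_key: str,
    destination_bucket: str,
    destination_key: str,
    expected_destination_owner: str,
    expected_source_owner: str,
) -> None:
    """Copy an object into another account's bucket, verifying both bucket owners."""
    s3_client.copy_object(
        CopySource={"Bucket": source_bucket, "Key": source_key},
        Bucket=destination_bucket,
        Key=destination_key,
        ExpectedBucketOwner=expected_destination_owner,
        ExpectedSourceBucketOwner=expected_source_owner,
    )


def delete_file(bucket_name: str, file_key: str, expected_bucket_owner: str) -> None:
    """Delete an object, failing fast if the bucket is not owned by the expected account."""
    s3_client.delete_object(
        Bucket=bucket_name,
        Key=file_key,
        ExpectedBucketOwner=expected_bucket_owner,
    )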

lambdas/filenameprocessor/src/file_validation.py

Lines changed: 2 additions & 2 deletions
@@ -40,14 +40,14 @@ def is_valid_datetime(timestamp: str) -> bool:
 def validate_extended_attributes_file_key(file_key: str) -> str:
     """
     Checks that all elements of the file key are valid, raises an exception otherwise.
-    Returns a string containing the organization code and COVID vaccine type needed in the audit table.
+    Returns a string containing the organization code.
     """
     if not match(r"^[^_.]*_[^_.]*_[^_.]*_[^_.]*_[^_.]*_[^_.]*_[^_.]*", file_key):
         raise InvalidFileKeyError("Initial file validation failed: invalid extended attributes file key format")
 
     file_key_parts_without_extension, _ = split_file_key(file_key)
     organization_code = file_key_parts_without_extension[5]
-    return f"{organization_code}_COVID"
+    return organization_code
 
 
 def validate_batch_file_key(file_key: str) -> tuple[str, str]:
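To see the new return value in isolation, here is a self-contained sketch of the parsing. split_file_key is assumed to drop the extension and split the stem on underscores (which is consistent with index 5 resolving to the organization code in the tests below), and InvalidFileKeyError is replaced with ValueError so the snippet runs standalone:

import re


def split_file_key(file_key: str) -> tuple[list[str], str]:
    """Assumed behaviour: split off the extension, then split the stem on underscores."""
    stem, _, extension = file_key.rpartition(".")
    return stem.split("_"), extension


def validate_extended_attributes_file_key(file_key: str) -> str:
    if not re.match(r"^[^_.]*_[^_.]*_[^_.]*_[^_.]*_[^_.]*_[^_.]*_[^_.]*", file_key):
        raise ValueError("invalid extended attributes file key format")  # InvalidFileKeyError in the real code
    file_key_parts_without_extension, _ = split_file_key(file_key)
    return file_key_parts_without_extension[5]


# "Vaccination_Extended_Attributes_v1_5_X8E5B_20000101T00000001.csv" -> "X8E5B"
print(validate_extended_attributes_file_key(
    "Vaccination_Extended_Attributes_v1_5_X8E5B_20000101T00000001.csv"
))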

lambdas/filenameprocessor/tests/test_audit_table.py

Lines changed: 29 additions & 0 deletions
@@ -73,6 +73,7 @@ def test_upsert_audit_table_with_duplicate_message_id_raises_exception(self):
             queue_name=ravs_rsv_test_file.queue_name,
             file_status=FileStatus.PROCESSED,
             expiry_timestamp=ravs_rsv_test_file.expires_at,
+            condition_expression="attribute_not_exists(message_id)",
         )
 
         assert_audit_table_entry(ravs_rsv_test_file, FileStatus.PROCESSED)
@@ -85,4 +86,32 @@ def test_upsert_audit_table_with_duplicate_message_id_raises_exception(self):
                 queue_name=ravs_rsv_test_file.queue_name,
                 file_status=FileStatus.PROCESSED,
                 expiry_timestamp=ravs_rsv_test_file.expires_at,
+                condition_expression="attribute_not_exists(message_id)",
             )
+
+    def test_upsert_audit_table_with_duplicate_message_id_no_condition(self):
+        """Test that attempting to create an entry with a message_id that already exists causes no exception
+        if the condition_expression is not set"""
+        ravs_rsv_test_file = FileDetails("RAVS", "RSV", "YGM41", file_number=1)
+
+        upsert_audit_table(
+            message_id=ravs_rsv_test_file.message_id,
+            file_key=ravs_rsv_test_file.file_key,
+            created_at_formatted_str=ravs_rsv_test_file.created_at_formatted_string,
+            queue_name=ravs_rsv_test_file.queue_name,
+            file_status=FileStatus.PROCESSING,
+            expiry_timestamp=ravs_rsv_test_file.expires_at,
+        )
+
+        assert_audit_table_entry(ravs_rsv_test_file, FileStatus.PROCESSING)
+
+        upsert_audit_table(
+            message_id=ravs_rsv_test_file.message_id,
+            file_key=ravs_rsv_test_file.file_key,
+            created_at_formatted_str=ravs_rsv_test_file.created_at_formatted_string,
+            queue_name=ravs_rsv_test_file.queue_name,
+            file_status=FileStatus.PROCESSED,
+            expiry_timestamp=ravs_rsv_test_file.expires_at,
+        )
+
+        assert_audit_table_entry(ravs_rsv_test_file, FileStatus.PROCESSED)
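assert_audit_table_entry is a test helper that isn't shown in this diff. A plausible sketch of what such a helper might check, assuming the audit row is keyed on message_id; the table name and the attribute names used here are guesses for illustration, not taken from the repo:

import boto3

dynamodb_client = boto3.client("dynamodb")
AUDIT_TABLE_NAME = "example-audit-table"  # illustrative


def assert_audit_table_entry(test_file, expected_status: str) -> None:
    """Read the audit row back and check it reflects the latest upsert."""
    response = dynamodb_client.get_item(
        TableName=AUDIT_TABLE_NAME,
        Key={"message_id": {"S": test_file.message_id}},
    )
    item = response["Item"]
    # Attribute names below are assumptions for illustration only.
    assert item["queue_name"]["S"] == test_file.queue_name
    assert item["status"]["S"] == expected_status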

lambdas/filenameprocessor/tests/test_file_key_validation.py

Lines changed: 3 additions & 3 deletions
@@ -114,18 +114,18 @@ def test_split_file_key(self, _):
         self.assertEqual(split_file_key(file_key), expected)
 
     def test_validate_extended_attributes_file_key(self, _):
-        """Tests that validate_extended_attributes_file_key returns organization code and COVID vaccine type if all
+        """Tests that validate_extended_attributes_file_key returns organization code if all
         elements pass validation, and raises an exception otherwise"""
         test_cases_for_success_scenarios = [
             # Valid extended attributes file key
             (
                 "Vaccination_Extended_Attributes_v1_5_X8E5B_20000101T00000001.csv",
-                "X8E5B_COVID",
+                "X8E5B",
             ),
             # Valid extended attributes file key with different organization code
             (
                 "Vaccination_Extended_Attributes_v1_5_YGM41_20221231T23595999.csv",
-                "YGM41_COVID",
+                "YGM41",
             ),
         ]
