Skip to content

Commit 7c7b8a3

Browse files
authored
VED-744 Batch Audit Table TTL (#790)
* init: ExpiresAt added to audit table * review fixes * review fixes II
1 parent 3566001 commit 7c7b8a3

File tree

11 files changed

+50
-21
lines changed

11 files changed

+50
-21
lines changed

filenameprocessor/src/audit_table.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ def upsert_audit_table(
88
message_id: str,
99
file_key: str,
1010
created_at_formatted_str: str,
11+
expiry_timestamp: int,
1112
queue_name: str,
1213
file_status: str
1314
) -> None:
@@ -24,6 +25,7 @@ def upsert_audit_table(
2425
AuditTableKeys.QUEUE_NAME: {"S": queue_name},
2526
AuditTableKeys.STATUS: {"S": file_status},
2627
AuditTableKeys.TIMESTAMP: {"S": created_at_formatted_str},
28+
AuditTableKeys.EXPIRES_AT: {"N": str(expiry_timestamp)},
2729
},
2830
ConditionExpression="attribute_not_exists(message_id)", # Prevents accidental overwrites
2931
)

filenameprocessor/src/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
SOURCE_BUCKET_NAME = os.getenv("SOURCE_BUCKET_NAME")
1616
AUDIT_TABLE_NAME = os.getenv("AUDIT_TABLE_NAME")
17+
AUDIT_TABLE_TTL_DAYS = os.getenv("AUDIT_TABLE_TTL_DAYS")
1718
VALID_VERSIONS = ["V5"]
1819

1920
SUPPLIER_PERMISSIONS_HASH_KEY = "supplier_permissions"
@@ -48,3 +49,4 @@ class AuditTableKeys(StrEnum):
4849
QUEUE_NAME = "queue_name"
4950
STATUS = "status"
5051
TIMESTAMP = "timestamp"
52+
EXPIRES_AT = "expires_at"

filenameprocessor/src/file_name_processor.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
import argparse
1010
from uuid import uuid4
11-
from utils_for_filenameprocessor import get_created_at_formatted_string, move_file
11+
from utils_for_filenameprocessor import get_creation_and_expiry_times, move_file
1212
from file_key_validation import validate_file_key, is_file_in_directory_root
1313
from send_sqs_message import make_and_send_sqs_message
1414
from make_and_upload_ack_file import make_and_upload_the_ack_file
@@ -63,14 +63,14 @@ def handle_record(record) -> dict:
6363

6464
try:
6565
message_id = str(uuid4())
66-
created_at_formatted_string = get_created_at_formatted_string(bucket_name, file_key)
66+
created_at_formatted_string, expiry_timestamp = get_creation_and_expiry_times(bucket_name, file_key)
6767

6868
vaccine_type, supplier = validate_file_key(file_key)
6969
permissions = validate_vaccine_type_permissions(vaccine_type=vaccine_type, supplier=supplier)
7070

7171
queue_name = f"{supplier}_{vaccine_type}"
7272
upsert_audit_table(
73-
message_id, file_key, created_at_formatted_string, queue_name, FileStatus.QUEUED
73+
message_id, file_key, created_at_formatted_string, expiry_timestamp, queue_name, FileStatus.QUEUED
7474
)
7575
make_and_send_sqs_message(
7676
file_key, message_id, permissions, vaccine_type, supplier, created_at_formatted_string
@@ -100,7 +100,7 @@ def handle_record(record) -> dict:
100100

101101
queue_name = f"{supplier}_{vaccine_type}"
102102
upsert_audit_table(
103-
message_id, file_key, created_at_formatted_string, queue_name, FileStatus.PROCESSED
103+
message_id, file_key, created_at_formatted_string, expiry_timestamp, queue_name, FileStatus.PROCESSED
104104
)
105105

106106
# Create ack file

filenameprocessor/src/utils_for_filenameprocessor.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
11
"""Utils for filenameprocessor lambda"""
2+
from datetime import timedelta
23
from clients import s3_client, logger
4+
from constants import AUDIT_TABLE_TTL_DAYS
35

46

5-
def get_created_at_formatted_string(bucket_name: str, file_key: str) -> str:
6-
"""Get the created_at_formatted_string from the response"""
7+
def get_creation_and_expiry_times(bucket_name: str, file_key: str) -> (str, int):
8+
"""Get 'created_at_formatted_string' and 'expires_at' from the response"""
79
response = s3_client.get_object(Bucket=bucket_name, Key=file_key)
8-
return response["LastModified"].strftime("%Y%m%dT%H%M%S00")
10+
creation_datetime = response["LastModified"]
11+
expiry_datetime = creation_datetime + timedelta(days=int(AUDIT_TABLE_TTL_DAYS))
12+
expiry_timestamp = int(expiry_datetime.timestamp())
13+
return creation_datetime.strftime("%Y%m%dT%H%M%S00"), expiry_timestamp
914

1015

1116
def move_file(bucket_name: str, source_file_key: str, destination_file_key: str) -> None:

filenameprocessor/tests/test_audit_table.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,8 @@ def test_upsert_audit_table(self):
5454
file_key=ravs_rsv_test_file.file_key,
5555
created_at_formatted_str=ravs_rsv_test_file.created_at_formatted_string,
5656
queue_name=ravs_rsv_test_file.queue_name,
57-
file_status=FileStatus.PROCESSED
57+
file_status=FileStatus.PROCESSED,
58+
expiry_timestamp=ravs_rsv_test_file.expires_at
5859
)
5960

6061
assert_audit_table_entry(ravs_rsv_test_file, FileStatus.PROCESSED)
@@ -68,7 +69,8 @@ def test_upsert_audit_table_with_duplicate_message_id_raises_exception(self):
6869
file_key=ravs_rsv_test_file.file_key,
6970
created_at_formatted_str=ravs_rsv_test_file.created_at_formatted_string,
7071
queue_name=ravs_rsv_test_file.queue_name,
71-
file_status=FileStatus.PROCESSED
72+
file_status=FileStatus.PROCESSED,
73+
expiry_timestamp=ravs_rsv_test_file.expires_at
7274
)
7375

7476
assert_audit_table_entry(ravs_rsv_test_file, FileStatus.PROCESSED)
@@ -80,4 +82,5 @@ def test_upsert_audit_table_with_duplicate_message_id_raises_exception(self):
8082
created_at_formatted_str=ravs_rsv_test_file.created_at_formatted_string,
8183
queue_name=ravs_rsv_test_file.queue_name,
8284
file_status=FileStatus.PROCESSED,
85+
expiry_timestamp=ravs_rsv_test_file.expires_at
8386
)

filenameprocessor/tests/test_lambda_handler.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
MOCK_ODS_CODE_TO_SUPPLIER
1919
)
2020
from tests.utils_for_tests.mock_environment_variables import MOCK_ENVIRONMENT_DICT, BucketNames, Sqs
21-
from tests.utils_for_tests.values_for_tests import MOCK_CREATED_AT_FORMATTED_STRING, MockFileDetails
21+
from tests.utils_for_tests.values_for_tests import MOCK_CREATED_AT_FORMATTED_STRING, MOCK_EXPIRES_AT, MockFileDetails
2222

2323
# Ensure environment variables are mocked before importing from src files
2424
with patch.dict("os.environ", MOCK_ENVIRONMENT_DICT):
@@ -62,10 +62,11 @@ def run(self, result=None):
6262

6363
# Set up common patches to be applied to all tests in the class (these can be overridden in individual tests.)
6464
common_patches = [
65-
# Patch get_created_at_formatted_string, so that the ack file key can be deduced (it is already unittested
65+
# Patch get_creation_and_expiry_times, so that the ack file key can be deduced (it is already unittested
6666
# separately). Note that files numbered '1', which are predominantly used in these tests, use the
6767
# MOCK_CREATED_AT_FORMATTED_STRING.
68-
patch("file_name_processor.get_created_at_formatted_string", return_value=MOCK_CREATED_AT_FORMATTED_STRING),
68+
patch("file_name_processor.get_creation_and_expiry_times",
69+
return_value=(MOCK_CREATED_AT_FORMATTED_STRING, MOCK_EXPIRES_AT)),
6970
# Patch redis_client to use a fake redis client.
7071
patch("elasticache.redis_client", new=fakeredis.FakeStrictRedis()),
7172
# Patch the permissions config to allow all suppliers full permissions for all vaccine types.
@@ -242,6 +243,7 @@ def test_lambda_invalid_file_key_no_other_files_in_queue(self):
242243
"queue_name": {"S": "unknown_unknown"},
243244
"status": {"S": "Processed"},
244245
"timestamp": {"S": file_details.created_at_formatted_string},
246+
"expires_at": {"N": str(file_details.expires_at)},
245247
}
246248
]
247249
self.assertEqual(self.get_audit_table_items(), expected_table_items)

filenameprocessor/tests/test_utils_for_filenameprocessor.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from unittest import TestCase
44
from unittest.mock import patch
5-
from datetime import datetime, timezone
5+
from datetime import datetime, timedelta, timezone
66
from moto import mock_s3
77
from boto3 import client as boto3_client
88

@@ -11,9 +11,10 @@
1111

1212
# Ensure environment variables are mocked before importing from src files
1313
with patch.dict("os.environ", MOCK_ENVIRONMENT_DICT):
14+
from constants import AUDIT_TABLE_TTL_DAYS
1415
from clients import REGION_NAME
1516
from utils_for_filenameprocessor import (
16-
get_created_at_formatted_string,
17+
get_creation_and_expiry_times,
1718
move_file
1819
)
1920

@@ -32,20 +33,24 @@ def tearDown(self):
3233
"""Tear down the s3 buckets"""
3334
GenericTearDown(s3_client)
3435

35-
def test_get_created_at_formatted_string(self):
36-
"""Test that get_created_at_formatted_string can correctly get the created_at_formatted_string"""
36+
def test_get_creation_and_expiry_times(self):
37+
"""Test that get_creation_and_expiry_times can correctly get the created_at_formatted_string and expires_at"""
3738
bucket_name = BucketNames.SOURCE
3839
file_key = "test_file_key"
3940

4041
s3_client.put_object(Bucket=bucket_name, Key=file_key)
4142

42-
mock_last_modified = {"LastModified": datetime(2024, 1, 1, 12, 0, 0, tzinfo=timezone.utc)}
43-
expected_result = "20240101T12000000"
43+
mock_last_modified_created_at = datetime(2024, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
44+
mock_last_modified = {"LastModified": mock_last_modified_created_at}
45+
expected_result_created_at = "20240101T12000000"
46+
expected_expiry_datetime = mock_last_modified_created_at + timedelta(days=int(AUDIT_TABLE_TTL_DAYS))
47+
expected_result_expires_at = int(expected_expiry_datetime.timestamp())
4448

4549
with patch("utils_for_filenameprocessor.s3_client.get_object", return_value=mock_last_modified):
46-
created_at_formatted_string = get_created_at_formatted_string(bucket_name, file_key)
50+
created_at_formatted_string, expires_at = get_creation_and_expiry_times(bucket_name, file_key)
4751

48-
self.assertEqual(created_at_formatted_string, expected_result)
52+
self.assertEqual(created_at_formatted_string, expected_result_created_at)
53+
self.assertEqual(expires_at, expected_result_expires_at)
4954

5055
def test_move_file(self):
5156
"""Tests that move_file correctly moves a file from one location to another within a single S3 bucket"""

filenameprocessor/tests/utils_for_tests/mock_environment_variables.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,5 +40,6 @@ class Sqs:
4040
"REDIS_HOST": "localhost",
4141
"REDIS_PORT": "6379",
4242
"SPLUNK_FIREHOSE_NAME": Firehose.STREAM_NAME,
43-
"AUDIT_TABLE_NAME": "immunisation-batch-internal-dev-audit-table"
43+
"AUDIT_TABLE_NAME": "immunisation-batch-internal-dev-audit-table",
44+
"AUDIT_TABLE_TTL_DAYS": "14",
4445
}

filenameprocessor/tests/utils_for_tests/values_for_tests.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
# NOTE that the file details for files numbered anything other than one will have a different
1717
# created_at_formatted_string (the final digit of the year will be the file number, rather than '1')
1818
MOCK_CREATED_AT_FORMATTED_STRING = "20010101T00000000"
19+
MOCK_EXPIRES_AT = 947808000
1920

2021

2122
class FileDetails:
@@ -33,6 +34,7 @@ def __init__(self, supplier: str, vaccine_type: str, ods_code: str, file_number:
3334
self.queue_name = f"{self.supplier}_{self.vaccine_type}"
3435

3536
self.created_at_formatted_string = f"200{file_number}0101T00000000"
37+
self.expires_at = MOCK_EXPIRES_AT
3638
self.message_id = f"{self.supplier}_{self.vaccine_type}_test_id_{file_number}"
3739
self.name = f"{self.vaccine_type}/ {self.supplier} file"
3840

@@ -61,6 +63,7 @@ def __init__(self, supplier: str, vaccine_type: str, ods_code: str, file_number:
6163
AuditTableKeys.FILENAME: {"S": self.file_key},
6264
AuditTableKeys.QUEUE_NAME: {"S": self.queue_name},
6365
AuditTableKeys.TIMESTAMP: {"S": self.created_at_formatted_string},
66+
AuditTableKeys.EXPIRES_AT: {"N": str(self.expires_at)},
6467
}
6568

6669

terraform/dynamodb.tf

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@ resource "aws_dynamodb_table" "audit-table" {
2424
type = "S"
2525
}
2626

27+
ttl {
28+
attribute_name = "expires_at"
29+
enabled = true
30+
}
31+
2732
global_secondary_index {
2833
name = "filename_index"
2934
hash_key = "filename"

0 commit comments

Comments
 (0)