Skip to content

Commit 3229763

Browse files
authored
VED-757 Handle empty batch files (#801)
1 parent ac1a001 commit 3229763

File tree

10 files changed

+106
-32
lines changed

10 files changed

+106
-32
lines changed

filenameprocessor/src/constants.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
InvalidSupplierError,
1010
UnhandledAuditTableError,
1111
DuplicateFileError,
12-
UnhandledSqsError,
12+
UnhandledSqsError, EmptyFileError,
1313
)
1414

1515
SOURCE_BUCKET_NAME = os.getenv("SOURCE_BUCKET_NAME")
@@ -27,13 +27,17 @@
2727
ERROR_TYPE_TO_STATUS_CODE_MAP = {
2828
VaccineTypePermissionsError: 403,
2929
InvalidFileKeyError: 400, # Includes invalid ODS code, therefore unable to identify supplier
30+
EmptyFileError: 400,
3031
InvalidSupplierError: 500, # Only raised if supplier variable is not correctly set
3132
UnhandledAuditTableError: 500,
3233
DuplicateFileError: 422,
3334
UnhandledSqsError: 500,
3435
Exception: 500,
3536
}
3637

38+
# The size in bytes of an empty batch file containing only the headers row
39+
EMPTY_BATCH_FILE_SIZE_IN_BYTES = 614
40+
3741

3842
class FileStatus(StrEnum):
3943
"""File status constants"""
@@ -42,6 +46,7 @@ class FileStatus(StrEnum):
4246
PROCESSING = "Processing"
4347
PROCESSED = "Processed"
4448
DUPLICATE = "Not processed - duplicate"
49+
EMPTY = "Not processed - empty file"
4550

4651

4752
class AuditTableKeys(StrEnum):

filenameprocessor/src/errors.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@ class DuplicateFileError(Exception):
55
"""A custom exception for when it is identified that the file is a duplicate."""
66

77

8+
class EmptyFileError(Exception):
9+
"""A custom exception for when the batch file contains only the header row or is completely empty"""
10+
11+
812
class UnhandledAuditTableError(Exception):
913
"""A custom exception for when an unexpected error occurs whilst adding the file to the audit table."""
1014

filenameprocessor/src/file_name_processor.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@
99
import argparse
1010
from uuid import uuid4
1111
from utils_for_filenameprocessor import get_created_at_formatted_string, move_file, invoke_filename_lambda
12-
from file_key_validation import validate_file_key, is_file_in_directory_root
12+
from file_validation import validate_file_key, is_file_in_directory_root, validate_file_not_empty
1313
from send_sqs_message import make_and_send_sqs_message
1414
from make_and_upload_ack_file import make_and_upload_the_ack_file
1515
from audit_table import upsert_audit_table, get_next_queued_file_details, ensure_file_is_not_a_duplicate
16-
from clients import logger
16+
from clients import logger, s3_client
1717
from logging_decorator import logging_decorator
1818
from supplier_permissions import validate_vaccine_type_permissions
1919
from errors import (
@@ -22,7 +22,7 @@
2222
InvalidSupplierError,
2323
UnhandledAuditTableError,
2424
DuplicateFileError,
25-
UnhandledSqsError,
25+
UnhandledSqsError, EmptyFileError,
2626
)
2727
from constants import FileStatus, DATA_SOURCES_BUCKET_SUFFIX, ERROR_TYPE_TO_STATUS_CODE_MAP
2828

@@ -67,11 +67,15 @@ def handle_record(record) -> dict:
6767

6868
# Get message_id if the file is not new, else assign one
6969
message_id = record.get("message_id", str(uuid4()))
70+
s3_response = s3_client.get_object(Bucket=bucket_name, Key=file_key)
7071

71-
created_at_formatted_string = get_created_at_formatted_string(bucket_name, file_key)
72+
created_at_formatted_string = get_created_at_formatted_string(s3_response)
7273

7374
vaccine_type, supplier = validate_file_key(file_key)
75+
# VED-757: Known issue with suppliers sometimes sending empty files
76+
validate_file_not_empty(s3_response)
7477
permissions = validate_vaccine_type_permissions(vaccine_type=vaccine_type, supplier=supplier)
78+
7579
if not is_existing_file:
7680
ensure_file_is_not_a_duplicate(file_key, created_at_formatted_string)
7781

@@ -106,12 +110,18 @@ def handle_record(record) -> dict:
106110
InvalidSupplierError,
107111
UnhandledAuditTableError,
108112
DuplicateFileError,
113+
EmptyFileError,
109114
UnhandledSqsError,
110115
Exception,
111116
) as error:
112117
logger.error("Error processing file '%s': %s", file_key, str(error))
113118

114-
file_status = FileStatus.DUPLICATE if isinstance(error, DuplicateFileError) else FileStatus.PROCESSED
119+
file_status = FileStatus.PROCESSED
120+
if isinstance(error, DuplicateFileError):
121+
file_status = FileStatus.DUPLICATE
122+
elif isinstance(error, EmptyFileError):
123+
file_status = FileStatus.EMPTY
124+
115125
queue_name = f"{supplier}_{vaccine_type}"
116126
upsert_audit_table(
117127
message_id, file_key, created_at_formatted_string, queue_name, file_status, is_existing_file

filenameprocessor/src/file_key_validation.py renamed to filenameprocessor/src/file_validation.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22

33
from re import match
44
from datetime import datetime
5-
from constants import VALID_VERSIONS
5+
from constants import VALID_VERSIONS, EMPTY_BATCH_FILE_SIZE_IN_BYTES
66
from elasticache import get_valid_vaccine_types_from_cache, get_supplier_system_from_cache
7-
from errors import InvalidFileKeyError
7+
from errors import InvalidFileKeyError, EmptyFileError
88

99

1010
def is_file_in_directory_root(file_key: str) -> bool:
@@ -68,3 +68,9 @@ def validate_file_key(file_key: str) -> tuple[str, str]:
6868
raise InvalidFileKeyError("Initial file validation failed: invalid file key")
6969

7070
return vaccine_type, supplier
71+
72+
73+
def validate_file_not_empty(s3_response: dict) -> None:
74+
"""Checks that the batch file from S3 is not empty or containing only the header row"""
75+
if s3_response.get("ContentLength", 0) <= EMPTY_BATCH_FILE_SIZE_IN_BYTES:
76+
raise EmptyFileError("Initial file validation failed: batch file was empty")

filenameprocessor/src/utils_for_filenameprocessor.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,9 @@
55
from clients import s3_client, logger, lambda_client
66

77

8-
def get_created_at_formatted_string(bucket_name: str, file_key: str) -> str:
8+
def get_created_at_formatted_string(s3_response: dict) -> str:
99
"""Get the created_at_formatted_string from the response"""
10-
response = s3_client.get_object(Bucket=bucket_name, Key=file_key)
11-
return response["LastModified"].strftime("%Y%m%dT%H%M%S00")
10+
return s3_response["LastModified"].strftime("%Y%m%dT%H%M%S00")
1211

1312

1413
def move_file(bucket_name: str, source_file_key: str, destination_file_key: str) -> None:

filenameprocessor/tests/test_file_key_validation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
# Ensure environment variables are mocked before importing from src files
1111
with patch.dict("os.environ", MOCK_ENVIRONMENT_DICT):
12-
from file_key_validation import is_file_in_directory_root, is_valid_datetime, validate_file_key
12+
from file_validation import is_file_in_directory_root, is_valid_datetime, validate_file_key
1313
from errors import InvalidFileKeyError
1414

1515
VALID_FLU_EMIS_FILE_KEY = MockFileDetails.emis_flu.file_key

filenameprocessor/tests/test_lambda_handler.py

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
MOCK_ODS_CODE_TO_SUPPLIER
1919
)
2020
from tests.utils_for_tests.mock_environment_variables import MOCK_ENVIRONMENT_DICT, BucketNames, Sqs
21-
from tests.utils_for_tests.values_for_tests import MOCK_CREATED_AT_FORMATTED_STRING, MockFileDetails
21+
from tests.utils_for_tests.values_for_tests import MOCK_CREATED_AT_FORMATTED_STRING, MockFileDetails, \
22+
MOCK_BATCH_FILE_CONTENT, MOCK_FILE_HEADERS
2223

2324
# Ensure environment variables are mocked before importing from src files
2425
with patch.dict("os.environ", MOCK_ENVIRONMENT_DICT):
@@ -92,7 +93,7 @@ def make_record(file_key: str):
9293
@staticmethod
9394
def make_record_with_message_id(file_key: str, message_id: str):
9495
"""
95-
Makes a record which includes a message_id, with the s3 bucket name set to BucketNames.SOURCE and and
96+
Makes a record which includes a message_id, with the s3 bucket name set to BucketNames.SOURCE and
9697
s3 object key set to the file_key.
9798
"""
9899
return {"s3": {"bucket": {"name": BucketNames.SOURCE}, "object": {"key": file_key}}, "message_id": message_id}
@@ -180,7 +181,7 @@ def test_lambda_handler_new_file_success_and_first_in_queue(self):
180181
for file_details in test_cases:
181182
with self.subTest(file_details.name):
182183
# Setup the file in the source bucket
183-
s3_client.put_object(Bucket=BucketNames.SOURCE, Key=file_details.file_key)
184+
s3_client.put_object(Bucket=BucketNames.SOURCE, Key=file_details.file_key, Body=MOCK_BATCH_FILE_CONTENT)
184185

185186
with ( # noqa: E999
186187
patch("file_name_processor.uuid4", return_value=file_details.message_id), # noqa: E999
@@ -208,7 +209,7 @@ def test_lambda_handler_new_file_success_and_other_files_in_queue(self):
208209
file_details = MockFileDetails.ravs_rsv_1
209210
file_already_processing_details = MockFileDetails.ravs_rsv_2
210211

211-
s3_client.put_object(Bucket=BucketNames.SOURCE, Key=file_details.file_key)
212+
s3_client.put_object(Bucket=BucketNames.SOURCE, Key=file_details.file_key, Body=MOCK_BATCH_FILE_CONTENT)
212213

213214
add_entry_to_table(file_already_processing_details, FileStatus.PROCESSING)
214215

@@ -234,6 +235,8 @@ def test_lambda_handler_existing_file(self):
234235
file_details = MockFileDetails.ravs_rsv_1
235236
add_entry_to_table(file_details, FileStatus.QUEUED)
236237

238+
s3_client.put_object(Bucket=BucketNames.SOURCE, Key=file_details.file_key, Body=MOCK_BATCH_FILE_CONTENT)
239+
237240
with ( # noqa: E999
238241
patch("file_name_processor.uuid4", return_value=file_details.message_id), # noqa: E999
239242
patch("file_name_processor.invoke_filename_lambda") as mock_invoke_filename_lambda, # noqa: E999
@@ -248,6 +251,32 @@ def test_lambda_handler_existing_file(self):
248251
self.assert_no_ack_file(file_details)
249252
mock_invoke_filename_lambda.assert_not_called()
250253

254+
def test_lambda_handler_correctly_flags_empty_file(self):
255+
"""
256+
VED-757 Tests that for an empty batch file:
257+
* The file status is updated to 'Not processed - empty file' in the audit table
258+
* The message is not sent to SQS
259+
* The failure inf_ack file is created
260+
"""
261+
file_details = MockFileDetails.ravs_rsv_1
262+
add_entry_to_table(file_details, FileStatus.QUEUED)
263+
264+
s3_client.put_object(Bucket=BucketNames.SOURCE, Key=file_details.file_key, Body=MOCK_FILE_HEADERS)
265+
266+
with ( # noqa: E999
267+
patch("file_name_processor.uuid4", return_value=file_details.message_id), # noqa: E999
268+
patch("file_name_processor.invoke_filename_lambda") as mock_invoke_filename_lambda, # noqa: E999
269+
): # noqa: E999
270+
lambda_handler(
271+
self.make_event([self.make_record_with_message_id(file_details.file_key, file_details.message_id)]),
272+
None,
273+
)
274+
275+
assert_audit_table_entry(file_details, FileStatus.EMPTY)
276+
self.assert_no_sqs_message()
277+
self.assert_ack_file_contents(file_details)
278+
mock_invoke_filename_lambda.assert_not_called()
279+
251280
def test_lambda_handler_non_root_file(self):
252281
"""
253282
Tests that when the file is not in the root of the source bucket, no action is taken:
@@ -280,7 +309,7 @@ def test_lambda_handler_duplicate_file_other_files_in_queue(self):
280309
* The invoke_filename_lambda method is not called
281310
"""
282311
file_details = MockFileDetails.ravs_rsv_1
283-
s3_client.put_object(Bucket=BucketNames.SOURCE, Key=file_details.file_key)
312+
s3_client.put_object(Bucket=BucketNames.SOURCE, Key=file_details.file_key, Body=MOCK_BATCH_FILE_CONTENT)
284313

285314
duplicate_already_in_table = deepcopy(file_details)
286315
duplicate_already_in_table.message_id = "duplicate_id"
@@ -312,7 +341,7 @@ def test_lambda_invalid_file_key_no_other_files_in_queue(self):
312341
* The invoke_filename_lambda method is not called
313342
"""
314343
invalid_file_key = "InvalidVaccineType_Vaccinations_v5_YGM41_20240708T12130100.csv"
315-
s3_client.put_object(Bucket=BucketNames.SOURCE, Key=invalid_file_key)
344+
s3_client.put_object(Bucket=BucketNames.SOURCE, Key=invalid_file_key, Body=MOCK_BATCH_FILE_CONTENT)
316345
file_details = deepcopy(MockFileDetails.ravs_rsv_1)
317346
file_details.file_key = invalid_file_key
318347
file_details.ack_file_key = self.get_ack_file_key(invalid_file_key)
@@ -351,7 +380,7 @@ def test_lambda_invalid_permissions_other_files_in_queue(self):
351380
* The invoke_filename_lambda method is called with queued file details
352381
"""
353382
file_details = MockFileDetails.ravs_rsv_1
354-
s3_client.put_object(Bucket=BucketNames.SOURCE, Key=file_details.file_key)
383+
s3_client.put_object(Bucket=BucketNames.SOURCE, Key=file_details.file_key, Body=MOCK_BATCH_FILE_CONTENT)
355384

356385
queued_file_details = MockFileDetails.ravs_rsv_2
357386
add_entry_to_table(queued_file_details, FileStatus.QUEUED)
@@ -405,9 +434,10 @@ def test_lambda_handler_multiple_records_for_same_queue(self):
405434
invalid_file_details_3.audit_table_entry[AuditTableKeys.QUEUE_NAME] = {"S": invalid_file_details_3.queue_name}
406435
invalid_file_details_3.sqs_message_body["filename"] = invalid_file_details_3.file_key
407436

408-
s3_client.put_object(Bucket=BucketNames.SOURCE, Key=valid_file_details_1.file_key)
409-
s3_client.put_object(Bucket=BucketNames.SOURCE, Key=valid_file_details_2.file_key)
410-
s3_client.put_object(Bucket=BucketNames.SOURCE, Key=invalid_file_details_3.file_key)
437+
s3_client.put_object(Bucket=BucketNames.SOURCE, Key=valid_file_details_1.file_key, Body=MOCK_BATCH_FILE_CONTENT)
438+
s3_client.put_object(Bucket=BucketNames.SOURCE, Key=valid_file_details_2.file_key, Body=MOCK_BATCH_FILE_CONTENT)
439+
s3_client.put_object(Bucket=BucketNames.SOURCE, Key=invalid_file_details_3.file_key,
440+
Body=MOCK_BATCH_FILE_CONTENT)
411441

412442
message_ids = [
413443
valid_file_details_1.message_id,

filenameprocessor/tests/test_logging_decorator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
from tests.utils_for_tests.generic_setup_and_teardown import GenericSetUp, GenericTearDown
1212
from tests.utils_for_tests.mock_environment_variables import MOCK_ENVIRONMENT_DICT, BucketNames, Firehose
13-
from tests.utils_for_tests.values_for_tests import MockFileDetails, fixed_datetime
13+
from tests.utils_for_tests.values_for_tests import MockFileDetails, fixed_datetime, MOCK_BATCH_FILE_CONTENT
1414
from tests.utils_for_tests.utils_for_filenameprocessor_tests import create_mock_hget
1515

1616
# Ensure environment variables are mocked before importing from src files
@@ -41,7 +41,7 @@ class TestLoggingDecorator(unittest.TestCase):
4141
def setUp(self):
4242
"""Set up the mock AWS environment and upload a valid FLU/EMIS file example"""
4343
GenericSetUp(s3_client, firehose_client, sqs_client, dynamodb_client)
44-
s3_client.put_object(Bucket=BucketNames.SOURCE, Key=FILE_DETAILS.file_key)
44+
s3_client.put_object(Bucket=BucketNames.SOURCE, Key=FILE_DETAILS.file_key, Body=MOCK_BATCH_FILE_CONTENT)
4545

4646
def tearDown(self):
4747
"""Clean the mock AWS environment"""

filenameprocessor/tests/test_utils_for_filenameprocessor.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37,16 +37,10 @@ def tearDown(self):
3737

3838
def test_get_created_at_formatted_string(self):
3939
"""Test that get_created_at_formatted_string can correctly get the created_at_formatted_string"""
40-
bucket_name = BucketNames.SOURCE
41-
file_key = "test_file_key"
42-
43-
s3_client.put_object(Bucket=bucket_name, Key=file_key)
44-
45-
mock_last_modified = {"LastModified": datetime(2024, 1, 1, 12, 0, 0, tzinfo=timezone.utc)}
40+
mock_last_modified_s3_response = {"LastModified": datetime(2024, 1, 1, 12, 0, 0, tzinfo=timezone.utc)}
4641
expected_result = "20240101T12000000"
4742

48-
with patch("utils_for_filenameprocessor.s3_client.get_object", return_value=mock_last_modified):
49-
created_at_formatted_string = get_created_at_formatted_string(bucket_name, file_key)
43+
created_at_formatted_string = get_created_at_formatted_string(mock_last_modified_s3_response)
5044

5145
self.assertEqual(created_at_formatted_string, expected_result)
5246

filenameprocessor/tests/utils_for_tests/values_for_tests.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,3 +75,29 @@ class MockFileDetails:
7575
emis_flu = FileDetails("EMIS", "FLU", "YGM41")
7676
emis_rsv = FileDetails("EMIS", "RSV", "YGM41")
7777
ravs_flu = FileDetails("RAVS", "FLU", "X8E5B")
78+
79+
80+
MOCK_FILE_HEADERS = (
81+
"NHS_NUMBER|PERSON_FORENAME|PERSON_SURNAME|PERSON_DOB|PERSON_GENDER_CODE|PERSON_POSTCODE|"
82+
"DATE_AND_TIME|SITE_CODE|SITE_CODE_TYPE_URI|UNIQUE_ID|UNIQUE_ID_URI|ACTION_FLAG|"
83+
"PERFORMING_PROFESSIONAL_FORENAME|PERFORMING_PROFESSIONAL_SURNAME|RECORDED_DATE|"
84+
"PRIMARY_SOURCE|VACCINATION_PROCEDURE_CODE|VACCINATION_PROCEDURE_TERM|DOSE_SEQUENCE|"
85+
"VACCINE_PRODUCT_CODE|VACCINE_PRODUCT_TERM|VACCINE_MANUFACTURER|BATCH_NUMBER|EXPIRY_DATE|"
86+
"SITE_OF_VACCINATION_CODE|SITE_OF_VACCINATION_TERM|ROUTE_OF_VACCINATION_CODE|"
87+
"ROUTE_OF_VACCINATION_TERM|DOSE_AMOUNT|DOSE_UNIT_CODE|DOSE_UNIT_TERM|INDICATION_CODE|"
88+
"LOCATION_CODE|LOCATION_CODE_TYPE_URI"
89+
) + "\n"
90+
91+
# The content does not matter for the filename processor Lambda. It only needs to ensure the file is not empty.
92+
# Validation of the headers and contents happens later in the batch flow
93+
MOCK_FILE_DATA = (
94+
'9674963871|"ALAN"|"PARTRIDGE"|"20190131"|"2"|"AA14 6AA"|"20240610T183325"|"J82067"|'
95+
'"https://fhir.nhs.uk/Id/ods-organization-code"|"1234"|"a_system"|"new"|"Ellena"|"O\'Reilly"|"20240101"|"TRUE"|'
96+
'"1303503001"|"Administration of vaccine product containing only Human orthopneumovirus antigen (procedure)"|'
97+
'1|"42605811000001109"|"Abrysvo vaccine powder and solvent for solution for injection 0.5ml vials (Pfizer Ltd) '
98+
'(product)"|"Pfizer"|"RSVTEST"|"20241231"|"368208006"|"Left upper arm structure (body structure)"|'
99+
'"78421000"|"Intramuscular route (qualifier value)"|"0.5"|"258773002"|"Milliliter (qualifier value)"|"Test"|'
100+
'"J82067"|"https://fhir.nhs.uk/Id/ods-organization-code"'
101+
)
102+
103+
MOCK_BATCH_FILE_CONTENT = MOCK_FILE_HEADERS + MOCK_FILE_DATA

0 commit comments

Comments
 (0)