Skip to content

Commit bdd8923

Browse files
[PRMP-540] Extract file information and Validate 1 of 1 file format and general processing (#888)
1 parent 31e22be commit bdd8923

File tree

3 files changed

+192
-44
lines changed

3 files changed

+192
-44
lines changed

lambdas/services/bulk_upload_metadata_processor_service.py

Lines changed: 68 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@
55
import urllib.parse
66
from collections import defaultdict
77
from datetime import datetime
8+
from pathlib import Path
89

910
import pydantic
1011
from botocore.exceptions import ClientError
11-
1212
from enums.lloyd_george_pre_process_format import LloydGeorgePreProcessFormat
1313
from enums.upload_status import UploadStatus
1414
from enums.virus_scan_result import VirusScanResult
@@ -41,6 +41,7 @@
4141
LGInvalidFilesException,
4242
VirusScanFailedException,
4343
)
44+
from utils.filename_utils import extract_nhs_number_from_bulk_upload_file_name
4445
from utils.lloyd_george_validator import validate_file_name
4546
from utils.utilities import get_virus_scan_service
4647

@@ -98,7 +99,6 @@ def process_metadata(self):
9899
logger.info("Finished parsing metadata")
99100

100101
self.send_metadata_to_fifo_sqs(staging_metadata_list)
101-
logger.info("Sent bulk upload metadata to SQS queue")
102102

103103
self.copy_metadata_to_dated_folder()
104104
self.clear_temp_storage()
@@ -195,6 +195,21 @@ def convert_to_sqs_metadata(
195195
**file.model_dump(), stored_file_name=stored_file_name
196196
)
197197

198+
def create_expedite_sqs_metadata(self, key) -> StagingSqsMetadata:
199+
"""Build a single-patient SQS metadata payload for an expedite upload."""
200+
nhs_number, file_path, ods_code, scan_date = self.validate_expedite_file(key)
201+
return StagingSqsMetadata(
202+
nhs_number=nhs_number,
203+
files=[
204+
BulkUploadQueueMetadata(
205+
file_path=file_path,
206+
stored_file_name=file_path,
207+
gp_practice_code=ods_code,
208+
scan_date=scan_date,
209+
)
210+
],
211+
)
212+
198213
@staticmethod
199214
def extract_patient_info(file_metadata: MetadataFile) -> tuple[str, str]:
200215
"""Extract key patient identifiers."""
@@ -210,6 +225,55 @@ def validate_and_correct_filename(self, file_metadata: MetadataFile) -> str:
210225
file_metadata.file_path
211226
)
212227

228+
def validate_expedite_file(self, s3_object_key: str):
229+
"""Validate and extract fields from an expedite S3 key.
230+
This ensures the file represents a single document (1of1) and derives
231+
the key fields required to build SQS metadata."""
232+
file_path = os.path.basename(s3_object_key)
233+
234+
if not file_path.startswith("1of1"):
235+
failure_msg = (
236+
"Failed processing expedite event due to file not being a 1of1"
237+
)
238+
logger.error(failure_msg)
239+
raise BulkUploadMetadataException(failure_msg)
240+
241+
nhs_number = extract_nhs_number_from_bulk_upload_file_name(file_path)[0]
242+
file_name = self.metadata_formatter_service.validate_record_filename(
243+
s3_object_key
244+
)
245+
ods_code = Path(s3_object_key).parent.name
246+
scan_date = datetime.now().strftime("%Y-%m-%d")
247+
return nhs_number, file_name, ods_code, scan_date
248+
249+
def handle_expedite_event(self, event):
250+
"""Process S3 EventBridge expedite uploads: enforce virus scan, ensure 1of1, extract identifiers
251+
and send metadata to SQS."""
252+
try:
253+
unparsed_s3_object_key = event["detail"]["object"]["key"]
254+
s3_object_key = urllib.parse.unquote_plus(
255+
unparsed_s3_object_key, encoding="utf-8"
256+
)
257+
258+
if s3_object_key.startswith("expedite/"):
259+
logger.info("Processing file from expedite folder")
260+
261+
self.enforce_virus_scanner(s3_object_key)
262+
self.check_file_status(s3_object_key)
263+
264+
sqs_metadata = [self.create_expedite_sqs_metadata(s3_object_key)]
265+
266+
self.send_metadata_to_fifo_sqs(sqs_metadata)
267+
logger.info("Successfully processed expedite event")
268+
else:
269+
failure_msg = f"Unexpected directory or file location received from EventBridge: {s3_object_key}"
270+
logger.error(failure_msg)
271+
raise BulkUploadMetadataException(failure_msg)
272+
except KeyError as e:
273+
failure_msg = f"Failed due to missing key: {str(e)}"
274+
logger.error(failure_msg)
275+
raise BulkUploadMetadataException(failure_msg)
276+
213277
def handle_invalid_filename(
214278
self,
215279
file_metadata: MetadataFile,
@@ -241,6 +305,7 @@ def send_metadata_to_fifo_sqs(
241305
nhs_number=nhs_number,
242306
group_id=f"bulk_upload_{nhs_number}",
243307
)
308+
logger.info("Sent bulk upload metadata to sqs queue")
244309

245310
def copy_metadata_to_dated_folder(self):
246311
"""Copy processed metadata CSV into a dated archive folder in S3."""
@@ -277,7 +342,7 @@ def enforce_virus_scanner(self, file_key: str):
277342

278343
try:
279344
result = self.s3_repo.check_file_tag_status_on_staging_bucket(file_key)
280-
if(result != ""):
345+
if result != "":
281346
logger.info("The file has been scanned before")
282347
return
283348
logger.info(f"Virus scan tag missing for {file_key}.")
@@ -293,27 +358,6 @@ def enforce_virus_scanner(self, file_key: str):
293358
else:
294359
raise
295360

296-
def handle_expedite_event(self, event):
297-
try:
298-
key_string = event["detail"]["object"]["key"]
299-
key = urllib.parse.unquote_plus(key_string, encoding="utf-8")
300-
301-
if key.startswith("expedite/"):
302-
logger.info("Processing file from expedite folder")
303-
304-
self.enforce_virus_scanner(key)
305-
self.check_file_status(key)
306-
307-
return # To be added upon by ticket PRMP-540
308-
else:
309-
failure_msg = f"Unexpected directory or file location received from EventBridge: {key_string}"
310-
logger.error(failure_msg)
311-
raise BulkUploadMetadataException(failure_msg)
312-
except KeyError as e:
313-
failure_msg = f"Failed due to missing key: {str(e)}"
314-
logger.error(failure_msg)
315-
raise BulkUploadMetadataException(failure_msg)
316-
317361

318362
def get_formatter_service(raw_pre_format_type):
319363
try:

lambdas/tests/unit/handlers/test_bulk_upload_metadata_processor_handler.py

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -59,32 +59,25 @@ def test_metadata_processor_lambda_handler_s3_event_triggers_expedite(
5959
mock_metadata_service.process_metadata.assert_not_called()
6060

6161

62-
def test_s3_event_with_expedite_key_processes(
62+
def test_s3_event_with_non_expedite_key_is_rejected(
6363
set_env, context, mock_metadata_service, caplog
6464
):
65-
event = eventbridge_event_with_s3_key(
66-
"expedite%2F1of1_Lloyd_George_Record_[John Michael SMITH]_[1234567890]_[15-05-1990].pdf"
67-
)
65+
key_string = "uploads/1of1_Lloyd_George_Record_[John Michael SMITH]_[1234567890]_[15-05-1990].pdf"
66+
event = eventbridge_event_with_s3_key(key_string)
6867

6968
with caplog.at_level("INFO"):
7069
lambda_handler(event, context)
7170

72-
assert any(
73-
"Handling EventBridge event from S3" in r.message for r in caplog.records
74-
)
75-
7671
mock_metadata_service.handle_expedite_event.assert_called_once_with(event)
7772
mock_metadata_service.process_metadata.assert_not_called()
7873

7974

80-
def test_s3_event_with_non_expedite_key_is_rejected(
81-
set_env, context, mock_metadata_service, caplog
82-
):
83-
key_string = "uploads/1of1_Lloyd_George_Record_[John Michael SMITH]_[1234567890]_[15-05-1990].pdf"
84-
event = eventbridge_event_with_s3_key(key_string)
75+
def test_s3_event_with_expedite_key_processes(set_env, context, mock_metadata_service):
76+
event = eventbridge_event_with_s3_key(
77+
"expedite%2F1of1_Lloyd_George_Record_[John Michael SMITH]_[1234567890]_[15-05-1990].pdf"
78+
)
8579

86-
with caplog.at_level("INFO"):
87-
lambda_handler(event, context)
80+
lambda_handler(event, context)
8881

89-
mock_metadata_service.handle_expedite_event.assert_called_once_with(event)
9082
mock_metadata_service.process_metadata.assert_not_called()
83+
mock_metadata_service.handle_expedite_event.assert_called_once_with(event)

lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py

Lines changed: 115 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,8 @@
88
import pytest
99
from botocore.exceptions import ClientError
1010
from enums.upload_status import UploadStatus
11-
from freezegun import freeze_time
12-
1311
from enums.virus_scan_result import VirusScanResult
12+
from freezegun import freeze_time
1413
from models.staging_metadata import (
1514
METADATA_FILENAME,
1615
BulkUploadQueueMetadata,
@@ -35,7 +34,7 @@
3534
BulkUploadMetadataException,
3635
InvalidFileNameException,
3736
LGInvalidFilesException,
38-
VirusScanNoResultException, VirusScanFailedException,
37+
VirusScanFailedException,
3938
)
4039

4140
METADATA_FILE_DIR = "tests/unit/helpers/data/bulk_upload"
@@ -616,6 +615,93 @@ def test_validate_and_correct_filename_sad_path(
616615
assert result == "corrected/path/file_corrected.pdf"
617616

618617

618+
@freeze_time("2025-02-03T10:00:00")
619+
def test_create_expedite_sqs_metadata_builds_expected_structure(test_service):
620+
ods_code = "A12345"
621+
key = f"expedite/{ods_code}/1of1_1234567890_record.pdf"
622+
623+
result = test_service.create_expedite_sqs_metadata(key)
624+
625+
assert result.nhs_number == "1234567890"
626+
assert len(result.files) == 1
627+
item = result.files[0]
628+
assert item.file_path == key
629+
assert item.stored_file_name == key
630+
assert item.gp_practice_code == ods_code
631+
assert item.scan_date == "2025-02-03"
632+
633+
634+
@freeze_time("2025-02-03T10:00:00")
635+
def test_handle_expedite_event_happy_path_sends_sqs(test_service, mocker):
636+
ods = "A12345"
637+
key = f"expedite/{ods}/1of1_1234567890_record.pdf"
638+
event = {"detail": {"object": {"key": key}}}
639+
640+
mocker.patch.object(BulkUploadMetadataProcessorService, "enforce_virus_scanner")
641+
mocker.patch.object(BulkUploadMetadataProcessorService, "check_file_status")
642+
mocked_send = mocker.patch.object(
643+
BulkUploadMetadataProcessorService, "send_metadata_to_fifo_sqs"
644+
)
645+
646+
test_service.handle_expedite_event(event)
647+
648+
mocked_send.assert_called_once()
649+
args, _ = mocked_send.call_args
650+
assert len(args) == 1
651+
sqs_payload_list = args[0]
652+
sqs_payload = sqs_payload_list[0]
653+
assert sqs_payload.nhs_number == "1234567890"
654+
assert len(sqs_payload.files) == 1
655+
file_item = sqs_payload.files[0]
656+
assert file_item.file_path == key
657+
assert file_item.stored_file_name == key
658+
assert file_item.gp_practice_code == ods
659+
assert file_item.scan_date == "2025-02-03"
660+
661+
662+
def test_handle_expedite_event_invalid_directory_raises(test_service, mocker):
663+
mocked_send = mocker.patch.object(
664+
BulkUploadMetadataProcessorService, "send_metadata_to_fifo_sqs"
665+
)
666+
bad_key = "notexpedite/A12345/1234567890_record.pdf"
667+
event = {"detail": {"object": {"key": bad_key}}}
668+
669+
with pytest.raises(BulkUploadMetadataException) as exc:
670+
test_service.handle_expedite_event(event)
671+
672+
assert "Unexpected directory or file location" in str(exc.value)
673+
mocked_send.assert_not_called()
674+
675+
676+
def test_handle_expedite_event_missing_key_raises(test_service, mocker):
677+
mocked_send = mocker.patch.object(
678+
BulkUploadMetadataProcessorService, "send_metadata_to_fifo_sqs"
679+
)
680+
event = {"detail": {}}
681+
682+
with pytest.raises(BulkUploadMetadataException) as exc:
683+
test_service.handle_expedite_event(event)
684+
685+
assert "Failed due to missing key" in str(exc.value)
686+
mocked_send.assert_not_called()
687+
688+
689+
def test_handle_expedite_event_rejects_non_1of1(test_service, mocker):
690+
mocker.patch.object(BulkUploadMetadataProcessorService, "enforce_virus_scanner")
691+
mocker.patch.object(BulkUploadMetadataProcessorService, "check_file_status")
692+
mocked_send = mocker.patch.object(
693+
BulkUploadMetadataProcessorService, "send_metadata_to_fifo_sqs"
694+
)
695+
key = "expedite/A12345/2of3_1234567890_record.pdf"
696+
event = {"detail": {"object": {"key": key}}}
697+
698+
with pytest.raises(BulkUploadMetadataException) as exc:
699+
test_service.handle_expedite_event(event)
700+
701+
assert "not being a 1of1" in str(exc.value)
702+
mocked_send.assert_not_called()
703+
704+
619705
@pytest.fixture
620706
def mock_csv_content():
621707
header = "FILEPATH,PAGE COUNT,GP-PRACTICE-CODE,NHS-NO,SECTION,SUB-SECTION,SCAN-DATE,SCAN-ID,USER-ID,UPLOAD"
@@ -864,10 +950,34 @@ def test_no_remapping_logic(
864950
]
865951

866952

953+
@freeze_time("2025-02-03T10:00:00")
954+
def test_validate_expedite_file_happy_path_returns_expected_tuple(test_service):
955+
ods_code = "A12345"
956+
key = f"expedite/{ods_code}/1of1_1234567890_record.pdf"
957+
958+
nhs_number, file_name, extracted_ods, scan_date = (
959+
test_service.validate_expedite_file(key)
960+
)
961+
962+
assert nhs_number == "1234567890"
963+
assert file_name == key
964+
assert extracted_ods == ods_code
965+
assert scan_date == "2025-02-03"
966+
967+
968+
def test_validate_expedite_file_rejects_non_1of1(test_service):
969+
key = "expedite/A12345/2of3_1234567890_record.pdf"
970+
with pytest.raises(BulkUploadMetadataException):
971+
test_service.validate_expedite_file(key)
972+
973+
867974
def test_handle_expedite_event_calls_enforce_for_expedite_key(mocker, test_service):
868975
encoded_key = urllib.parse.quote_plus("expedite/folder/some file.pdf")
869976
event = {"detail": {"object": {"key": encoded_key}}}
870977

978+
mocker.patch.object(
979+
BulkUploadMetadataProcessorService, "create_expedite_sqs_metadata"
980+
)
871981
mocked_enforce = mocker.patch.object(test_service, "enforce_virus_scanner")
872982
mocked_check_status = mocker.patch.object(test_service, "check_file_status")
873983

@@ -1011,6 +1121,7 @@ def test_enforce_virus_scanner_re_raises_unexpected_client_error(mocker, test_se
10111121

10121122
mock_scan.assert_not_called()
10131123

1124+
10141125
def test_check_file_status_clean_does_nothing(mocker, test_service, caplog):
10151126
file_key = "expedite/folder/file.pdf"
10161127
mock_check = mocker.patch.object(
@@ -1043,4 +1154,4 @@ def test_check_file_status_logs_issue_when_not_clean(mocker, test_service, caplo
10431154
assert any(
10441155
f"Found an issue with the file {file_key}." in record.msg
10451156
for record in caplog.records
1046-
)
1157+
)

0 commit comments

Comments
 (0)