Skip to content

Commit 7287cdf

Browse files
authored
[PRMP-401] Parse scan date in bulk upload (#795)
1 parent f8a1d22 commit 7287cdf

File tree

7 files changed

+98
-32
lines changed

7 files changed

+98
-32
lines changed

lambdas/services/bulk_upload_service.py

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import json
22
import os
33
import uuid
4-
from datetime import datetime
54

65
import pydantic
76
from botocore.exceptions import ClientError
@@ -39,6 +38,7 @@
3938
validate_filename_with_patient_details_lenient,
4039
validate_filename_with_patient_details_strict,
4140
validate_lg_file_names,
41+
validate_scan_date,
4242
)
4343
from utils.request_context import request_context
4444
from utils.unicode_utils import (
@@ -126,10 +126,10 @@ def handle_sqs_message(self, message: dict):
126126
logger.info("SQS event is valid. Validating NHS number and file names")
127127

128128
try:
129-
file_names = [
130-
os.path.basename(metadata.file_path)
131-
for metadata in staging_metadata.files
132-
]
129+
file_names = []
130+
for file_metadata in staging_metadata.files:
131+
file_names.append(os.path.basename(file_metadata.file_path))
132+
file_metadata.scan_date = validate_scan_date(file_metadata.scan_date)
133133
request_context.patient_nhs_no = staging_metadata.nhs_number
134134
validate_nhs_number(staging_metadata.nhs_number)
135135
validate_lg_file_names(file_names, staging_metadata.nhs_number)
@@ -139,6 +139,7 @@ def handle_sqs_message(self, message: dict):
139139
patient_ods_code = (
140140
pds_patient_details.get_ods_code_or_inactive_status_for_gp()
141141
)
142+
142143
if not self.bypass_pds:
143144
if not self.strict_mode:
144145
(
@@ -395,12 +396,6 @@ def convert_to_document_reference(
395396
) -> DocumentReference:
396397
s3_bucket_name = self.bulk_upload_s3_repository.lg_bucket_name
397398
file_name = os.path.basename(file_metadata.file_path)
398-
if file_metadata.scan_date:
399-
scan_date_formatted = datetime.strptime(
400-
file_metadata.scan_date, "%d/%m/%Y"
401-
).strftime("%Y-%m-%d")
402-
else:
403-
scan_date_formatted = None
404399
document_reference = DocumentReference(
405400
id=str(uuid.uuid4()),
406401
nhs_number=nhs_number,
@@ -409,7 +404,7 @@ def convert_to_document_reference(
409404
current_gp_ods=current_gp_ods,
410405
custodian=current_gp_ods,
411406
author=file_metadata.gp_practice_code,
412-
document_scan_creation=scan_date_formatted,
407+
document_scan_creation=file_metadata.scan_date,
413408
doc_status="preliminary",
414409
)
415410
document_reference.set_virus_scanner_result(VirusScanResult.CLEAN)

lambdas/tests/unit/helpers/data/bulk_upload/test_data.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,9 @@ def build_test_staging_metadata(file_names: list[str], nhs_number: str = "900000
243243
for file_name in file_names:
244244
source_file_path = f"/{nhs_number}/{file_name}"
245245
files.append(
246-
sample_sqs_metadata_model.model_copy(update={"file_path": source_file_path})
246+
sample_sqs_metadata_model.model_copy(
247+
update={"file_path": source_file_path, "scan_date": "2022-09-03"}
248+
)
247249
)
248250
return StagingSqsMetadata(files=files, nhs_number=nhs_number)
249251

lambdas/tests/unit/services/test_bulk_upload_service.py

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,10 @@
5757

5858
@pytest.fixture
5959
def repo_under_test(set_env, mocker):
60+
mocker.patch("services.bulk_upload_service.BulkUploadDynamoRepository")
61+
mocker.patch("services.bulk_upload_service.BulkUploadSqsRepository")
62+
mocker.patch("services.bulk_upload_service.BulkUploadS3Repository")
6063
service = BulkUploadService(strict_mode=True)
61-
mocker.patch.object(service, "dynamo_repository")
62-
mocker.patch.object(service, "sqs_repository")
63-
mocker.patch.object(service, "bulk_upload_s3_repository")
6464
yield service
6565

6666

@@ -889,25 +889,29 @@ def test_convert_to_document_reference(set_env, mock_uuid, repo_under_test):
889889

890890

891891
@freeze_time("2024-01-01 12:00:00")
892-
def test_convert_to_document_reference_missing_scan_date(
893-
set_env, mock_uuid, repo_under_test
892+
def test_reject_document_reference_if_missing_scan_date(
893+
set_env, mock_uuid, repo_under_test, mocker
894894
):
895895
TEST_STAGING_METADATA.retries = 0
896-
repo_under_test.bulk_upload_s3_repository.lg_bucket_name = "test_lg_s3_bucket"
897-
expected = TEST_DOCUMENT_REFERENCE
898-
expected.document_scan_creation = None
899-
TEST_FILE_METADATA.scan_date = ""
896+
modify_test_sqs_message = json.loads(TEST_SQS_MESSAGE.get("body"))
897+
modify_test_sqs_message["files"][0]["scan_date"] = ""
898+
TEST_STAGING_METADATA.files[0].scan_date = ""
899+
mock_report_upload_failure = mocker.patch.object(
900+
repo_under_test.dynamo_repository, "write_report_upload_to_dynamo"
901+
)
900902

901-
actual = repo_under_test.convert_to_document_reference(
902-
file_metadata=TEST_FILE_METADATA,
903-
nhs_number=TEST_STAGING_METADATA.nhs_number,
904-
current_gp_ods=TEST_CURRENT_GP_ODS,
903+
repo_under_test.handle_sqs_message(
904+
message={"body": json.dumps(modify_test_sqs_message)}
905905
)
906906

907-
assert actual == expected
907+
repo_under_test.dynamo_repository.write_report_upload_to_dynamo.assert_called()
908+
909+
mock_report_upload_failure.assert_called_with(
910+
TEST_STAGING_METADATA, UploadStatus.FAILED, "Invalid scan date format", ""
911+
)
912+
repo_under_test.sqs_repository.send_message_to_pdf_stitching_queue.assert_not_called()
908913

909-
TEST_FILE_METADATA.scan_date = "03/09/2022"
910-
TEST_DOCUMENT_REFERENCE.document_scan_creation = "2022-09-03"
914+
TEST_STAGING_METADATA.files[0].scan_date = "2022-09-03"
911915

912916

913917
def test_raise_client_error_from_ssm_with_pds_service(

lambdas/tests/unit/utils/test_lloyd_george_validator.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
import pytest
22
from botocore.exceptions import ClientError
3+
from requests import Response
4+
35
from enums.supported_document_types import SupportedDocumentTypes
46
from enums.validation_score import ValidationResult, ValidationScore
57
from models.pds_models import Patient
6-
from requests import Response
78
from services.base.ssm_service import SSMService
89
from services.document_service import DocumentService
910
from tests.unit.conftest import TEST_NHS_NUMBER, expect_not_to_raise
@@ -57,7 +58,7 @@
5758
validate_patient_date_of_birth,
5859
validate_patient_name_lenient,
5960
validate_patient_name_strict,
60-
validate_patient_name_using_full_name_history,
61+
validate_patient_name_using_full_name_history, validate_scan_date,
6162
)
6263

6364

@@ -1346,3 +1347,14 @@ def test_calculate_validation_score_for_lenient_check(
13461347
)
13471348
assert historical == expected_historical_match
13481349
assert actual_result == expected_score
1350+
1351+
1352+
def test_validates_correct_scan_date_format():
    """A ``DD-MM-YYYY`` scan date is returned re-formatted as ``YYYY-MM-DD``."""
    expected = "2023-03-12"

    actual = validate_scan_date("12-03-2023")

    assert actual == expected
1356+
1357+
@pytest.mark.parametrize(
    "scan_date",
    [
        "",
        "not-a-date",
        "1678246",
        "12/12/2024 12:12",
        "2023/03/12",
    ],
)
def test_raises_exception_for_invalid_scan_date_format(scan_date):
    """Each unparseable scan date must be rejected with the standard message."""
    expected_message = "Invalid scan date format"

    with pytest.raises(LGInvalidFilesException, match=expected_message):
        validate_scan_date(scan_date)

lambdas/tests/unit/utils/test_utilities.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from datetime import datetime
2+
13
import pytest
24
from services.mock_pds_service import MockPdsApiService
35
from services.pds_api_service import PdsApiService
@@ -8,6 +10,7 @@
810
format_cloudfront_url,
911
get_file_key_from_s3_url,
1012
get_pds_service,
13+
parse_date,
1114
redact_id_to_last_4_chars,
1215
validate_nhs_number,
1316
)
@@ -111,3 +114,24 @@ def test_format_cloudfront_url_valid():
111114
cloudfront_domain = "d12345.cloudfront.net"
112115
expected_url = "https://d12345.cloudfront.net/path/to/resource"
113116
assert format_cloudfront_url(presign_url, cloudfront_domain) == expected_url
117+
118+
119+
@pytest.mark.parametrize(
    "input_date, expected_date",
    [
        # Inputs that parse_date is expected to understand.
        ("25/12/2023", datetime(2023, 12, 25)),
        ("2023-12-25", datetime(2023, 12, 25)),
        ("25-12-2023", datetime(2023, 12, 25)),
        ("Dec 25, 2023", datetime(2023, 12, 25)),
        ("25-Dec-2023", datetime(2023, 12, 25)),
        ("24-NOV-2023", datetime(2023, 11, 24)),
        # Inputs that must be reported as unparseable (None).
        ("12/12/2024 12:12", None),
        ("25.12.2023", None),
        ("", None),
        ("test_text", None),
        (None, None),
    ],
)
def test_parse_date_returns_correct_date_for_valid_formats(input_date, expected_date):
    """Check parse_date against a table of inputs and expected results.

    Despite the name, the table also covers inputs for which ``None`` is the
    expected result.
    """
    assert parse_date(input_date) == expected_date

lambdas/utils/lloyd_george_validator.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
name_ends_with,
2828
name_starts_with,
2929
)
30-
from utils.utilities import get_pds_service
30+
from utils.utilities import get_pds_service, parse_date
3131

3232
logger = LoggingService(__name__)
3333

@@ -386,3 +386,10 @@ def allowed_to_ingest_ods_code(patient_ods_code: str) -> bool:
386386
return True
387387

388388
return patient_ods_code.upper() in allowed_ods_codes
389+
390+
391+
def validate_scan_date(scan_date: str):
    """Validate a scan date and return it formatted as ``YYYY-MM-DD``.

    Args:
        scan_date: Raw scan date text; it is handed to ``parse_date`` as-is.

    Returns:
        The parsed date rendered with ``strftime("%Y-%m-%d")``.

    Raises:
        LGInvalidFilesException: If ``parse_date`` returns a falsy result for
            the given value.
    """
    parsed = parse_date(scan_date)
    if not parsed:
        raise LGInvalidFilesException("Invalid scan date format")
    return parsed.strftime("%Y-%m-%d")

lambdas/utils/utilities.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,3 +105,25 @@ def generate_date_folder_name(date: str) -> str:
105105
def format_cloudfront_url(presign_url: str, cloudfront_domain: str) -> str:
106106
formatted_url = f"https://{cloudfront_domain}/{presign_url}"
107107
return formatted_url
108+
109+
110+
def parse_date(date_string: str) -> datetime | None:
111+
if not date_string:
112+
return None
113+
114+
SUPPORTED_FORMATS = [
115+
"%d/%m/%Y",
116+
"%Y-%m-%d",
117+
"%d-%m-%Y",
118+
"%b %d, %Y",
119+
"%d-%b-%Y",
120+
"%d-%B-%Y",
121+
]
122+
123+
for fmt in SUPPORTED_FORMATS:
124+
try:
125+
date_object = datetime.strptime(date_string, fmt)
126+
return date_object
127+
except ValueError:
128+
continue
129+
return None

0 commit comments

Comments
 (0)