Skip to content

Commit 9228083

Browse files
committed
[PRMP-631] Pause
1 parent a1d093f commit 9228083

File tree

34 files changed

+471
-148
lines changed

34 files changed

+471
-148
lines changed

lambdas/services/bulk_upload/metadata_usb_preprocessor.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -56,12 +56,12 @@ def validate_record_filename(self, file_path, metadata_nhs_number=None, *args, *
5656
self._validate_document_parts(file_path, file_name)
5757

5858
self._validate_file_extension(file_name)
59-
6059
(
6160
nhs_number,
6261
patient_name,
6362
date_of_birth,
6463
) = self._extract_metadata_from_path(directory_path)
64+
logger.info(f"Extracted metadata from file path: {nhs_number}, {patient_name}, {date_of_birth}")
6565

6666
if nhs_number != metadata_nhs_number:
6767
logger.warning(f"File as it does not match the metadata NHS number: {file_path}")

lambdas/services/bulk_upload_metadata_processor_service.py

Lines changed: 15 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
import pydantic
1111
from botocore.exceptions import ClientError
12+
1213
from enums.lloyd_george_pre_process_format import LloydGeorgePreProcessFormat
1314
from enums.upload_status import UploadStatus
1415
from enums.virus_scan_result import VirusScanResult
@@ -42,7 +43,6 @@
4243
VirusScanFailedException,
4344
)
4445
from utils.filename_utils import extract_nhs_number_from_bulk_upload_file_name
45-
from utils.lloyd_george_validator import validate_file_name
4646
from utils.utilities import get_virus_scan_service
4747

4848
logger = LoggingService(__name__)
@@ -52,9 +52,9 @@
5252
class BulkUploadMetadataProcessorService:
5353

5454
def __init__(
55-
self,
56-
metadata_formatter_service: MetadataPreprocessorService,
57-
metadata_heading_remap: dict,
55+
self,
56+
metadata_formatter_service: MetadataPreprocessorService,
57+
metadata_heading_remap: dict,
5858
):
5959
self.staging_bucket_name = os.getenv("STAGING_STORE_BUCKET_NAME")
6060
self.metadata_queue_url = os.getenv("METADATA_SQS_QUEUE_URL")
@@ -130,7 +130,7 @@ def csv_to_sqs_metadata(self, csv_file_path: str) -> list[StagingSqsMetadata]:
130130
)
131131

132132
with open(
133-
csv_file_path, mode="r", encoding="utf-8-sig", errors="replace"
133+
csv_file_path, mode="r", encoding="utf-8-sig", errors="replace"
134134
) as csv_file:
135135
csv_reader = csv.DictReader(csv_file)
136136
if csv_reader.fieldnames is None:
@@ -171,7 +171,7 @@ def csv_to_sqs_metadata(self, csv_file_path: str) -> list[StagingSqsMetadata]:
171171
]
172172

173173
def process_metadata_row(
174-
self, row: dict, patients: dict[tuple[str, str], list[BulkUploadQueueMetadata]]
174+
self, row: dict, patients: dict[tuple[str, str], list[BulkUploadQueueMetadata]]
175175
) -> None:
176176
"""Validate individual file metadata and attach to patient group."""
177177
file_metadata = MetadataFile.model_validate(row)
@@ -188,7 +188,7 @@ def process_metadata_row(
188188

189189
@staticmethod
190190
def convert_to_sqs_metadata(
191-
file: MetadataFile, stored_file_name: str
191+
file: MetadataFile, stored_file_name: str
192192
) -> BulkUploadQueueMetadata:
193193
"""Convert a MetadataFile into BulkUploadQueueMetadata."""
194194
return BulkUploadQueueMetadata(
@@ -212,23 +212,19 @@ def create_expedite_sqs_metadata(self, key) -> StagingSqsMetadata:
212212

213213
@staticmethod
214214
def extract_patient_info(file_metadata: MetadataFile) -> tuple[str, str]:
215-
"""Extract key patient identifiers."""
216215
return file_metadata.nhs_number, file_metadata.gp_practice_code
217216

218217
def validate_and_correct_filename(self, file_metadata: MetadataFile) -> str:
219-
"""Validate and normalize file name."""
220218
try:
221-
validate_file_name(file_metadata.file_path.split("/")[-1])
219+
# validate_file_name(file_metadata.file_path.split("/")[-1])
222220
return file_metadata.file_path
223221
except LGInvalidFilesException:
224222
return self.metadata_formatter_service.validate_record_filename(
225-
file_metadata.file_path
223+
file_metadata.file_path,
224+
file_metadata.nhs_number
226225
)
227226

228227
def validate_expedite_file(self, s3_object_key: str):
229-
"""Validate and extract fields from an expedite S3 key.
230-
This ensures the file represents a single document (1of1) and derives
231-
the key fields required to build SQS metadata."""
232228
file_path = os.path.basename(s3_object_key)
233229

234230
if not file_path.startswith("1of1"):
@@ -275,10 +271,10 @@ def handle_expedite_event(self, event):
275271
raise BulkUploadMetadataException(failure_msg)
276272

277273
def handle_invalid_filename(
278-
self,
279-
file_metadata: MetadataFile,
280-
error: InvalidFileNameException,
281-
nhs_number: str,
274+
self,
275+
file_metadata: MetadataFile,
276+
error: InvalidFileNameException,
277+
nhs_number: str,
282278
) -> None:
283279
"""Handle invalid filenames by logging and storing failure in Dynamo."""
284280
logger.error(
@@ -293,7 +289,7 @@ def handle_invalid_filename(
293289
)
294290

295291
def send_metadata_to_fifo_sqs(
296-
self, staging_sqs_metadata_list: list[StagingSqsMetadata]
292+
self, staging_sqs_metadata_list: list[StagingSqsMetadata]
297293
) -> None:
298294
"""Send validated metadata entries to SQS FIFO queue."""
299295
for staging_sqs_metadata in staging_sqs_metadata_list:
@@ -320,7 +316,6 @@ def copy_metadata_to_dated_folder(self):
320316
self.s3_service.delete_object(self.staging_bucket_name, METADATA_FILENAME)
321317

322318
def clear_temp_storage(self):
323-
"""Delete temporary working directory."""
324319
logger.info("Clearing temp storage directory")
325320
try:
326321
shutil.rmtree(self.temp_download_dir)
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,93 @@
11
{
22
"_description": "General-Accepted-Multi File Upload 1of3",
3-
"bulk_upload_report": null,
4-
"metadata": null,
5-
"unstitched": []
3+
"bulk_upload_report": {
4+
"FilePath": "accept/9730786976/2of3_Lloyd_George_Record_[Lena Rena LAWLER]_[9730786976]_[24-07-1924].pdf",
5+
"NhsNumber": "9730786976",
6+
"PdsOdsCode": "M85143",
7+
"StoredFileName": "9730786976/2of3_Lloyd_George_Record_[Lena Rena LAWLER]_[9730786976]_[24-07-1924].pdf",
8+
"UploadStatus": "complete",
9+
"UploaderOdsCode": "M85143"
10+
},
11+
"metadata": {
12+
"Author": "M85143",
13+
"ContentType": "application/pdf",
14+
"CurrentGpOds": "M85143",
15+
"Custodian": "M85143",
16+
"DocStatus": "final",
17+
"DocumentScanCreation": "2023-01-01",
18+
"DocumentSnomedCodeType": "16521000000101",
19+
"FileName": "1of1_Lloyd_George_Record_[Lena Rena LAWLER]_[9730786976]_[24-07-1924].pdf",
20+
"FileSize": "Decimal('578652')",
21+
"NhsNumber": "9730786976",
22+
"S3VersionID": "7lfNLZs_cykTkKDFkyretGqBMPxu9BKI",
23+
"Status": "current",
24+
"Uploaded": true,
25+
"Version": "1",
26+
"VirusScannerResult": "Clean"
27+
},
28+
"unstitched": [
29+
{
30+
"Author": "M85143",
31+
"ContentType": "application/pdf",
32+
"Created": "2025-12-23T10:09:12.436159Z",
33+
"Custodian": "M85143",
34+
"DocStatus": "final",
35+
"DocumentScanCreation": "2023-01-01",
36+
"DocumentSnomedCodeType": "16521000000101",
37+
"FileLocation": "s3://roga7-lloyd-george-store/9730786976/9bd7b329-69b1-4dc1-945d-fc357d9f8178",
38+
"FileName": "3of3_Lloyd_George_Record_[Lena Rena LAWLER]_[9730786976]_[24-07-1924].pdf",
39+
"FileSize": "Decimal('173760')",
40+
"ID": "9bd7b329-69b1-4dc1-945d-fc357d9f8178",
41+
"LastUpdated": "Decimal('1766484552')",
42+
"NhsNumber": "9730786976",
43+
"S3FileKey": "9730786976/9bd7b329-69b1-4dc1-945d-fc357d9f8178",
44+
"S3VersionID": "1Xp4BDaBwIEEgZxPwxJTk5HOZTAUKJbC",
45+
"Status": "current",
46+
"Uploaded": true,
47+
"Version": "1",
48+
"VirusScannerResult": "Clean"
49+
},
50+
{
51+
"Author": "M85143",
52+
"ContentType": "application/pdf",
53+
"Created": "2025-12-23T10:09:12.266309Z",
54+
"Custodian": "M85143",
55+
"DocStatus": "final",
56+
"DocumentScanCreation": "2023-01-01",
57+
"DocumentSnomedCodeType": "16521000000101",
58+
"FileLocation": "s3://roga7-lloyd-george-store/9730786976/35905772-6357-4bf3-b812-e2eebfe56ff4",
59+
"FileName": "1of3_Lloyd_George_Record_[Lena Rena LAWLER]_[9730786976]_[24-07-1924].pdf",
60+
"FileSize": "Decimal('173760')",
61+
"ID": "35905772-6357-4bf3-b812-e2eebfe56ff4",
62+
"LastUpdated": "Decimal('1766484552')",
63+
"NhsNumber": "9730786976",
64+
"S3FileKey": "9730786976/35905772-6357-4bf3-b812-e2eebfe56ff4",
65+
"S3VersionID": "8ypIaC4mMk8jQPp_aPKYhy78rm9Hom.3",
66+
"Status": "current",
67+
"Uploaded": true,
68+
"Version": "1",
69+
"VirusScannerResult": "Clean"
70+
},
71+
{
72+
"Author": "M85143",
73+
"ContentType": "application/pdf",
74+
"Created": "2025-12-23T10:09:12.340065Z",
75+
"Custodian": "M85143",
76+
"DocStatus": "final",
77+
"DocumentScanCreation": "2023-01-01",
78+
"DocumentSnomedCodeType": "16521000000101",
79+
"FileLocation": "s3://roga7-lloyd-george-store/9730786976/a7dd5def-17af-404e-93d4-276cfcdddddb",
80+
"FileName": "2of3_Lloyd_George_Record_[Lena Rena LAWLER]_[9730786976]_[24-07-1924].pdf",
81+
"FileSize": "Decimal('173760')",
82+
"ID": "a7dd5def-17af-404e-93d4-276cfcdddddb",
83+
"LastUpdated": "Decimal('1766484552')",
84+
"NhsNumber": "9730786976",
85+
"S3FileKey": "9730786976/a7dd5def-17af-404e-93d4-276cfcdddddb",
86+
"S3VersionID": "tBW3RaCsEgTUzSN2Od6zdgudIC31e44o",
87+
"Status": "current",
88+
"Uploaded": true,
89+
"Version": "1",
90+
"VirusScannerResult": "Clean"
91+
}
92+
]
693
}
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,93 @@
11
{
22
"_description": "General-Accepted-Multi File Upload 2of3",
3-
"bulk_upload_report": null,
4-
"metadata": null,
5-
"unstitched": []
3+
"bulk_upload_report": {
4+
"FilePath": "accept/9730786976/2of3_Lloyd_George_Record_[Lena Rena LAWLER]_[9730786976]_[24-07-1924].pdf",
5+
"NhsNumber": "9730786976",
6+
"PdsOdsCode": "M85143",
7+
"StoredFileName": "9730786976/2of3_Lloyd_George_Record_[Lena Rena LAWLER]_[9730786976]_[24-07-1924].pdf",
8+
"UploadStatus": "complete",
9+
"UploaderOdsCode": "M85143"
10+
},
11+
"metadata": {
12+
"Author": "M85143",
13+
"ContentType": "application/pdf",
14+
"CurrentGpOds": "M85143",
15+
"Custodian": "M85143",
16+
"DocStatus": "final",
17+
"DocumentScanCreation": "2023-01-01",
18+
"DocumentSnomedCodeType": "16521000000101",
19+
"FileName": "1of1_Lloyd_George_Record_[Lena Rena LAWLER]_[9730786976]_[24-07-1924].pdf",
20+
"FileSize": "Decimal('578652')",
21+
"NhsNumber": "9730786976",
22+
"S3VersionID": "7lfNLZs_cykTkKDFkyretGqBMPxu9BKI",
23+
"Status": "current",
24+
"Uploaded": true,
25+
"Version": "1",
26+
"VirusScannerResult": "Clean"
27+
},
28+
"unstitched": [
29+
{
30+
"Author": "M85143",
31+
"ContentType": "application/pdf",
32+
"Created": "2025-12-23T10:09:12.436159Z",
33+
"Custodian": "M85143",
34+
"DocStatus": "final",
35+
"DocumentScanCreation": "2023-01-01",
36+
"DocumentSnomedCodeType": "16521000000101",
37+
"FileLocation": "s3://roga7-lloyd-george-store/9730786976/9bd7b329-69b1-4dc1-945d-fc357d9f8178",
38+
"FileName": "3of3_Lloyd_George_Record_[Lena Rena LAWLER]_[9730786976]_[24-07-1924].pdf",
39+
"FileSize": "Decimal('173760')",
40+
"ID": "9bd7b329-69b1-4dc1-945d-fc357d9f8178",
41+
"LastUpdated": "Decimal('1766484552')",
42+
"NhsNumber": "9730786976",
43+
"S3FileKey": "9730786976/9bd7b329-69b1-4dc1-945d-fc357d9f8178",
44+
"S3VersionID": "1Xp4BDaBwIEEgZxPwxJTk5HOZTAUKJbC",
45+
"Status": "current",
46+
"Uploaded": true,
47+
"Version": "1",
48+
"VirusScannerResult": "Clean"
49+
},
50+
{
51+
"Author": "M85143",
52+
"ContentType": "application/pdf",
53+
"Created": "2025-12-23T10:09:12.266309Z",
54+
"Custodian": "M85143",
55+
"DocStatus": "final",
56+
"DocumentScanCreation": "2023-01-01",
57+
"DocumentSnomedCodeType": "16521000000101",
58+
"FileLocation": "s3://roga7-lloyd-george-store/9730786976/35905772-6357-4bf3-b812-e2eebfe56ff4",
59+
"FileName": "1of3_Lloyd_George_Record_[Lena Rena LAWLER]_[9730786976]_[24-07-1924].pdf",
60+
"FileSize": "Decimal('173760')",
61+
"ID": "35905772-6357-4bf3-b812-e2eebfe56ff4",
62+
"LastUpdated": "Decimal('1766484552')",
63+
"NhsNumber": "9730786976",
64+
"S3FileKey": "9730786976/35905772-6357-4bf3-b812-e2eebfe56ff4",
65+
"S3VersionID": "8ypIaC4mMk8jQPp_aPKYhy78rm9Hom.3",
66+
"Status": "current",
67+
"Uploaded": true,
68+
"Version": "1",
69+
"VirusScannerResult": "Clean"
70+
},
71+
{
72+
"Author": "M85143",
73+
"ContentType": "application/pdf",
74+
"Created": "2025-12-23T10:09:12.340065Z",
75+
"Custodian": "M85143",
76+
"DocStatus": "final",
77+
"DocumentScanCreation": "2023-01-01",
78+
"DocumentSnomedCodeType": "16521000000101",
79+
"FileLocation": "s3://roga7-lloyd-george-store/9730786976/a7dd5def-17af-404e-93d4-276cfcdddddb",
80+
"FileName": "2of3_Lloyd_George_Record_[Lena Rena LAWLER]_[9730786976]_[24-07-1924].pdf",
81+
"FileSize": "Decimal('173760')",
82+
"ID": "a7dd5def-17af-404e-93d4-276cfcdddddb",
83+
"LastUpdated": "Decimal('1766484552')",
84+
"NhsNumber": "9730786976",
85+
"S3FileKey": "9730786976/a7dd5def-17af-404e-93d4-276cfcdddddb",
86+
"S3VersionID": "tBW3RaCsEgTUzSN2Od6zdgudIC31e44o",
87+
"Status": "current",
88+
"Uploaded": true,
89+
"Version": "1",
90+
"VirusScannerResult": "Clean"
91+
}
92+
]
693
}

0 commit comments

Comments
 (0)