33import shutil
44import tempfile
55import uuid
6+ from collections import defaultdict
67from datetime import datetime
78from typing import Iterable
89
910import pydantic
1011from botocore .exceptions import ClientError
1112from enums .upload_status import UploadStatus
12- from models .staging_metadata import METADATA_FILENAME , MetadataFile , StagingMetadata
13+ from models .staging_metadata import (
14+ METADATA_FILENAME ,
15+ BulkUploadQueueMetadata ,
16+ MetadataFile ,
17+ StagingSqsMetadata ,
18+ )
1319from repositories .bulk_upload .bulk_upload_dynamo_repository import (
1420 BulkUploadDynamoRepository ,
1521)
@@ -41,7 +47,6 @@ def __init__(self, metadata_formatter_service: MetadataPreprocessorService):
4147
4248 self .temp_download_dir = tempfile .mkdtemp ()
4349
44- self .corrections = {}
4550 self .practice_directory = metadata_formatter_service .practice_directory
4651 self .file_key = (
4752 f"{ metadata_formatter_service .practice_directory } /{ METADATA_FILENAME } "
@@ -53,7 +58,7 @@ def __init__(self, metadata_formatter_service: MetadataPreprocessorService):
5358 def process_metadata (self ):
5459 try :
5560 metadata_file = self .download_metadata_from_s3 ()
56- staging_metadata_list = self .csv_to_staging_metadata (metadata_file )
61+ staging_metadata_list = self .csv_to_sqs_metadata (metadata_file )
5762 logger .info ("Finished parsing metadata" )
5863
5964 self .send_metadata_to_fifo_sqs (staging_metadata_list )
@@ -90,50 +95,59 @@ def download_metadata_from_s3(self) -> str:
9095 )
9196 return local_file_path
9297
93- def csv_to_staging_metadata (self , csv_file_path : str ) -> list [StagingMetadata ]:
98+ def csv_to_sqs_metadata (self , csv_file_path : str ) -> list [StagingSqsMetadata ]:
9499 logger .info ("Parsing bulk upload metadata" )
95- patients = {}
100+ patients : defaultdict [tuple [str , str ], list [BulkUploadQueueMetadata ]] = (
101+ defaultdict (list )
102+ )
103+
96104 with open (
97105 csv_file_path , mode = "r" , encoding = "utf-8-sig" , errors = "replace"
98106 ) as csv_file_handler :
99107 csv_reader : Iterable [dict ] = csv .DictReader (csv_file_handler )
100108 for row in csv_reader :
101109 self .process_metadata_row (row , patients )
110+
102111 return [
103- StagingMetadata (
104- nhs_number = key [ 0 ] ,
105- files = value ,
112+ StagingSqsMetadata (
113+ nhs_number = nhs_number ,
114+ files = files ,
106115 )
107- for (key , value ) in patients .items ()
116+ for (nhs_number , _ ), files in patients .items ()
108117 ]
109118
110- def process_metadata_row (self , row : dict , patients : dict ) -> None :
119+ def process_metadata_row (
120+ self , row : dict , patients : dict [tuple [str , str ], list [BulkUploadQueueMetadata ]]
121+ ) -> None :
111122 file_metadata = MetadataFile .model_validate (row )
112123 nhs_number , ods_code = self .extract_patient_info (file_metadata )
113- patient_record_key = (nhs_number , ods_code )
114-
115- if patient_record_key not in patients :
116- patients [patient_record_key ] = [file_metadata ]
117- else :
118- patients [patient_record_key ].append (file_metadata )
119124
120125 try :
121- self .validate_correct_filename (file_metadata )
126+ correct_file_name = self .validate_and_correct_filename (file_metadata )
122127 except InvalidFileNameException as error :
123- self .handle_invalid_filename (
124- file_metadata , error , patient_record_key , patients
125- )
126- patients .pop (patient_record_key )
128+ self .handle_invalid_filename (file_metadata , error , nhs_number )
129+ return
130+
131+ sqs_metadata = self .convert_to_sqs_metadata (file_metadata , correct_file_name )
132+ patients [(nhs_number , ods_code )].append (sqs_metadata )
133+
134+ @staticmethod
135+ def convert_to_sqs_metadata (
136+ file : MetadataFile , stored_file_name : str
137+ ) -> BulkUploadQueueMetadata :
138+ return BulkUploadQueueMetadata (
139+ ** file .model_dump (), stored_file_name = stored_file_name
140+ )
127141
128142 def extract_patient_info (self , file_metadata : MetadataFile ) -> tuple [str , str ]:
129143 nhs_number = file_metadata .nhs_number
130144 ods_code = file_metadata .gp_practice_code
131145 return nhs_number , ods_code
132146
133- def validate_correct_filename (
147+ def validate_and_correct_filename (
134148 self ,
135149 file_metadata : MetadataFile ,
136- ) -> None :
150+ ) -> str :
137151 try :
138152 validate_file_name (file_metadata .file_path .split ("/" )[- 1 ])
139153 valid_filepath = file_metadata .file_path
@@ -142,39 +156,40 @@ def validate_correct_filename(
142156 file_metadata .file_path
143157 )
144158
145- if valid_filepath :
146- self .corrections [file_metadata .file_path ] = valid_filepath
159+ return valid_filepath
147160
148161 def handle_invalid_filename (
149162 self ,
150163 file_metadata : MetadataFile ,
151164 error : InvalidFileNameException ,
152- key : tuple [str , str ],
153- patients : dict [tuple [str , str ], list [MetadataFile ]],
165+ nhs_number : str ,
154166 ) -> None :
155167 logger .error (
156168 f"Failed to process { file_metadata .file_path } due to error: { error } "
157169 )
158- failed_entry = StagingMetadata (
159- nhs_number = key [0 ],
160- files = patients [key ],
170+ failed_file = self .convert_to_sqs_metadata (
171+ file_metadata , file_metadata .file_path
172+ )
173+ failed_entry = StagingSqsMetadata (
174+ nhs_number = nhs_number ,
175+ files = [failed_file ],
161176 )
162177 self .dynamo_repository .write_report_upload_to_dynamo (
163178 failed_entry , UploadStatus .FAILED , str (error )
164179 )
165180
166181 def send_metadata_to_fifo_sqs (
167- self , staging_metadata_list : list [StagingMetadata ]
182+ self , staging_sqs_metadata_list : list [StagingSqsMetadata ]
168183 ) -> None :
169184 sqs_group_id = f"bulk_upload_{ uuid .uuid4 ()} "
170185
171- for staging_metadata in staging_metadata_list :
172- nhs_number = staging_metadata .nhs_number
186+ for staging_sqs_metadata in staging_sqs_metadata_list :
187+ nhs_number = staging_sqs_metadata .nhs_number
173188 logger .info (f"Sending metadata for patientId: { nhs_number } " )
174189
175190 self .sqs_service .send_message_with_nhs_number_attr_fifo (
176191 queue_url = self .metadata_queue_url ,
177- message_body = staging_metadata .model_dump_json (by_alias = True ),
192+ message_body = staging_sqs_metadata .model_dump_json (by_alias = True ),
178193 nhs_number = nhs_number ,
179194 group_id = sqs_group_id ,
180195 )