77from mappings import map_target_disease
88from send_to_kinesis import send_to_kinesis
99from clients import logger
10- from file_level_validation import file_level_validation
11- from errors import NoOperationPermissions , InvalidHeaders
10+ from file_level_validation import file_level_validation , validate_content_headers
11+ from errors import NoOperationPermissions , InvalidHeaders , InvalidEncoding
1212from utils_for_recordprocessor import get_csv_content_dict_reader
1313
1414
@@ -17,11 +17,21 @@ def process_csv_to_fhir(incoming_message_body: dict) -> None:
1717 For each row of the csv, attempts to transform into FHIR format, sends a message to kinesis,
1818 and documents the outcome for each row in the ack file.
1919 """
20+ encoder = "utf-8" # default encoding
2021 try :
21- interim_message_body = file_level_validation (incoming_message_body = incoming_message_body )
22+ interim_message_body = file_level_validation (incoming_message_body = incoming_message_body , encoder = encoder )
23+ except InvalidEncoding as error :
24+ logger .warning ("Invalid Encoding detected in process_csv_to_fhir: %s" , error )
25+ # retry with cp1252 encoding
26+ encoder = "cp1252"
27+ try :
28+ interim_message_body = file_level_validation (incoming_message_body = incoming_message_body , encoder = encoder )
29+ except Exception as error :
30+ logger .error (f"Error in file_level_validation with { encoder } encoding: %s" , error )
31+ return 0
2232 except (InvalidHeaders , NoOperationPermissions , Exception ): # pylint: disable=broad-exception-caught
2333 # If the file is invalid, processing should cease immediately
24- return
34+ return 0
2535
2636 file_id = interim_message_body .get ("message_id" )
2737 vaccine = interim_message_body .get ("vaccine" )
@@ -32,29 +42,28 @@ def process_csv_to_fhir(incoming_message_body: dict) -> None:
3242 csv_reader = interim_message_body .get ("csv_dict_reader" )
3343
3444 target_disease = map_target_disease (vaccine )
35- print ("process csv to fhir" )
3645 row_count = 0
37- encoder = "utf-8" # default encoding
38- try :
39- row_count = process_rows (file_id , vaccine , supplier , file_key , allowed_operations ,
40- created_at_formatted_string , csv_reader , target_disease )
41- except Exception as error : # pylint: disable=broad-exception-caught
42- new_encoder = "cp1252"
43- print (f"Error processing: { error } ." )
46+ row_count , err = process_rows (file_id , vaccine , supplier , file_key , allowed_operations ,
47+ created_at_formatted_string , csv_reader , target_disease )
48+ if err :
49+ print (f"Error processing: { err } ." )
4450 # check if it's a decode error, ie error.args[0] begins with "'utf-8' codec can't decode byte"
45- if error .reason == "invalid continuation byte" :
51+ if err .reason == "invalid continuation byte" :
52+ new_encoder = "cp1252"
4653 print (f"Encode error at row { row_count } with { encoder } . Switch to { new_encoder } " )
4754 # print(f"Detected decode error: {error.reason}")
4855 encoder = new_encoder
4956 # if we are here, re-read the file with alternative encoding and skip processed rows
50- row_count = process_rows_retry (file_id , vaccine , supplier , file_key ,
51- allowed_operations , created_at_formatted_string ,
52- encoder , row_count )
57+ csv_reader = get_csv_content_dict_reader (file_key , encoder = encoder )
58+ validate_content_headers (csv_reader )
59+ row_count = process_rows (file_id , vaccine , supplier , file_key , allowed_operations ,
60+ created_at_formatted_string , csv_reader , target_disease , row_count )
5361 else :
54- logger .error (f"Non-decode error: { error } . Cannot retry. Call someone." )
55- raise error from error
62+ logger .error (f"Non-decode error: { err } . Cannot retry. Call someone." )
63+ raise err
5664
5765 logger .info ("Total rows processed: %s" , row_count )
66+ return row_count
5867
5968
6069def process_rows_retry (file_id , vaccine , supplier , file_key , allowed_operations ,
@@ -81,35 +90,39 @@ def process_rows(file_id, vaccine, supplier, file_key, allowed_operations, creat
8190 print ("process_rows..." )
8291 row_count = 0
8392 start_row = total_rows_processed_count
84- for row in csv_reader :
85-
86- row_count += 1
87- if row_count > start_row :
88- row_id = f"{ file_id } ^{ row_count } "
89- logger .info ("MESSAGE ID : %s" , row_id )
90-
91- # convert dict to string and print first 20 chars
92- if (total_rows_processed_count % 1000 == 0 ):
93- print (f"Process: { total_rows_processed_count } " )
94- if (total_rows_processed_count > 19995 ):
95- print (f"Process: { total_rows_processed_count } - { row ['PERSON_SURNAME' ]} " )
96-
97- # Process the row to obtain the details needed for the message_body and ack file
98- details_from_processing = process_row (target_disease , allowed_operations , row )
99-
100- # Create the message body for sending
101- outgoing_message_body = {
102- "row_id" : row_id ,
103- "file_key" : file_key ,
104- "supplier" : supplier ,
105- "vax_type" : vaccine ,
106- "created_at_formatted_string" : created_at_formatted_string ,
107- ** details_from_processing ,
108- }
109-
110- send_to_kinesis (supplier , outgoing_message_body , vaccine )
111- total_rows_processed_count += 1
112- logger .info ("Total rows processed: %s" , total_rows_processed_count )
93+ try :
94+ for row in csv_reader :
95+
96+ row_count += 1
97+ if row_count > start_row :
98+ row_id = f"{ file_id } ^{ row_count } "
99+ logger .info ("MESSAGE ID : %s" , row_id )
100+
101+ # convert dict to string and print first 20 chars
102+ if (total_rows_processed_count % 1000 == 0 ):
103+ print (f"Process: { total_rows_processed_count } " )
104+ if (total_rows_processed_count > 19995 ):
105+ print (f"Process: { total_rows_processed_count } - { row ['PERSON_SURNAME' ]} " )
106+
107+ # Process the row to obtain the details needed for the message_body and ack file
108+ details_from_processing = process_row (target_disease , allowed_operations , row )
109+
110+ # Create the message body for sending
111+ outgoing_message_body = {
112+ "row_id" : row_id ,
113+ "file_key" : file_key ,
114+ "supplier" : supplier ,
115+ "vax_type" : vaccine ,
116+ "created_at_formatted_string" : created_at_formatted_string ,
117+ ** details_from_processing ,
118+ }
119+
120+ send_to_kinesis (supplier , outgoing_message_body , vaccine )
121+ total_rows_processed_count += 1
122+ logger .info ("Total rows processed: %s" , total_rows_processed_count )
123+ except Exception as error : # pylint: disable=broad-exception-caught
124+ logger .error ("Error processing row %s: %s" , row_count , error )
125+ return total_rows_processed_count , error
113126 return total_rows_processed_count
114127
115128
0 commit comments