1010from audit_table import update_audit_table_status
1111from send_to_kinesis import send_to_kinesis
1212from clients import logger
13- from file_level_validation import file_level_validation
13+ from file_level_validation import file_level_validation , get_csv_content_dict_reader
1414from errors import NoOperationPermissions , InvalidHeaders
1515
1616
17- def process_csv_to_fhir (incoming_message_body : dict ) -> None :
17+ def process_csv_to_fhir (incoming_message_body : dict , encoding = "utf-8" , start_row = 0 ) -> None :
1818 """
1919 For each row of the csv, attempts to transform into FHIR format, sends a message to kinesis,
2020 and documents the outcome for each row in the ack file.
2121 """
2222 try :
23- interim_message_body = file_level_validation (incoming_message_body = incoming_message_body )
23+ interim_message_body = file_level_validation (incoming_message_body = incoming_message_body ,
24+ encoding = encoding )
2425 except (InvalidHeaders , NoOperationPermissions , Exception ): # pylint: disable=broad-exception-caught
2526 # If the file is invalid, processing should cease immediately
2627 return
@@ -36,27 +37,65 @@ def process_csv_to_fhir(incoming_message_body: dict) -> None:
3637 target_disease = map_target_disease (vaccine )
3738
3839 row_count = 0
39- for row in csv_reader :
40- row_count += 1
41- row_id = f"{ file_id } ^{ row_count } "
42- logger .info ("MESSAGE ID : %s" , row_id )
43-
44- # Process the row to obtain the details needed for the message_body and ack file
45- details_from_processing = process_row (target_disease , allowed_operations , row )
46-
47- # Create the message body for sending
48- outgoing_message_body = {
49- "row_id" : row_id ,
50- "file_key" : file_key ,
51- "supplier" : supplier ,
52- "vax_type" : vaccine ,
53- "created_at_formatted_string" : created_at_formatted_string ,
54- ** details_from_processing ,
55- }
56-
57- send_to_kinesis (supplier , outgoing_message_body , vaccine )
58-
59- logger .info ("Total rows processed: %s" , row_count )
40+ try :
41+ for row in csv_reader :
42+ if row_count >= start_row :
43+ row_count += 1
44+ row_id = f"{ file_id } ^{ row_count } "
45+ logger .info ("MESSAGE ID : %s" , row_id )
46+ # concat dict to string for logging
47+ # row_str = ", ".join(f"{v}" for k, v in row.items())
48+ # print(f"Processing row {row_count}: {row_str[:20]}")
49+
50+ details_from_processing = process_row (target_disease , allowed_operations , row )
51+
52+ # Create the message body for sending
53+ outgoing_message_body = {
54+ "row_id" : row_id ,
55+ "file_key" : file_key ,
56+ "supplier" : supplier ,
57+ "vax_type" : vaccine ,
58+ "created_at_formatted_string" : created_at_formatted_string ,
59+ ** details_from_processing ,
60+ }
61+
62+ send_to_kinesis (supplier , outgoing_message_body , vaccine )
63+
64+ logger .info ("Total rows processed: %s" , row_count )
65+ except Exception as error : # pylint: disable=broad-exception-caught
66+ # encoder = "latin-1"
67+ encoder = "cp1252"
68+ print (f"Error processing: { error } ." )
69+ print (f"Encode error at row { row_count } with { encoding } . Switch to { encoder } " )
70+ # If we get here, decoding failed part-way through: re-read the file with the fallback encoding and skip the rows that were already processed
71+ # if error.args[0] == "'utf-8' codec can't decode byte 0xe9 in position 2996: invalid continuation byte":
72+ # cp1252
73+ new_reader = get_csv_content_dict_reader (file_key , encoding = encoder )
74+ start_row = row_count
75+ row_count = 0
76+ for row in new_reader :
77+ row_count += 1
78+ if row_count > start_row :
79+ row_id = f"{ file_id } ^{ row_count } "
80+ logger .info ("MESSAGE ID : %s" , row_id )
81+ original_representation = ", " .join (f"{ v } " for k , v in row .items ())
82+ if original_representation [:20 ] == "9473089333, DORTHY, " :
83+ print (f"Processing row { row_count } : { original_representation [:40 ]} " )
84+
85+ details_from_processing = process_row (target_disease , allowed_operations , row )
86+
87+ outgoing_message_body = {
88+ "row_id" : row_id ,
89+ "file_key" : file_key ,
90+ "supplier" : supplier ,
91+ "vax_type" : vaccine ,
92+ "created_at_formatted_string" : created_at_formatted_string ,
93+ ** details_from_processing ,
94+ }
95+
96+ send_to_kinesis (supplier , outgoing_message_body , vaccine )
97+
98+ logger .info ("Total rows processed: %s" , row_count )
6099
61100 update_audit_table_status (file_key , file_id , FileStatus .PREPROCESSED )
62101
@@ -66,6 +105,7 @@ def main(event: str) -> None:
66105 logger .info ("task started" )
67106 start = time .time ()
68107 try :
108+ # SAW - an error is thrown here when the file contains a windows-1252 character that cannot be decoded as utf-8
69109 process_csv_to_fhir (incoming_message_body = json .loads (event ))
70110 except Exception as error : # pylint: disable=broad-exception-caught
71111 logger .error ("Error processing message: %s" , error )
0 commit comments