33from io import BytesIO
44from unittest .mock import patch
55
6- with patch ("logging_decorator.file_level_validation_logging_decorator" , lambda f : f ):
7- from batch_processor import process_csv_to_fhir
8-
96
107def create_patch (target : str ):
118 patcher = patch (target )
@@ -60,98 +57,106 @@ def insert_cp1252_at_end(self, data: list[bytes], new_text: bytes, field: int) -
6057
6158 def test_process_large_file_with_cp1252 (self ):
6259 """ Test processing a large file with cp1252 encoding """
63- n_rows = 500
64- data = self .create_test_data_from_file ("test-batch-data.csv" )
65- data = self .expand_test_data (data , n_rows )
66- data = self .insert_cp1252_at_end (data , b'D\xe9 cembre' , 2 )
67- ret1 = {"Body" : BytesIO (b"" .join (data ))}
68- ret2 = {"Body" : BytesIO (b"" .join (data ))}
69- self .mock_s3_get_object .side_effect = [ret1 , ret2 ]
70- self .mock_map_target_disease .return_value = "some disease"
71-
72- message_body = {
73- "vaccine_type" : "vax-type-1" ,
74- "supplier" : "test-supplier" ,
75- }
76- self .mock_map_target_disease .return_value = "some disease"
77-
78- n_rows_processed = process_csv_to_fhir (message_body )
79- self .assertEqual (n_rows_processed , n_rows )
80- self .assertEqual (self .mock_send_to_kinesis .call_count , n_rows )
81- # check logger.warning called for decode error
82- self .mock_logger_warning .assert_called ()
83- warning_call_args = self .mock_logger_warning .call_args [0 ][0 ]
84- self .assertTrue (warning_call_args .startswith ("Error processing: 'utf-8' codec can't decode byte" ))
60+ with patch ("logging_decorator.file_level_validation_logging_decorator" , lambda f : f ):
61+ from batch_processor import process_csv_to_fhir
62+ n_rows = 500
63+ data = self .create_test_data_from_file ("test-batch-data.csv" )
64+ data = self .expand_test_data (data , n_rows )
65+ data = self .insert_cp1252_at_end (data , b'D\xe9 cembre' , 2 )
66+ ret1 = {"Body" : BytesIO (b"" .join (data ))}
67+ ret2 = {"Body" : BytesIO (b"" .join (data ))}
68+ self .mock_s3_get_object .side_effect = [ret1 , ret2 ]
69+ self .mock_map_target_disease .return_value = "some disease"
70+
71+ message_body = {
72+ "vaccine_type" : "vax-type-1" ,
73+ "supplier" : "test-supplier" ,
74+ }
75+ self .mock_map_target_disease .return_value = "some disease"
76+
77+ n_rows_processed = process_csv_to_fhir (message_body )
78+ self .assertEqual (n_rows_processed , n_rows )
79+ self .assertEqual (self .mock_send_to_kinesis .call_count , n_rows )
80+ # check logger.warning called for decode error
81+ self .mock_logger_warning .assert_called ()
82+ warning_call_args = self .mock_logger_warning .call_args [0 ][0 ]
83+ self .assertTrue (warning_call_args .startswith ("Error processing: 'utf-8' codec can't decode byte" ))
8584
8685 def test_process_large_file_with_utf8 (self ):
8786 """ Test processing a large file with utf-8 encoding """
88- n_rows = 500
89- data = self .create_test_data_from_file ("test-batch-data.csv" )
90- data = self .expand_test_data (data , n_rows )
91- ret1 = {"Body" : BytesIO (b"" .join (data ))}
92- ret2 = {"Body" : BytesIO (b"" .join (data ))}
93- self .mock_s3_get_object .side_effect = [ret1 , ret2 ]
94- self .mock_map_target_disease .return_value = "some disease"
95-
96- message_body = {
97- "vaccine_type" : "vax-type-1" ,
98- "supplier" : "test-supplier" ,
99- }
100- self .mock_map_target_disease .return_value = "some disease"
101-
102- n_rows_processed = process_csv_to_fhir (message_body )
103- self .assertEqual (n_rows_processed , n_rows )
104- self .assertEqual (self .mock_send_to_kinesis .call_count , n_rows )
105- self .mock_logger_warning .assert_not_called ()
106- self .mock_logger_error .assert_not_called ()
87+ with patch ("logging_decorator.file_level_validation_logging_decorator" , lambda f : f ):
88+ from batch_processor import process_csv_to_fhir
89+ n_rows = 500
90+ data = self .create_test_data_from_file ("test-batch-data.csv" )
91+ data = self .expand_test_data (data , n_rows )
92+ ret1 = {"Body" : BytesIO (b"" .join (data ))}
93+ ret2 = {"Body" : BytesIO (b"" .join (data ))}
94+ self .mock_s3_get_object .side_effect = [ret1 , ret2 ]
95+ self .mock_map_target_disease .return_value = "some disease"
96+
97+ message_body = {
98+ "vaccine_type" : "vax-type-1" ,
99+ "supplier" : "test-supplier" ,
100+ }
101+ self .mock_map_target_disease .return_value = "some disease"
102+
103+ n_rows_processed = process_csv_to_fhir (message_body )
104+ self .assertEqual (n_rows_processed , n_rows )
105+ self .assertEqual (self .mock_send_to_kinesis .call_count , n_rows )
106+ self .mock_logger_warning .assert_not_called ()
107+ self .mock_logger_error .assert_not_called ()
107108
108109 def test_process_cp1252_small_file (self ):
109110 """ Test processing a small file with cp1252 encoding """
110- data = self .create_test_data_from_file ("test-batch-data-cp1252.csv" )
111- data = [line if line .endswith (b"\n " ) else line + b"\n " for line in data ]
112- n_rows = len (data ) - 1 # Exclude header
113-
114- ret1 = {"Body" : BytesIO (b"" .join (data ))}
115- ret2 = {"Body" : BytesIO (b"" .join (data ))}
116- self .mock_s3_get_object .side_effect = [ret1 , ret2 ]
117- self .mock_map_target_disease .return_value = "some disease"
118-
119- message_body = {
120- "vaccine_type" : "vax-type-1" ,
121- "supplier" : "test-supplier" ,
122- }
123-
124- self .mock_map_target_disease .return_value = "some disease"
125-
126- n_rows_processed = process_csv_to_fhir (message_body )
127- self .assertEqual (n_rows_processed , n_rows )
128- self .assertEqual (self .mock_send_to_kinesis .call_count , n_rows )
129- self .mock_logger_warning .assert_called ()
130- warning_call_args = self .mock_logger_warning .call_args [0 ][0 ]
131- self .assertTrue (warning_call_args .startswith ("Invalid Encoding detected in process_csv_to_fhir" ))
111+ with patch ("logging_decorator.file_level_validation_logging_decorator" , lambda f : f ):
112+ from batch_processor import process_csv_to_fhir
113+ data = self .create_test_data_from_file ("test-batch-data-cp1252.csv" )
114+ data = [line if line .endswith (b"\n " ) else line + b"\n " for line in data ]
115+ n_rows = len (data ) - 1 # Exclude header
116+
117+ ret1 = {"Body" : BytesIO (b"" .join (data ))}
118+ ret2 = {"Body" : BytesIO (b"" .join (data ))}
119+ self .mock_s3_get_object .side_effect = [ret1 , ret2 ]
120+ self .mock_map_target_disease .return_value = "some disease"
121+
122+ message_body = {
123+ "vaccine_type" : "vax-type-1" ,
124+ "supplier" : "test-supplier" ,
125+ }
126+
127+ self .mock_map_target_disease .return_value = "some disease"
128+
129+ n_rows_processed = process_csv_to_fhir (message_body )
130+ self .assertEqual (n_rows_processed , n_rows )
131+ self .assertEqual (self .mock_send_to_kinesis .call_count , n_rows )
132+ self .mock_logger_warning .assert_called ()
133+ warning_call_args = self .mock_logger_warning .call_args [0 ][0 ]
134+ self .assertTrue (warning_call_args .startswith ("Invalid Encoding detected in process_csv_to_fhir" ))
132135
133136 def test_process_utf8_small_file (self ):
134137 """ Test processing a small file with utf-8 encoding """
135- data = self .create_test_data_from_file ("test-batch-data.csv" )
136- data = [line if line .endswith (b"\n " ) else line + b"\n " for line in data ]
137- n_rows = len (data ) - 1 # Exclude header
138-
139- ret1 = {"Body" : BytesIO (b"" .join (data ))}
140- ret2 = {"Body" : BytesIO (b"" .join (data ))}
141- self .mock_s3_get_object .side_effect = [ret1 , ret2 ]
142- self .mock_map_target_disease .return_value = "some disease"
143-
144- message_body = {
145- "vaccine_type" : "vax-type-1" ,
146- "supplier" : "test-supplier" ,
147- }
148- self .mock_map_target_disease .return_value = "some disease"
149-
150- n_rows_processed = process_csv_to_fhir (message_body )
151- self .assertEqual (n_rows_processed , n_rows )
152- self .assertEqual (self .mock_send_to_kinesis .call_count , n_rows )
153- self .mock_logger_warning .assert_not_called ()
154- self .mock_logger_error .assert_not_called ()
138+ with patch ("logging_decorator.file_level_validation_logging_decorator" , lambda f : f ):
139+ from batch_processor import process_csv_to_fhir
140+ data = self .create_test_data_from_file ("test-batch-data.csv" )
141+ data = [line if line .endswith (b"\n " ) else line + b"\n " for line in data ]
142+ n_rows = len (data ) - 1 # Exclude header
143+
144+ ret1 = {"Body" : BytesIO (b"" .join (data ))}
145+ ret2 = {"Body" : BytesIO (b"" .join (data ))}
146+ self .mock_s3_get_object .side_effect = [ret1 , ret2 ]
147+ self .mock_map_target_disease .return_value = "some disease"
148+
149+ message_body = {
150+ "vaccine_type" : "vax-type-1" ,
151+ "supplier" : "test-supplier" ,
152+ }
153+ self .mock_map_target_disease .return_value = "some disease"
154+
155+ n_rows_processed = process_csv_to_fhir (message_body )
156+ self .assertEqual (n_rows_processed , n_rows )
157+ self .assertEqual (self .mock_send_to_kinesis .call_count , n_rows )
158+ self .mock_logger_warning .assert_not_called ()
159+ self .mock_logger_error .assert_not_called ()
155160
156161 def test_fix_cp1252 (self ):
157162 # create a cp1252 string that contains an accented E
@@ -176,5 +181,3 @@ def dict_decode(input_dict: dict, encoding: str) -> dict:
176181 else :
177182 decoded_dict [key ] = value
178183 return decoded_dict
179-
180- # @TODO TEST to check correct number of messages sent to firehose with encode error
0 commit comments