Skip to content

Commit f2d17ed

Browse files
committed
failures=2
1 parent 9a6a678 commit f2d17ed

File tree

1 file changed

+91
-88
lines changed

1 file changed

+91
-88
lines changed

recordprocessor/tests/test_batch_processor.py

Lines changed: 91 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,6 @@
33
from io import BytesIO
44
from unittest.mock import patch
55

6-
with patch("logging_decorator.file_level_validation_logging_decorator", lambda f: f):
7-
from batch_processor import process_csv_to_fhir
8-
96

107
def create_patch(target: str):
118
patcher = patch(target)
@@ -60,98 +57,106 @@ def insert_cp1252_at_end(self, data: list[bytes], new_text: bytes, field: int) -
6057

6158
def test_process_large_file_with_cp1252(self):
6259
""" Test processing a large file with cp1252 encoding """
63-
n_rows = 500
64-
data = self.create_test_data_from_file("test-batch-data.csv")
65-
data = self.expand_test_data(data, n_rows)
66-
data = self.insert_cp1252_at_end(data, b'D\xe9cembre', 2)
67-
ret1 = {"Body": BytesIO(b"".join(data))}
68-
ret2 = {"Body": BytesIO(b"".join(data))}
69-
self.mock_s3_get_object.side_effect = [ret1, ret2]
70-
self.mock_map_target_disease.return_value = "some disease"
71-
72-
message_body = {
73-
"vaccine_type": "vax-type-1",
74-
"supplier": "test-supplier",
75-
}
76-
self.mock_map_target_disease.return_value = "some disease"
77-
78-
n_rows_processed = process_csv_to_fhir(message_body)
79-
self.assertEqual(n_rows_processed, n_rows)
80-
self.assertEqual(self.mock_send_to_kinesis.call_count, n_rows)
81-
# check logger.warning called for decode error
82-
self.mock_logger_warning.assert_called()
83-
warning_call_args = self.mock_logger_warning.call_args[0][0]
84-
self.assertTrue(warning_call_args.startswith("Error processing: 'utf-8' codec can't decode byte"))
60+
with patch("logging_decorator.file_level_validation_logging_decorator", lambda f: f):
61+
from batch_processor import process_csv_to_fhir
62+
n_rows = 500
63+
data = self.create_test_data_from_file("test-batch-data.csv")
64+
data = self.expand_test_data(data, n_rows)
65+
data = self.insert_cp1252_at_end(data, b'D\xe9cembre', 2)
66+
ret1 = {"Body": BytesIO(b"".join(data))}
67+
ret2 = {"Body": BytesIO(b"".join(data))}
68+
self.mock_s3_get_object.side_effect = [ret1, ret2]
69+
self.mock_map_target_disease.return_value = "some disease"
70+
71+
message_body = {
72+
"vaccine_type": "vax-type-1",
73+
"supplier": "test-supplier",
74+
}
75+
self.mock_map_target_disease.return_value = "some disease"
76+
77+
n_rows_processed = process_csv_to_fhir(message_body)
78+
self.assertEqual(n_rows_processed, n_rows)
79+
self.assertEqual(self.mock_send_to_kinesis.call_count, n_rows)
80+
# check logger.warning called for decode error
81+
self.mock_logger_warning.assert_called()
82+
warning_call_args = self.mock_logger_warning.call_args[0][0]
83+
self.assertTrue(warning_call_args.startswith("Error processing: 'utf-8' codec can't decode byte"))
8584

8685
def test_process_large_file_with_utf8(self):
8786
""" Test processing a large file with utf-8 encoding """
88-
n_rows = 500
89-
data = self.create_test_data_from_file("test-batch-data.csv")
90-
data = self.expand_test_data(data, n_rows)
91-
ret1 = {"Body": BytesIO(b"".join(data))}
92-
ret2 = {"Body": BytesIO(b"".join(data))}
93-
self.mock_s3_get_object.side_effect = [ret1, ret2]
94-
self.mock_map_target_disease.return_value = "some disease"
95-
96-
message_body = {
97-
"vaccine_type": "vax-type-1",
98-
"supplier": "test-supplier",
99-
}
100-
self.mock_map_target_disease.return_value = "some disease"
101-
102-
n_rows_processed = process_csv_to_fhir(message_body)
103-
self.assertEqual(n_rows_processed, n_rows)
104-
self.assertEqual(self.mock_send_to_kinesis.call_count, n_rows)
105-
self.mock_logger_warning.assert_not_called()
106-
self.mock_logger_error.assert_not_called()
87+
with patch("logging_decorator.file_level_validation_logging_decorator", lambda f: f):
88+
from batch_processor import process_csv_to_fhir
89+
n_rows = 500
90+
data = self.create_test_data_from_file("test-batch-data.csv")
91+
data = self.expand_test_data(data, n_rows)
92+
ret1 = {"Body": BytesIO(b"".join(data))}
93+
ret2 = {"Body": BytesIO(b"".join(data))}
94+
self.mock_s3_get_object.side_effect = [ret1, ret2]
95+
self.mock_map_target_disease.return_value = "some disease"
96+
97+
message_body = {
98+
"vaccine_type": "vax-type-1",
99+
"supplier": "test-supplier",
100+
}
101+
self.mock_map_target_disease.return_value = "some disease"
102+
103+
n_rows_processed = process_csv_to_fhir(message_body)
104+
self.assertEqual(n_rows_processed, n_rows)
105+
self.assertEqual(self.mock_send_to_kinesis.call_count, n_rows)
106+
self.mock_logger_warning.assert_not_called()
107+
self.mock_logger_error.assert_not_called()
107108

108109
def test_process_cp1252_small_file(self):
109110
""" Test processing a small file with cp1252 encoding """
110-
data = self.create_test_data_from_file("test-batch-data-cp1252.csv")
111-
data = [line if line.endswith(b"\n") else line + b"\n" for line in data]
112-
n_rows = len(data) - 1 # Exclude header
113-
114-
ret1 = {"Body": BytesIO(b"".join(data))}
115-
ret2 = {"Body": BytesIO(b"".join(data))}
116-
self.mock_s3_get_object.side_effect = [ret1, ret2]
117-
self.mock_map_target_disease.return_value = "some disease"
118-
119-
message_body = {
120-
"vaccine_type": "vax-type-1",
121-
"supplier": "test-supplier",
122-
}
123-
124-
self.mock_map_target_disease.return_value = "some disease"
125-
126-
n_rows_processed = process_csv_to_fhir(message_body)
127-
self.assertEqual(n_rows_processed, n_rows)
128-
self.assertEqual(self.mock_send_to_kinesis.call_count, n_rows)
129-
self.mock_logger_warning.assert_called()
130-
warning_call_args = self.mock_logger_warning.call_args[0][0]
131-
self.assertTrue(warning_call_args.startswith("Invalid Encoding detected in process_csv_to_fhir"))
111+
with patch("logging_decorator.file_level_validation_logging_decorator", lambda f: f):
112+
from batch_processor import process_csv_to_fhir
113+
data = self.create_test_data_from_file("test-batch-data-cp1252.csv")
114+
data = [line if line.endswith(b"\n") else line + b"\n" for line in data]
115+
n_rows = len(data) - 1 # Exclude header
116+
117+
ret1 = {"Body": BytesIO(b"".join(data))}
118+
ret2 = {"Body": BytesIO(b"".join(data))}
119+
self.mock_s3_get_object.side_effect = [ret1, ret2]
120+
self.mock_map_target_disease.return_value = "some disease"
121+
122+
message_body = {
123+
"vaccine_type": "vax-type-1",
124+
"supplier": "test-supplier",
125+
}
126+
127+
self.mock_map_target_disease.return_value = "some disease"
128+
129+
n_rows_processed = process_csv_to_fhir(message_body)
130+
self.assertEqual(n_rows_processed, n_rows)
131+
self.assertEqual(self.mock_send_to_kinesis.call_count, n_rows)
132+
self.mock_logger_warning.assert_called()
133+
warning_call_args = self.mock_logger_warning.call_args[0][0]
134+
self.assertTrue(warning_call_args.startswith("Invalid Encoding detected in process_csv_to_fhir"))
132135

133136
def test_process_utf8_small_file(self):
134137
""" Test processing a small file with utf-8 encoding """
135-
data = self.create_test_data_from_file("test-batch-data.csv")
136-
data = [line if line.endswith(b"\n") else line + b"\n" for line in data]
137-
n_rows = len(data) - 1 # Exclude header
138-
139-
ret1 = {"Body": BytesIO(b"".join(data))}
140-
ret2 = {"Body": BytesIO(b"".join(data))}
141-
self.mock_s3_get_object.side_effect = [ret1, ret2]
142-
self.mock_map_target_disease.return_value = "some disease"
143-
144-
message_body = {
145-
"vaccine_type": "vax-type-1",
146-
"supplier": "test-supplier",
147-
}
148-
self.mock_map_target_disease.return_value = "some disease"
149-
150-
n_rows_processed = process_csv_to_fhir(message_body)
151-
self.assertEqual(n_rows_processed, n_rows)
152-
self.assertEqual(self.mock_send_to_kinesis.call_count, n_rows)
153-
self.mock_logger_warning.assert_not_called()
154-
self.mock_logger_error.assert_not_called()
138+
with patch("logging_decorator.file_level_validation_logging_decorator", lambda f: f):
139+
from batch_processor import process_csv_to_fhir
140+
data = self.create_test_data_from_file("test-batch-data.csv")
141+
data = [line if line.endswith(b"\n") else line + b"\n" for line in data]
142+
n_rows = len(data) - 1 # Exclude header
143+
144+
ret1 = {"Body": BytesIO(b"".join(data))}
145+
ret2 = {"Body": BytesIO(b"".join(data))}
146+
self.mock_s3_get_object.side_effect = [ret1, ret2]
147+
self.mock_map_target_disease.return_value = "some disease"
148+
149+
message_body = {
150+
"vaccine_type": "vax-type-1",
151+
"supplier": "test-supplier",
152+
}
153+
self.mock_map_target_disease.return_value = "some disease"
154+
155+
n_rows_processed = process_csv_to_fhir(message_body)
156+
self.assertEqual(n_rows_processed, n_rows)
157+
self.assertEqual(self.mock_send_to_kinesis.call_count, n_rows)
158+
self.mock_logger_warning.assert_not_called()
159+
self.mock_logger_error.assert_not_called()
155160

156161
def test_fix_cp1252(self):
157162
# create a cp1252 string that contains an accented E
@@ -176,5 +181,3 @@ def dict_decode(input_dict: dict, encoding: str) -> dict:
176181
else:
177182
decoded_dict[key] = value
178183
return decoded_dict
179-
180-
# @TODO TEST to check correct number of messages sent to firehose with encode error

0 commit comments

Comments
 (0)