Skip to content

Commit f3a0df9

Browse files
committed
refactor csv incoming data to dict
1 parent 0eb7541 commit f3a0df9

File tree

5 files changed

+18
-29
lines changed

5 files changed

+18
-29
lines changed

lambdas/shared/src/common/validator/parsers/csv_line_parser.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,20 +13,15 @@
1313
{'nhs_number': '9011011', 'name': 'Tom', 'age': '32'}
1414
"""
1515

16-
import csv
17-
1816

1917
class CSVLineParser:
2018
# parser variables
2119
def __init__(self):
2220
self.csv_file_data: dict[str, str] = {}
2321

2422
# store the incoming CSV row, already a column-name -> value dictionary, as the parser's data
25-
def parse_csv_line(self, csv_row: str, csv_header: str) -> None:
26-
# create a key value mapping
27-
keys = list(csv.reader([csv_header]))[0]
28-
values = list(csv.reader([csv_row]))[0]
29-
self.csv_file_data = dict(zip(keys, values, strict=False))
23+
def parse_csv_line(self, csv_row: dict[str, str]) -> None:
24+
self.csv_file_data = csv_row
3025

3126
# Retrieves the value of a specific column name as a list.
3227
def get_key_value(self, field_name: str) -> list[str]:

lambdas/shared/src/common/validator/parsers/paser_interface.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,9 @@ def extract_field_values(self, field_path) -> list[str]:
3232

3333

3434
class BatchInterface(PaserInterface):
35-
def __init__(self, csv_row: str, csv_header: str):
35+
def __init__(self, csv_row: dict[str, str]):
3636
self.csv_line_parser = CSVLineParser()
37-
self.csv_line_parser.parse_csv_line(csv_row, csv_header)
37+
self.csv_line_parser.parse_csv_line(csv_row)
3838

3939
def get_data_format(self) -> str:
4040
return "batch"

lambdas/shared/src/common/validator/validator.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -95,16 +95,14 @@ def validate_fhir(
9595

9696
def validate_csv_row(
9797
self,
98-
csv_row: str,
99-
csv_header: list[str],
98+
csv_row: dict[str, str],
10099
summarise: bool = False,
101100
report_unexpected_exception: bool = True,
102101
inc_header_in_row_count: bool = True,
103102
) -> list[ErrorReport]:
104103
return self.run_validation(
105104
data_type=DataType.CSVROW,
106105
csv_row=csv_row,
107-
csv_header=csv_header,
108106
summarise=summarise,
109107
report_unexpected_exception=report_unexpected_exception,
110108
inc_header_in_row_count=inc_header_in_row_count,
@@ -115,8 +113,7 @@ def run_validation(
115113
self,
116114
data_type: DataType,
117115
fhir_data: dict = None,
118-
csv_row: str = None,
119-
csv_header: list[str] = None,
116+
csv_row: dict[str, str] = None,
120117
summarise=False,
121118
report_unexpected_exception=True,
122119
inc_header_in_row_count=True,
@@ -128,7 +125,7 @@ def run_validation(
128125
case DataType.FHIR:
129126
data_parser = FHIRInterface(fhir_data)
130127
case DataType.CSVROW:
131-
data_parser = BatchInterface(csv_row, csv_header)
128+
data_parser = BatchInterface(csv_row)
132129

133130
except Exception as e:
134131
if report_unexpected_exception:

lambdas/shared/tests/test_common/validator/test_csv_line_parser.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,13 @@
77
import unittest
88

99
from common.validator.parsers.csv_line_parser import CSVLineParser
10-
from test_common.validator.testing_utils.constants import CSV_HEADER, CSV_VALUES
11-
from tests.test_common.validator.testing_utils.csv_fhir_utils import build_row
10+
from test_common.validator.testing_utils.constants import CSV_VALUES
1211

1312

1413
class TestCSVLineParser(unittest.TestCase):
1514
def test_parse_normal(self):
1615
csv_parsers = CSVLineParser()
17-
csv_rows = build_row(CSV_HEADER, CSV_VALUES)
18-
csv_parsers.parse_csv_line(csv_rows, CSV_HEADER)
16+
csv_parsers.parse_csv_line(CSV_VALUES)
1917
self.assertEqual(csv_parsers.csv_file_data, CSV_VALUES)
2018
self.assertEqual(csv_parsers.get_key_value("NHS_NUMBER"), ["9000000009"])
2119

@@ -24,10 +22,10 @@ def test_extra_values_ignored(self):
2422
Keep a value stored under an empty-string key as-is; named columns remain retrievable
2523
"""
2624
csv_parsers = CSVLineParser()
27-
csv_parsers.parse_csv_line("9000000009,Alex,Trent", "NHS_NUMBER,PERSON_FORENAME,PERSON_SURNAME")
25+
csv_parsers.parse_csv_line({"NHS_NUMBER": "9000000009", "PERSON_FORENAME": "Alex", "": "Trent"})
2826
self.assertEqual(
2927
csv_parsers.csv_file_data,
30-
{"NHS_NUMBER": "9000000009", "PERSON_FORENAME": "Alex", "PERSON_SURNAME": "Trent"},
28+
{"NHS_NUMBER": "9000000009", "PERSON_FORENAME": "Alex", "": "Trent"},
3129
)
3230
self.assertEqual(csv_parsers.get_key_value("PERSON_FORENAME"), ["Alex"])
3331

@@ -37,7 +35,7 @@ def test_fewer_values_than_keys(self):
3735
raises an error when accessing key without value
3836
"""
3937
csv_parsers = CSVLineParser()
40-
csv_parsers.parse_csv_line("9000000009,Alex", "NHS_NUMBER,PERSON_FORENAME,PERSON_SURNAME")
38+
csv_parsers.parse_csv_line({"NHS_NUMBER": "9000000009", "PERSON_FORENAME": "Alex"})
4139
self.assertIn("NHS_NUMBER", csv_parsers.csv_file_data)
4240
self.assertIn("PERSON_FORENAME", csv_parsers.csv_file_data)
4341
self.assertNotIn("PERSON_SURNAME", csv_parsers.csv_file_data)
@@ -48,7 +46,7 @@ def test_get_missing_key_raises(self):
4846
"""
4947
Test that accessing a non-existent key raises KeyError"""
5048
csv_parsers = CSVLineParser()
51-
csv_parsers.parse_csv_line("9000000009,Alex", "NHS_NUMBER,PERSON_FORENAME,PERSON_SURNAME")
49+
csv_parsers.parse_csv_line({"NHS_NUMBER": "9000000009", "PERSON_FORENAME": "Alex"})
5250
with self.assertRaises(KeyError):
5351
_ = csv_parsers.get_key_value("VACCINE_TYPE")
5452

lambdas/shared/tests/test_common/validator/test_validation_csv_row.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66

77
from common.validator.error_report.error_reporter import build_error_report
88
from common.validator.validator import Validator
9-
from test_common.validator.testing_utils.constants import CSV_HEADER, CSV_VALUES
10-
from tests.test_common.validator.testing_utils.csv_fhir_utils import build_row, parse_test_file
9+
from test_common.validator.testing_utils.constants import CSV_VALUES
10+
from tests.test_common.validator.testing_utils.csv_fhir_utils import parse_test_file
1111

1212
schema_data_folder = Path(__file__).parent / "test_schemas"
1313
schemaFilePath = schema_data_folder / "test_schema.json"
@@ -23,13 +23,12 @@ def setUp(self):
2323
self.maxDiff = None
2424

2525
def test_run_validation_on_valid_csv_row(self):
26-
valid_rows = build_row(CSV_HEADER, CSV_VALUES)
27-
error_list = self.validator.validate_csv_row(valid_rows, CSV_HEADER, True, True, True)
26+
error_list = self.validator.validate_csv_row(CSV_VALUES, True, True, True)
2827
self.assertEqual(error_list, [])
2928

3029
def test_run_validation_on_invalid_csv_row(self):
31-
invalid_rows = build_row(CSV_HEADER, {**CSV_VALUES, "NHS_NUMBER": ""})
32-
error_list = self.validator.validate_csv_row(invalid_rows, CSV_HEADER, True, True, True)
30+
invalid_rows = {**CSV_VALUES, "NHS_NUMBER": ""}
31+
error_list = self.validator.validate_csv_row(invalid_rows, True, True, True)
3332

3433
self.assertTrue(len(error_list) > 0)
3534
messages = [(e.name, e.message, e.details) for e in error_list]

0 commit comments

Comments
 (0)