Skip to content

Commit c53041a

Browse files
committed
bump validation code
1 parent deb2342 commit c53041a

File tree

6 files changed

+165
-176
lines changed

6 files changed

+165
-176
lines changed

lambdas/shared/src/common/validator/expression_checker.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ class ExpressionChecker:
1515
"""
1616

1717
def __init__(self, data_parser, summarise: bool, report_unexpected_exception: bool):
18-
self.data_parser = data_parser # FHIR data parser for additional functions
18+
self.data_parser = data_parser
1919
self.data_look_up = LookUpData()
2020
self.key_data = KeyData()
2121
self.summarise = summarise

lambdas/shared/src/common/validator/parsers/schema_parser.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
>>> parser = SchemaParser()
1515
>>> parser.parse_schema(schema)
1616
>>> parser.expression_count()
17-
2
1817
>>> parser.get_expression(0)
1918
{'expression': 'LOOKUP', 'field': 'route'}
2019
"""

lambdas/shared/src/common/validator/validator.py

Lines changed: 74 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,8 @@
1515

1616

1717
class Validator:
18-
def __init__(self, schema_file="", data_type: DataType = None, filepath=""):
19-
self.filepath = filepath
20-
self.json_data = {}
21-
self.fhir_data = {}
18+
def __init__(self, schema_file=""):
2219
self.schema_file = schema_file
23-
self.csv_row = ""
24-
self.csv_header = ""
25-
self.data_type = data_type
26-
self.data_parser = ""
27-
self.error_records: list[ErrorReport] = []
2820

2921
# Retrieve all the parsers
3022
def _get_csv_parser(self, filepath: str) -> CSVParser:
@@ -50,6 +42,7 @@ def _get_schema_parser(self, schemafile: str) -> SchemaParser:
5042
# Collect and add error record to the list
5143
def _add_error_record(
5244
self,
45+
error_records: list[ErrorReport],
5346
error_record: ErrorReport,
5447
expression_error_group: str,
5548
expression_name: str,
@@ -61,27 +54,27 @@ def _add_error_record(
6154
error_record.name = expression_name
6255
error_record.id = expression_id
6356
error_record.error_level = error_level
64-
self.error_records.append(error_record)
57+
error_records.append(error_record)
6558

6659
# Function to help identify a parent failure in the error list
67-
def _check_error_record_for_fail(self, expression_identifier: str) -> bool:
68-
for error_record in self.error_records:
60+
def _check_error_record_for_fail(self, expression_identifier: str, error_records: list[ErrorReport]) -> bool:
61+
for error_record in error_records:
6962
if error_record.id == expression_identifier:
7063
return True
7164
return False
7265

7366
# validate a single expression against the data file
7467
def _validate_expression(
75-
self, expression_validator: ExpressionChecker, expression: dict, inc_header_in_row_count: bool
68+
self,
69+
expression_validator: ExpressionChecker,
70+
expression: dict,
71+
data_parser,
72+
error_records: list[ErrorReport],
73+
inc_header_in_row_count: bool,
74+
is_csv: bool,
7675
) -> ErrorReport | int:
77-
row = 1
78-
if inc_header_in_row_count:
79-
row = 2
80-
81-
if self.is_csv:
82-
expression_fieldname = expression["fieldNameFlat"]
83-
else:
84-
expression_fieldname = expression["fieldNameFHIR"]
76+
row = 2 if inc_header_in_row_count else 1
77+
expression_fieldname = expression["fieldNameFlat"] if is_csv else expression["fieldNameFHIR"]
8578

8679
expression_id = expression["expressionId"]
8780
error_level = expression["errorLevel"]
@@ -93,25 +86,25 @@ def _validate_expression(
9386
# Check whether the expression has a parent and, if so, whether that parent already failed validation
9487
if "parentExpression" in expression:
9588
parent_expression = expression["parentExpression"]
96-
if self._check_error_record_for_fail(parent_expression):
89+
if self._check_error_record_for_fail(parent_expression, error_records):
9790
error_record = ErrorReport(
9891
code=ExceptionLevels.PARENT_FAILED,
9992
message=MESSAGES[ExceptionLevels.PARENT_FAILED] + ", Parent ID: " + parent_expression,
10093
)
10194
self._add_error_record(error_record, expression_error_group, expression_name, expression_id, error_level)
102-
return error_record
95+
error_records.append(error_record)
96+
return
10397

10498
try:
105-
expression_values = self.data_parser.get_key_value(expression_fieldname)
99+
expression_values = data_parser.get_key_value(expression_fieldname)
106100
except Exception as e:
107101
message = f"Data get values Unexpected exception [{e.__class__.__name__}]: {e}"
108-
error_report = ErrorReport(code=ExceptionLevels.PARSING_ERROR, message=message)
102+
error_record = ErrorReport(code=ExceptionLevels.PARSING_ERROR, message=message)
109103
# original code had self.CriticalErrorLevel. Replaced with error_level
110-
self._add_error_record(error_report, expression_error_group, expression_name, expression_id, error_level)
111-
return error_report
104+
self._add_error_record(error_record, expression_error_group, expression_name, expression_id, error_level)
105+
return error_records.append(error_record)
112106

113107
for value in expression_values:
114-
error_record: ErrorReport | None = None
115108
try:
116109
error_record = expression_validator.validate_expression(
117110
expression_type, expression_rule, expression_fieldname, value, row
@@ -132,20 +125,28 @@ def validate_fhir(
132125
report_unexpected_exception: bool = True,
133126
inc_header_in_row_count: bool = True,
134127
) -> list[ErrorReport]:
135-
self.data_type = DataType.FHIR
136-
self.fhir_data = fhir_data
137-
return self.run_validation(summarise, report_unexpected_exception, inc_header_in_row_count)
128+
return self.run_validation(
129+
data_type=DataType.FHIR,
130+
fhir_data=fhir_data,
131+
summarise=summarise,
132+
report_unexpected_exception=report_unexpected_exception,
133+
inc_header_in_row_count=inc_header_in_row_count,
134+
)
138135

139136
def validate_csv(
140137
self,
141-
filepath: str,
138+
batch_filepath: str,
142139
summarise: bool = False,
143140
report_unexpected_exception: bool = True,
144141
inc_header_in_row_count: bool = True,
145142
) -> list[ErrorReport]:
146-
self.data_type = DataType.CSV
147-
self.filepath = filepath
148-
return self.run_validation(summarise, report_unexpected_exception, inc_header_in_row_count)
143+
return self.run_validation(
144+
data_type=DataType.CSV,
145+
batch_filepath=batch_filepath,
146+
summarise=summarise,
147+
report_unexpected_exception=report_unexpected_exception,
148+
inc_header_in_row_count=inc_header_in_row_count,
149+
)
149150

150151
def validate_csv_row(
151152
self,
@@ -155,72 +156,62 @@ def validate_csv_row(
155156
report_unexpected_exception: bool = True,
156157
inc_header_in_row_count: bool = True,
157158
) -> list[ErrorReport]:
158-
self.data_type = DataType.CSVROW
159-
self.csv_row = csv_row
160-
self.csv_header = csv_header
161-
return self.run_validation(summarise, report_unexpected_exception, inc_header_in_row_count)
159+
return self.run_validation(
160+
data_type=DataType.CSVROW,
161+
csv_row=csv_row,
162+
csv_header=csv_header,
163+
summarise=summarise,
164+
report_unexpected_exception=report_unexpected_exception,
165+
inc_header_in_row_count=inc_header_in_row_count,
166+
)
162167

163168
# run the validation against the data
164169
def run_validation(
165-
self, summarise=False, report_unexpected_exception=True, inc_header_in_row_count=True
170+
self,
171+
data_type: DataType,
172+
fhir_data: dict = None,
173+
batch_filepath: str = None,
174+
csv_row: str = None,
175+
csv_header: list[str] = None,
176+
summarise=False,
177+
report_unexpected_exception=True,
178+
inc_header_in_row_count=True,
166179
) -> list[ErrorReport]:
167-
try:
168-
self.error_records.clear()
180+
error_records: list[ErrorReport] = []
169181

170-
match self.data_type:
182+
try:
183+
match data_type:
171184
case DataType.FHIR:
172-
self.data_parser = self._get_fhir_parser(self.fhir_data)
173-
self.is_csv = False
185+
data_parser = self._get_fhir_parser(fhir_data)
186+
is_csv = False
174187
case DataType.CSV:
175-
self.data_parser = self._get_csv_parser(self.filepath)
176-
self.is_csv = True
188+
data_parser = self._get_csv_parser(batch_filepath)
189+
is_csv = True
177190
case DataType.CSVROW:
178-
self.data_parser = self._get_csv_line_parser(self.csv_row, self.csv_header)
179-
self.is_csv = True
191+
data_parser = self._get_csv_line_parser(csv_row, csv_header)
192+
is_csv = True
180193

181194
except Exception as e:
182195
if report_unexpected_exception:
183196
message = f"Data Parser Unexpected exception [{e.__class__.__name__}]: {e}"
184197
return [ErrorReport(code=0, message=message)]
185198

186-
try:
187-
schema_parser = self._get_schema_parser(self.schema_file)
188-
except Exception as e:
189-
if report_unexpected_exception:
190-
message = f"Schema Parser Unexpected exception [{e.__class__.__name__}]: {e}"
191-
return [ErrorReport(code=0, message=message)]
192-
193-
try:
194-
expression_validator = ExpressionChecker(self.data_parser, summarise, report_unexpected_exception)
195-
except Exception as e:
196-
if report_unexpected_exception:
197-
message = f"Expression Checker Unexpected exception [{e.__class__.__name__}]: {e}"
198-
return [ErrorReport(code=0, message=message)]
199-
200-
# get list of expressions
201-
try:
202-
expressions = schema_parser.get_expressions()
203-
except Exception as e:
204-
if report_unexpected_exception:
205-
message = f"Expression Getter Unexpected exception [{e.__class__.__name__}]: {e}"
206-
return [ErrorReport(code=0, message=message)]
199+
schema_parser = self._get_schema_parser(self.schema_file)
200+
expression_validator = ExpressionChecker(data_parser, summarise, report_unexpected_exception)
201+
expressions = schema_parser.get_expressions()
207202

208203
for expression in expressions:
209-
self._validate_expression(expression_validator, expression, inc_header_in_row_count)
204+
self._validate_expression(
205+
expression_validator, expression, data_parser, error_records, inc_header_in_row_count, is_csv
206+
)
210207

211-
return self.error_records
208+
return error_records
212209

213210
# Build the error Report
214-
def build_error_report(self, event_id: str) -> dict:
215-
occurrence_date_time = self.data_parser.get_fhir_value("occurrenceDateTime")
211+
def build_error_report(self, event_id: str, data_parser, error_records: list[ErrorReport]) -> dict:
212+
occurrence_date_time = data_parser.get_fhir_value("occurrenceDateTime")
216213
dq_reporter = DQReporter()
217-
dq_report = dq_reporter.generate_error_report(event_id, occurrence_date_time, self.error_records)
218-
219-
return dq_report
214+
return dq_reporter.generate_error_report(event_id, occurrence_date_time, error_records)
220215

221-
# Check all errors to see if we have a critical error that would fail the validation
222-
def has_validation_failed(self) -> bool:
223-
for error_record in self.error_records:
224-
if error_record.error_level == ErrorLevels.CRITICAL_ERROR:
225-
return True
226-
return False
216+
def has_validation_failed(self, error_records: list[ErrorReport]) -> bool:
217+
return any(er.error_level == ErrorLevels.CRITICAL_ERROR for er in error_records)

lambdas/shared/tests/test_common/validator/test_application_fhir.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66
from common.validator.validator import Validator
77
from tests.test_common.validator.testing_utils.csv_fhir_utils import parse_test_file
88

9-
# TODO this needs success and fail cases
10-
119

1210
class TestApplication(unittest.TestCase):
1311
def setUp(self):

lambdas/shared/tests/test_common/validator/test_validation_csv_row.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,18 @@ class TestValidator(unittest.TestCase):
1717

1818
def setUp(self):
1919
self.validator = Validator(parse_test_file(schemaFilePath))
20+
self.maxDiff = None
2021

2122
def test_run_validation_on_valid_csv_row(self):
2223
valid_rows = build_row(CSV_HEADER, CSV_VALUES)
2324
error_report = self.validator.validate_csv_row(valid_rows, CSV_HEADER, True, True, True)
2425
print(f"Error Report: {error_report}")
25-
self.maxDiff = None
2626
self.assertEqual(error_report, [])
2727

2828
def test_run_validation_on_invalid_csv_row(self):
2929
invalid_rows = build_row(CSV_HEADER, {**CSV_VALUES, "NHS_NUMBER": ""})
3030
error_report = self.validator.validate_csv_row(invalid_rows, CSV_HEADER, True, True, True)
31+
print(f"Error Report: {error_report}")
3132
self.assertTrue(len(error_report) > 0)
3233
messages = [(e.name, e.message, e.details) for e in error_report]
3334
expected_error = (

0 commit comments

Comments
 (0)