Skip to content

Commit 90cfd44

Browse files
authored
VED-948: Add completeness checker (#1073)
1 parent 05011b0 commit 90cfd44

File tree

11 files changed: 329 additions and 84 deletions.

lambdas/filenameprocessor/src/file_name_processor.py

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
(ODS code has multiple lengths)
77
"""
88

9-
import argparse
109
from uuid import uuid4
1110

1211
from audit_table import upsert_audit_table
@@ -264,7 +263,6 @@ def handle_extended_attributes_file(
264263
FileStatus.PROCESSING,
265264
)
266265

267-
# TODO: agree the prefix with DPS
268266
dest_file_key = f"{DPS_DESTINATION_PREFIX}/{file_key}"
269267
copy_file_to_external_bucket(
270268
bucket_name,
@@ -339,18 +337,3 @@ def lambda_handler(event: dict, context) -> None: # pylint: disable=unused-argu
339337
handle_record(record)
340338

341339
logger.info("Filename processor lambda task completed")
342-
343-
344-
def run_local():
345-
parser = argparse.ArgumentParser("file_name_processor")
346-
parser.add_argument("--bucket", required=True, help="Bucket name.", type=str)
347-
parser.add_argument("--key", required=True, help="Object key.", type=str)
348-
args = parser.parse_args()
349-
350-
event = {"Records": [{"s3": {"bucket": {"name": args.bucket}, "object": {"key": args.key}}}]}
351-
print(event)
352-
print(lambda_handler(event=event, context={}))
353-
354-
355-
if __name__ == "__main__":
356-
run_local()

lambdas/filenameprocessor/tests/test_lambda_handler.py

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
"""Tests for lambda_handler"""
22

33
import json
4-
import sys
54
from contextlib import ExitStack
65
from copy import deepcopy
76
from json import loads as json_loads
@@ -778,37 +777,3 @@ def test_unexpected_bucket_name_and_filename_validation_fails(self):
778777
self.assertIn("Unable to process file", args[0])
779778
self.assertIn(invalid_file_key, args)
780779
self.assertIn("unknown-bucket", args)
781-
782-
783-
class TestMainEntryPoint(TestCase):
784-
def test_run_local_constructs_event_and_calls_lambda_handler(self):
785-
test_args = [
786-
"file_name_processor.py",
787-
"--bucket",
788-
"test-bucket",
789-
"--key",
790-
"some/path/file.csv",
791-
]
792-
793-
expected_event = {
794-
"Records": [
795-
{
796-
"s3": {
797-
"bucket": {"name": "test-bucket"},
798-
"object": {"key": "some/path/file.csv"},
799-
}
800-
}
801-
]
802-
}
803-
804-
with (
805-
patch.object(sys, "argv", test_args),
806-
patch("file_name_processor.lambda_handler") as mock_lambda_handler,
807-
patch("file_name_processor.print") as mock_print,
808-
):
809-
import file_name_processor
810-
811-
file_name_processor.run_local()
812-
813-
mock_lambda_handler.assert_called_once_with(event=expected_event, context={})
814-
mock_print.assert_called()
Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
11
from dataclasses import dataclass
22

3-
from common.data_quality.completeness import DataQualityCompletenessChecker
3+
from common.data_quality.completeness import DataQualityCompletenessChecker, MissingFields
44
from common.data_quality.validator import DataQualityValidator
55
from common.models.fhir_converter.converter import Converter
6-
from common.models.fhir_converter.mappings import ActionFlag
76

87

98
@dataclass
109
class DataQualityOutput:
11-
incomplete_fields: dict[str, list[str]]
10+
missing_fields: MissingFields
1211
invalid_fields: list[str]
1312
timeliness: dict[str, int]
1413

@@ -18,33 +17,31 @@ class DataQualityChecker:
1817

1918
def __init__(
2019
self,
21-
immunisation: dict,
22-
action_flag: ActionFlag,
2320
completeness_checker: DataQualityCompletenessChecker,
24-
data_quality_validator: DataQualityValidator,
2521
is_batch_csv: bool,
2622
):
27-
self.immunisation = immunisation
28-
self.fhir_converter = Converter(fhir_data=immunisation, action_flag=action_flag)
2923
self.completeness_checker = completeness_checker
30-
self.data_quality_validator = data_quality_validator
3124
self.is_batch_csv = is_batch_csv
3225

33-
def run_checks(self) -> DataQualityOutput:
26+
def run_checks(self, immunisation: dict) -> DataQualityOutput:
27+
data_quality_validator = DataQualityValidator()
28+
3429
if not self.is_batch_csv:
35-
self.immunisation = self.fhir_converter.run_conversion()
30+
immunisation = Converter(fhir_data=immunisation).run_conversion()
3631

3732
return DataQualityOutput(
38-
incomplete_fields=self._check_completeness(),
39-
invalid_fields=self._check_validity(),
40-
timeliness=self._check_timeliness(),
33+
missing_fields=self._check_completeness(immunisation),
34+
invalid_fields=self._check_validity(immunisation, data_quality_validator),
35+
timeliness=self._check_timeliness(immunisation),
4136
)
4237

43-
def _check_completeness(self) -> dict[str, list[str]]:
44-
pass
38+
def _check_completeness(self, immunisation: dict) -> MissingFields:
39+
return self.completeness_checker.run_checks(immunisation)
4540

46-
def _check_validity(self) -> list[str]:
41+
@staticmethod
42+
def _check_validity(immunisation: dict, data_quality_validator: DataQualityValidator) -> list[str]:
4743
pass
4844

49-
def _check_timeliness(self) -> dict[str, int]:
45+
@staticmethod
46+
def _check_timeliness(immunisation: dict) -> dict[str, int]:
5047
pass
Lines changed: 67 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,68 @@
1+
from dataclasses import dataclass
2+
3+
required_fields = [
4+
"NHS_NUMBER",
5+
"VACCINATION_PROCEDURE_TERM",
6+
"DOSE_SEQUENCE",
7+
"VACCINE_PRODUCT_CODE",
8+
"VACCINE_PRODUCT_TERM",
9+
"VACCINE_MANUFACTURER",
10+
"BATCH_NUMBER",
11+
"EXPIRY_DATE",
12+
"SITE_OF_VACCINATION_CODE",
13+
"SITE_OF_VACCINATION_TERM",
14+
"ROUTE_OF_VACCINATION_CODE",
15+
"ROUTE_OF_VACCINATION_TERM",
16+
"DOSE_AMOUNT",
17+
"DOSE_UNIT_CODE",
18+
"DOSE_UNIT_TERM",
19+
"INDICATION_CODE",
20+
]
21+
mandatory_fields = [
22+
"PERSON_FORENAME",
23+
"PERSON_SURNAME",
24+
"PERSON_DOB",
25+
"PERSON_GENDER_CODE",
26+
"PERSON_POSTCODE",
27+
"DATE_AND_TIME",
28+
"SITE_CODE",
29+
"SITE_CODE_TYPE_URI",
30+
"UNIQUE_ID",
31+
"UNIQUE_ID_URI",
32+
"ACTION_FLAG",
33+
"RECORDED_DATE",
34+
"PRIMARY_SOURCE",
35+
"VACCINATION_PROCEDURE_CODE",
36+
"LOCATION_CODE",
37+
"LOCATION_CODE_TYPE_URI",
38+
]
39+
optional_fields = [
40+
"PERFORMING_PROFESSIONAL_FORENAME",
41+
"PERFORMING_PROFESSIONAL_SURNAME",
42+
]
43+
44+
45+
@dataclass
46+
class MissingFields:
47+
required_fields: list[str]
48+
mandatory_fields: list[str]
49+
optional_fields: list[str]
50+
51+
152
class DataQualityCompletenessChecker:
2-
def check_completeness(self, immunisation: dict) -> dict[str, list[str]]:
3-
pass
53+
def run_checks(self, immunisation: dict) -> MissingFields:
54+
return MissingFields(
55+
required_fields=self._get_missing_fields(immunisation, required_fields),
56+
mandatory_fields=self._get_missing_fields(immunisation, mandatory_fields),
57+
optional_fields=self._get_missing_fields(immunisation, optional_fields),
58+
)
59+
60+
@staticmethod
61+
def _get_missing_fields(immunisation: dict, list_of_fields: list[str]) -> list[str]:
62+
missing_fields = []
63+
for field in list_of_fields:
64+
exists = immunisation.get(field)
65+
if not exists:
66+
missing_fields.append(field)
67+
68+
return missing_fields

lambdas/shared/tests/test_common/data_quality/checker.py

Lines changed: 0 additions & 6 deletions
This file was deleted.

lambdas/shared/tests/test_common/data_quality/completeness.py

Lines changed: 0 additions & 6 deletions
This file was deleted.

0 commit comments

Comments
 (0)