Skip to content

Commit d68abad

Browse files
edhall-nhs, dlzhry2nhs
authored and committed
VED-948: Add completeness checker (#1073)
1 parent 1a5601a commit d68abad

File tree

9 files changed

+329
-32
lines changed

9 files changed

+329
-32
lines changed
Lines changed: 15 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,13 @@
11
from dataclasses import dataclass
22

3-
from common.data_quality.completeness import DataQualityCompletenessChecker
3+
from common.data_quality.completeness import DataQualityCompletenessChecker, MissingFields
44
from common.data_quality.validator import DataQualityValidator
55
from common.models.fhir_converter.converter import Converter
6-
from common.models.fhir_converter.mappings import ActionFlag
76

87

98
@dataclass
109
class DataQualityOutput:
11-
incomplete_fields: dict[str, list[str]]
10+
missing_fields: MissingFields
1211
invalid_fields: list[str]
1312
timeliness: dict[str, int]
1413

@@ -18,33 +17,31 @@ class DataQualityChecker:
1817

1918
def __init__(
2019
self,
21-
immunisation: dict,
22-
action_flag: ActionFlag,
2320
completeness_checker: DataQualityCompletenessChecker,
24-
data_quality_validator: DataQualityValidator,
2521
is_batch_csv: bool,
2622
):
27-
self.immunisation = immunisation
28-
self.fhir_converter = Converter(fhir_data=immunisation, action_flag=action_flag)
2923
self.completeness_checker = completeness_checker
30-
self.data_quality_validator = data_quality_validator
3124
self.is_batch_csv = is_batch_csv
3225

33-
def run_checks(self) -> DataQualityOutput:
26+
def run_checks(self, immunisation: dict) -> DataQualityOutput:
27+
data_quality_validator = DataQualityValidator()
28+
3429
if not self.is_batch_csv:
35-
self.immunisation = self.fhir_converter.run_conversion()
30+
immunisation = Converter(fhir_data=immunisation).run_conversion()
3631

3732
return DataQualityOutput(
38-
incomplete_fields=self._check_completeness(),
39-
invalid_fields=self._check_validity(),
40-
timeliness=self._check_timeliness(),
33+
missing_fields=self._check_completeness(immunisation),
34+
invalid_fields=self._check_validity(immunisation, data_quality_validator),
35+
timeliness=self._check_timeliness(immunisation),
4136
)
4237

43-
def _check_completeness(self) -> dict[str, list[str]]:
44-
pass
38+
def _check_completeness(self, immunisation: dict) -> MissingFields:
39+
return self.completeness_checker.run_checks(immunisation)
4540

46-
def _check_validity(self) -> list[str]:
41+
@staticmethod
42+
def _check_validity(immunisation: dict, data_quality_validator: DataQualityValidator) -> list[str]:
4743
pass
4844

49-
def _check_timeliness(self) -> dict[str, int]:
45+
@staticmethod
46+
def _check_timeliness(immunisation: dict) -> dict[str, int]:
5047
pass
Lines changed: 67 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,68 @@
1+
from dataclasses import dataclass
2+
3+
required_fields = [
4+
"NHS_NUMBER",
5+
"VACCINATION_PROCEDURE_TERM",
6+
"DOSE_SEQUENCE",
7+
"VACCINE_PRODUCT_CODE",
8+
"VACCINE_PRODUCT_TERM",
9+
"VACCINE_MANUFACTURER",
10+
"BATCH_NUMBER",
11+
"EXPIRY_DATE",
12+
"SITE_OF_VACCINATION_CODE",
13+
"SITE_OF_VACCINATION_TERM",
14+
"ROUTE_OF_VACCINATION_CODE",
15+
"ROUTE_OF_VACCINATION_TERM",
16+
"DOSE_AMOUNT",
17+
"DOSE_UNIT_CODE",
18+
"DOSE_UNIT_TERM",
19+
"INDICATION_CODE",
20+
]
21+
mandatory_fields = [
22+
"PERSON_FORENAME",
23+
"PERSON_SURNAME",
24+
"PERSON_DOB",
25+
"PERSON_GENDER_CODE",
26+
"PERSON_POSTCODE",
27+
"DATE_AND_TIME",
28+
"SITE_CODE",
29+
"SITE_CODE_TYPE_URI",
30+
"UNIQUE_ID",
31+
"UNIQUE_ID_URI",
32+
"ACTION_FLAG",
33+
"RECORDED_DATE",
34+
"PRIMARY_SOURCE",
35+
"VACCINATION_PROCEDURE_CODE",
36+
"LOCATION_CODE",
37+
"LOCATION_CODE_TYPE_URI",
38+
]
39+
optional_fields = [
40+
"PERFORMING_PROFESSIONAL_FORENAME",
41+
"PERFORMING_PROFESSIONAL_SURNAME",
42+
]
43+
44+
45+
@dataclass
46+
class MissingFields:
47+
required_fields: list[str]
48+
mandatory_fields: list[str]
49+
optional_fields: list[str]
50+
51+
152
class DataQualityCompletenessChecker:
2-
def check_completeness(self, immunisation: dict) -> dict[str, list[str]]:
3-
pass
53+
def run_checks(self, immunisation: dict) -> MissingFields:
54+
return MissingFields(
55+
required_fields=self._get_missing_fields(immunisation, required_fields),
56+
mandatory_fields=self._get_missing_fields(immunisation, mandatory_fields),
57+
optional_fields=self._get_missing_fields(immunisation, optional_fields),
58+
)
59+
60+
@staticmethod
61+
def _get_missing_fields(immunisation: dict, list_of_fields: list[str]) -> list[str]:
62+
missing_fields = []
63+
for field in list_of_fields:
64+
exists = immunisation.get(field)
65+
if not exists:
66+
missing_fields.append(field)
67+
68+
return missing_fields

lambdas/shared/tests/test_common/data_quality/checker.py

Lines changed: 0 additions & 6 deletions
This file was deleted.

lambdas/shared/tests/test_common/data_quality/completeness.py

Lines changed: 0 additions & 6 deletions
This file was deleted.
Lines changed: 170 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,170 @@
1+
from decimal import Decimal
2+
3+
VALID_FHIR_IMMUNISATION = {
4+
"resourceType": "Immunization",
5+
"contained": [
6+
{
7+
"resourceType": "Practitioner",
8+
"id": "Pract1",
9+
"name": [{"family": "Nightingale", "given": ["Florence"]}],
10+
},
11+
{
12+
"resourceType": "Patient",
13+
"id": "Pat1",
14+
"identifier": [
15+
{
16+
"system": "https://fhir.nhs.uk/Id/nhs-number",
17+
"value": "9000000009",
18+
}
19+
],
20+
"name": [{"family": "Trailor", "given": ["Sam"]}],
21+
"gender": "unknown",
22+
"birthDate": "1965-02-28",
23+
"address": [{"postalCode": "EC1A 1BB"}],
24+
},
25+
],
26+
"extension": [
27+
{
28+
"url": "https://fhir.hl7.org.uk/StructureDefinition/Extension-UKCore-VaccinationProcedure",
29+
"valueCodeableConcept": {
30+
"coding": [
31+
{
32+
"system": "http://snomed.info/sct",
33+
"code": "13246814444444",
34+
"display": "Administration of first dose of severe acute respiratory syndrome coronavirus 2 vaccine (procedure)",
35+
"extension": [
36+
{
37+
"url": "https://fhir.hl7.org.uk/StructureDefinition/Extension-UKCore-CodingSCTDescDisplay",
38+
"valueString": "Test Value string 123456 COVID vaccination",
39+
},
40+
{
41+
"url": "http://hl7.org/fhir/StructureDefinition/coding-sctdescid",
42+
"valueId": "5306706018",
43+
},
44+
],
45+
}
46+
]
47+
},
48+
}
49+
],
50+
"identifier": [
51+
{
52+
"system": "https://supplierABC/identifiers/vacc",
53+
"value": "ACME-vacc123456",
54+
}
55+
],
56+
"status": "completed",
57+
"vaccineCode": {
58+
"coding": [
59+
{
60+
"system": "http://snomed.info/sct",
61+
"code": "39114911000001105",
62+
"display": "COVID-19 Vaccine Vaxzevria (ChAdOx1 S [recombinant]) not less than 2.5x100,000,000 infectious units/0.5ml dose suspension for injection multidose vials (AstraZeneca UK Ltd) (product)",
63+
}
64+
]
65+
},
66+
"patient": {"reference": "#Pat1"},
67+
"occurrenceDateTime": "2021-02-07T13:28:17+00:00",
68+
"recorded": "2021-02-07T13:28:17+00:00",
69+
"primarySource": True,
70+
"manufacturer": {"display": "AstraZeneca Ltd"},
71+
"location": {
72+
"type": "Location",
73+
"identifier": {
74+
"value": "EC1111",
75+
"system": "https://fhir.nhs.uk/Id/ods-organization-code",
76+
},
77+
},
78+
"lotNumber": "4120Z001",
79+
"expirationDate": "2021-07-02",
80+
"site": {
81+
"coding": [
82+
{
83+
"system": "http://snomed.info/sct",
84+
"code": "368208006",
85+
"display": "Left upper arm structure (body structure)",
86+
}
87+
]
88+
},
89+
"route": {
90+
"coding": [
91+
{
92+
"system": "http://snomed.info/sct",
93+
"code": "78421000",
94+
"display": "Intramuscular route (qualifier value)",
95+
}
96+
]
97+
},
98+
"doseQuantity": {
99+
"value": str(Decimal(0.5)),
100+
"unit": "milliliter",
101+
"system": "http://snomed.info/sct",
102+
"code": "ml",
103+
},
104+
"performer": [
105+
{"actor": {"reference": "#Pract1"}},
106+
{
107+
"actor": {
108+
"type": "Organization",
109+
"identifier": {
110+
"system": "https://fhir.nhs.uk/Id/ods-organization-code",
111+
"value": "B0C4P",
112+
},
113+
}
114+
},
115+
],
116+
"reasonCode": [{"coding": [{"code": "443684005", "system": "http://snomed.info/sct"}]}],
117+
"protocolApplied": [
118+
{
119+
"targetDisease": [
120+
{
121+
"coding": [
122+
{
123+
"system": "http://snomed.info/sct",
124+
"code": "840539006",
125+
"display": "Disease caused by severe acute respiratory syndrome coronavirus 2",
126+
}
127+
]
128+
}
129+
],
130+
"doseNumberPositiveInt": 1,
131+
}
132+
],
133+
}
134+
135+
VALID_BATCH_IMMUNISATION = {
136+
"NHS_NUMBER": "9000000009",
137+
"PERSON_FORENAME": "Sam",
138+
"PERSON_SURNAME": "Trailor",
139+
"PERSON_DOB": "19650228",
140+
"PERSON_GENDER_CODE": "0",
141+
"PERSON_POSTCODE": "EC1A 1BB",
142+
"DATE_AND_TIME": "20210207T13281700",
143+
"SITE_CODE": "B0C4P",
144+
"SITE_CODE_TYPE_URI": "https://fhir.nhs.uk/Id/ods-organization-code",
145+
"UNIQUE_ID": "ACME-vacc123456",
146+
"UNIQUE_ID_URI": "https://supplierABC/identifiers/vacc",
147+
"ACTION_FLAG": "UPDATE",
148+
"PERFORMING_PROFESSIONAL_FORENAME": "Florence",
149+
"PERFORMING_PROFESSIONAL_SURNAME": "Nightingale",
150+
"RECORDED_DATE": "20210207",
151+
"PRIMARY_SOURCE": "TRUE",
152+
"VACCINATION_PROCEDURE_CODE": "13246814444444",
153+
"VACCINATION_PROCEDURE_TERM": "Test Value string 123456 COVID vaccination",
154+
"DOSE_SEQUENCE": "1",
155+
"VACCINE_PRODUCT_CODE": "39114911000001105",
156+
"VACCINE_PRODUCT_TERM": "COVID-19 Vaccine Vaxzevria (ChAdOx1 S [recombinant]) not less than 2.5x100,000,000 infectious units/0.5ml dose suspension for injection multidose vials (AstraZeneca UK Ltd) (product)",
157+
"VACCINE_MANUFACTURER": "AstraZeneca Ltd",
158+
"BATCH_NUMBER": "4120Z001",
159+
"EXPIRY_DATE": "20210702",
160+
"SITE_OF_VACCINATION_CODE": "368208006",
161+
"SITE_OF_VACCINATION_TERM": "Left upper arm structure (body structure)",
162+
"ROUTE_OF_VACCINATION_CODE": "78421000",
163+
"ROUTE_OF_VACCINATION_TERM": "Intramuscular route (qualifier value)",
164+
"DOSE_AMOUNT": "0.5",
165+
"DOSE_UNIT_CODE": "ml",
166+
"DOSE_UNIT_TERM": "milliliter",
167+
"INDICATION_CODE": "443684005",
168+
"LOCATION_CODE": "EC1111",
169+
"LOCATION_CODE_TYPE_URI": "https://fhir.nhs.uk/Id/ods-organization-code",
170+
}
Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,6 @@
1+
import unittest
2+
3+
4+
class TestDataQualityChecker(unittest.TestCase):
5+
def setUp(self):
6+
super().setUp()
Lines changed: 71 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,71 @@
1+
import unittest
2+
from copy import deepcopy
3+
4+
from common.data_quality.completeness import DataQualityCompletenessChecker, MissingFields
5+
from test_common.data_quality.sample_values import VALID_BATCH_IMMUNISATION
6+
7+
8+
class TestDataQualityCompletenessChecker(unittest.TestCase):
9+
def setUp(self):
10+
super().setUp()
11+
self.DataQualityCompletenessChecker = DataQualityCompletenessChecker()
12+
13+
def test_check_completeness_no_missing_fields(self):
14+
complete_immunisation = deepcopy(VALID_BATCH_IMMUNISATION)
15+
16+
expected_missing_fields = MissingFields(required_fields=[], mandatory_fields=[], optional_fields=[])
17+
18+
actual_missing_fields = self.DataQualityCompletenessChecker.run_checks(complete_immunisation)
19+
20+
self.assertEqual(expected_missing_fields, actual_missing_fields)
21+
22+
def test_check_completeness_empty_strings(self):
23+
incomplete_immunisation = deepcopy(VALID_BATCH_IMMUNISATION)
24+
incomplete_immunisation["NHS_NUMBER"] = "" # required
25+
incomplete_immunisation["PERSON_FORENAME"] = "" # mandatory
26+
incomplete_immunisation["PERFORMING_PROFESSIONAL_FORENAME"] = "" # optional
27+
28+
expected_missing_fields = MissingFields(
29+
required_fields=["NHS_NUMBER"],
30+
mandatory_fields=["PERSON_FORENAME"],
31+
optional_fields=["PERFORMING_PROFESSIONAL_FORENAME"],
32+
)
33+
34+
actual_missing_fields = self.DataQualityCompletenessChecker.run_checks(incomplete_immunisation)
35+
36+
self.assertEqual(expected_missing_fields, actual_missing_fields)
37+
38+
def test_check_completeness_missing(self):
39+
incomplete_immunisation = deepcopy(VALID_BATCH_IMMUNISATION)
40+
incomplete_immunisation.pop("NHS_NUMBER") # required
41+
incomplete_immunisation.pop("PERSON_FORENAME") # mandatory
42+
incomplete_immunisation.pop("PERFORMING_PROFESSIONAL_FORENAME") # optional
43+
44+
expected_missing_fields = MissingFields(
45+
required_fields=["NHS_NUMBER"],
46+
mandatory_fields=["PERSON_FORENAME"],
47+
optional_fields=["PERFORMING_PROFESSIONAL_FORENAME"],
48+
)
49+
50+
actual_missing_fields = self.DataQualityCompletenessChecker.run_checks(incomplete_immunisation)
51+
52+
self.assertEqual(expected_missing_fields, actual_missing_fields)
53+
54+
def test_check_completeness_multiple_missing(self):
55+
incomplete_immunisation = deepcopy(VALID_BATCH_IMMUNISATION)
56+
incomplete_immunisation.pop("NHS_NUMBER") # required
57+
incomplete_immunisation.pop("VACCINATION_PROCEDURE_TERM") # required
58+
incomplete_immunisation.pop("PERSON_FORENAME") # mandatory
59+
incomplete_immunisation.pop("PERSON_SURNAME") # mandatory
60+
incomplete_immunisation.pop("PERFORMING_PROFESSIONAL_FORENAME") # optional
61+
incomplete_immunisation.pop("PERFORMING_PROFESSIONAL_SURNAME") # optional
62+
63+
expected_missing_fields = MissingFields(
64+
required_fields=["NHS_NUMBER", "VACCINATION_PROCEDURE_TERM"],
65+
mandatory_fields=["PERSON_FORENAME", "PERSON_SURNAME"],
66+
optional_fields=["PERFORMING_PROFESSIONAL_FORENAME", "PERFORMING_PROFESSIONAL_SURNAME"],
67+
)
68+
69+
actual_missing_fields = self.DataQualityCompletenessChecker.run_checks(incomplete_immunisation)
70+
71+
self.assertEqual(expected_missing_fields, actual_missing_fields)

lambdas/shared/tests/test_common/data_quality/reporter.py renamed to lambdas/shared/tests/test_common/data_quality/test_reporter.py

File renamed without changes.

lambdas/shared/tests/test_common/data_quality/validator.py renamed to lambdas/shared/tests/test_common/data_quality/test_validator.py

File renamed without changes.

0 commit comments

Comments
 (0)