Skip to content

Commit 6f06d3b

Browse files
committed
Add timeliness checks
1 parent fdd14dd commit 6f06d3b

File tree

4 files changed

+180
-17
lines changed

4 files changed

+180
-17
lines changed

lambdas/shared/src/common/data_quality/checker.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,48 @@
11
from dataclasses import dataclass
2+
from datetime import datetime, timezone
23

34
from pydantic import ValidationError
45

56
from common.data_quality.completeness import DataQualityCompletenessChecker, MissingFields
67
from common.data_quality.models.immunization_batch_row_model import ImmunizationBatchRowModel
8+
from common.data_quality.timeliness import (
9+
TimelinessCheckOutput,
10+
get_ingested_timeliness_seconds,
11+
get_recorded_timeliness_days,
12+
)
713
from common.models.fhir_converter.converter import Converter
814

915

1016
@dataclass
1117
class DataQualityOutput:
1218
missing_fields: MissingFields
1319
invalid_fields: list[str]
14-
timeliness: dict[str, int]
20+
timeliness: TimelinessCheckOutput
21+
validation_datetime: str
1522

1623

1724
class DataQualityChecker:
1825
"""Runs data quality checks against an Immunisation and creates a Data Quality Output object"""
1926

2027
def __init__(
2128
self,
22-
completeness_checker: DataQualityCompletenessChecker,
2329
is_batch_csv: bool,
2430
):
25-
self.completeness_checker = completeness_checker
31+
self.completeness_checker = DataQualityCompletenessChecker()
2632
self.data_quality_model = ImmunizationBatchRowModel
2733
self.is_batch_csv = is_batch_csv
2834

2935
def run_checks(self, immunisation: dict) -> DataQualityOutput:
36+
dq_checks_executed_timestamp = datetime.now(timezone.utc)
37+
3038
if not self.is_batch_csv:
3139
immunisation = Converter(fhir_data=immunisation).run_conversion()
3240

3341
return DataQualityOutput(
3442
missing_fields=self._check_completeness(immunisation),
3543
invalid_fields=self._check_validity(immunisation),
36-
timeliness=self._check_timeliness(immunisation),
44+
timeliness=self._check_timeliness(immunisation, dq_checks_executed_timestamp),
45+
validation_datetime=dq_checks_executed_timestamp.isoformat(timespec="milliseconds").replace("+00:00", "Z"),
3746
)
3847

3948
def _check_completeness(self, immunisation: dict) -> MissingFields:
@@ -56,5 +65,9 @@ def _check_validity(self, immunisation: dict) -> list[str]:
5665

5766
return fields_with_errors
5867

59-
def _check_timeliness(self, immunisation: dict) -> dict[str, int]:
60-
pass
68+
@staticmethod
69+
def _check_timeliness(immunisation: dict, datetime_now: datetime) -> TimelinessCheckOutput:
70+
return TimelinessCheckOutput(
71+
recorded_timeliness_days=get_recorded_timeliness_days(immunisation),
72+
ingested_timeliness_seconds=get_ingested_timeliness_seconds(immunisation, datetime_now),
73+
)

lambdas/shared/src/common/data_quality/timeliness.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,25 @@
11
"""Module containing functions to aid with timeliness checks"""
22

33
import datetime
4+
import math
5+
from dataclasses import dataclass
6+
from typing import Optional
7+
8+
9+
@dataclass
10+
class TimelinessCheckOutput:
11+
"""
12+
The required outputs for the data quality team for timeliness checks.
13+
14+
recorded_timeliness_days - the number of days between the vaccination taking place and being recorded into the
15+
clinical system
16+
17+
ingested_timeliness_seconds - the seconds between the vaccination taking place and the record being provided to the
18+
Immunisation FHIR API/batch processor for ingestion
19+
"""
20+
21+
recorded_timeliness_days: Optional[int]
22+
ingested_timeliness_seconds: Optional[int]
423

524

625
def parse_csv_date(d: str) -> datetime.date:
@@ -18,3 +37,48 @@ def parse_csv_datetime(d: str) -> datetime.datetime:
1837
return datetime.datetime.strptime(d, "%Y%m%dT%H%M%S").replace(tzinfo=datetime.timezone.utc)
1938
case _:
2039
raise ValueError("Invalid datetime format provided")
40+
41+
42+
def get_recorded_date_from_immunisation(immunization: dict) -> Optional[datetime.date]:
    """Return the parsed `RECORDED_DATE` value of an immunisation record.

    Args:
        immunization: the immunisation record; only its `RECORDED_DATE` entry is read.

    Returns:
        The parsed recorded date, or None when the field is absent or cannot be parsed.
    """
    raw_recorded_date = immunization.get("RECORDED_DATE")

    # dict.get yields None for an absent key. parse_csv_date is declared to take a str, so do not hand it None;
    # a missing field simply means there is no date to report.
    if raw_recorded_date is None:
        return None

    try:
        return parse_csv_date(raw_recorded_date)
    except ValueError:
        # Completeness and validity checks will catch these issues separately
        return None
50+
51+
52+
def get_occurrence_datetime_from_immunisation(immunization: dict) -> Optional[datetime.datetime]:
    """Return the parsed `DATE_AND_TIME` (vaccination occurrence) value of an immunisation record.

    Args:
        immunization: the immunisation record; only its `DATE_AND_TIME` entry is read.

    Returns:
        The parsed occurrence datetime, or None when the field is absent or cannot be parsed.
    """
    raw_occurrence_datetime = immunization.get("DATE_AND_TIME")

    # dict.get yields None for an absent key. parse_csv_datetime is declared to take a str, so do not hand it None;
    # a missing field simply means there is no datetime to report.
    if raw_occurrence_datetime is None:
        return None

    try:
        return parse_csv_datetime(raw_occurrence_datetime)
    except ValueError:
        # Completeness and validity checks will catch these issues separately
        return None
60+
61+
62+
def get_recorded_timeliness_days(immunisation: dict) -> Optional[int]:
    """Gets the time delta in days between the recorded date and occurrence date. Returns None if either of the fields
    is not provided or is an invalid date.

    The result is recorded date minus occurrence date, so it is positive when the record was made after the
    vaccination took place.
    """
    recorded_date = get_recorded_date_from_immunisation(immunisation)
    occurrence_date_time = get_occurrence_datetime_from_immunisation(immunisation)

    if recorded_date is None or occurrence_date_time is None:
        return None

    # The recorded value is a plain date, so compare calendar dates only: the time-of-day part of the occurrence
    # is dropped (the date is taken as expressed in the occurrence datetime's own timezone).
    return (recorded_date - occurrence_date_time.date()).days
74+
75+
76+
def get_ingested_timeliness_seconds(immunisation: dict, datetime_now: datetime.datetime) -> Optional[int]:
    """Gets the time delta in seconds between the time of ingestion into the system and the vaccination occurrence
    datetime. Returns None if the occurrence datetime field is not provided or is invalid.

    Args:
        immunisation: the immunisation record to inspect.
        datetime_now: the moment to treat as the ingestion time.
            NOTE(review): the parsed occurrence datetime appears to be timezone-aware, in which case this must be
            timezone-aware too or the subtraction raises TypeError - confirm against the callers.

    Returns:
        The elapsed whole seconds, rounded down, or None when no occurrence datetime is available.
    """
    occurrence_date_time = get_occurrence_datetime_from_immunisation(immunisation)

    # Explicit None test, matching get_recorded_timeliness_days, rather than relying on truthiness
    if occurrence_date_time is None:
        return None

    return math.floor((datetime_now - occurrence_date_time).total_seconds())

lambdas/shared/tests/test_common/data_quality/sample_values.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,8 @@
6464
]
6565
},
6666
"patient": {"reference": "#Pat1"},
67-
"occurrenceDateTime": "2021-02-07T13:28:17+00:00",
68-
"recorded": "2021-02-07T13:28:17+00:00",
67+
"occurrenceDateTime": "2024-05-11T12:00:00+00:00",
68+
"recorded": "2024-05-15",
6969
"primarySource": True,
7070
"manufacturer": {"display": "AstraZeneca Ltd"},
7171
"location": {
@@ -76,7 +76,7 @@
7676
},
7777
},
7878
"lotNumber": "4120Z001",
79-
"expirationDate": "2021-07-02",
79+
"expirationDate": "2024-12-02",
8080
"site": {
8181
"coding": [
8282
{
@@ -99,7 +99,7 @@
9999
"value": str(Decimal(0.5)),
100100
"unit": "milliliter",
101101
"system": "http://snomed.info/sct",
102-
"code": "ml",
102+
"code": "258773002",
103103
},
104104
"performer": [
105105
{"actor": {"reference": "#Pract1"}},
@@ -147,7 +147,7 @@
147147
"ACTION_FLAG": "UPDATE",
148148
"PERFORMING_PROFESSIONAL_FORENAME": "ALICE",
149149
"PERFORMING_PROFESSIONAL_SURNAME": "SMITH",
150-
"RECORDED_DATE": "20250306",
150+
"RECORDED_DATE": "20240515",
151151
"PRIMARY_SOURCE": "True",
152152
"VACCINATION_PROCEDURE_CODE": "1324681000000101",
153153
"VACCINATION_PROCEDURE_TERM": "Procedure Term",

lambdas/shared/tests/test_common/data_quality/test_checker.py

Lines changed: 92 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,8 @@
33
import unittest
44
from unittest.mock import patch
55

6-
from common.data_quality.checker import DataQualityChecker
7-
from common.data_quality.completeness import DataQualityCompletenessChecker
8-
from test_common.data_quality.sample_values import VALID_BATCH_IMMUNISATION
6+
from common.data_quality.checker import DataQualityChecker, DataQualityOutput
7+
from test_common.data_quality.sample_values import VALID_BATCH_IMMUNISATION, VALID_FHIR_IMMUNISATION
98

109

1110
class TestDataQualityChecker(unittest.TestCase):
@@ -15,9 +14,22 @@ def setUp(self):
1514
self.mock_date_today = date_today_patcher.start()
1615
self.mock_date_today.date.today.return_value = datetime.date(2024, 5, 20)
1716

18-
completeness_checker = DataQualityCompletenessChecker()
19-
self.batch_dq_checker = DataQualityChecker(completeness_checker, is_batch_csv=True)
20-
self.fhir_json_dq_checker = DataQualityChecker(completeness_checker, is_batch_csv=False)
17+
# Fix datetime.now
18+
self.mock_fixed_datetime = datetime.datetime(2024, 5, 20, 14, 12, 30, 123, tzinfo=datetime.timezone.utc)
19+
datetime_now_patcher = patch("common.data_quality.checker.datetime", wraps=datetime.datetime)
20+
self.mock_datetime_now = datetime_now_patcher.start()
21+
self.mock_datetime_now.now.return_value = self.mock_fixed_datetime
22+
23+
self.batch_dq_checker = DataQualityChecker(is_batch_csv=True)
24+
self.fhir_json_dq_checker = DataQualityChecker(is_batch_csv=False)
25+
26+
def assert_successful_result(self, result: DataQualityOutput) -> None:
27+
self.assertEqual([], result.missing_fields.optional_fields)
28+
self.assertEqual([], result.missing_fields.mandatory_fields)
29+
self.assertEqual([], result.missing_fields.required_fields)
30+
self.assertEqual([], result.invalid_fields)
31+
self.assertEqual(4, result.timeliness.recorded_timeliness_days)
32+
self.assertEqual(785550, result.timeliness.ingested_timeliness_seconds)
2133

2234
def test_check_validity_returns_empty_list_when_data_is_valid(self):
2335
validation_result = self.batch_dq_checker._check_validity(VALID_BATCH_IMMUNISATION)
@@ -76,3 +88,77 @@ def test_check_validity_returns_list_of_multiple_invalid_fields_for_multiple_fai
7688
self.assertEqual(
7789
["NHS_NUMBER", "PERSON_POSTCODE", "EXPIRY_DATE", "DOSE_AMOUNT", "INDICATION_CODE"], validation_result
7890
)
91+
92+
def test_check_timeliness_calculates_the_timeliness_diffs(self):
93+
timeliness_output = self.batch_dq_checker._check_timeliness(VALID_BATCH_IMMUNISATION, self.mock_fixed_datetime)
94+
95+
self.assertEqual(4, timeliness_output.recorded_timeliness_days)
96+
self.assertEqual(785550, timeliness_output.ingested_timeliness_seconds)
97+
98+
def test_check_timeliness_returns_none_for_recorded_timeliness_when_relevant_field_invalid(self):
99+
invalid_batch_imms_payload = copy.deepcopy(VALID_BATCH_IMMUNISATION)
100+
invalid_batch_imms_payload["RECORDED_DATE"] = ""
101+
102+
timeliness_output = self.batch_dq_checker._check_timeliness(invalid_batch_imms_payload, self.mock_fixed_datetime)
103+
104+
self.assertIsNone(timeliness_output.recorded_timeliness_days)
105+
self.assertEqual(785550, timeliness_output.ingested_timeliness_seconds)
106+
107+
def test_check_timeliness_returns_none_for_both_when_date_and_time_field_invalid(self):
108+
invalid_batch_imms_payload = copy.deepcopy(VALID_BATCH_IMMUNISATION)
109+
invalid_batch_imms_payload["DATE_AND_TIME"] = "20245"
110+
111+
timeliness_output = self.batch_dq_checker._check_timeliness(invalid_batch_imms_payload, self.mock_fixed_datetime)
112+
113+
self.assertIsNone(timeliness_output.recorded_timeliness_days)
114+
self.assertIsNone(timeliness_output.ingested_timeliness_seconds)
115+
116+
def test_run_checks_returns_correct_output_for_valid_data_for_csv_payload(self):
117+
result = self.batch_dq_checker.run_checks(VALID_BATCH_IMMUNISATION)
118+
self.assert_successful_result(result)
119+
120+
def test_run_checks_returns_correct_output_for_valid_data_for_fhir_payload(self):
121+
result = self.fhir_json_dq_checker.run_checks(VALID_FHIR_IMMUNISATION)
122+
self.assert_successful_result(result)
123+
124+
def test_run_checks_returns_correct_output_for_invalid_data_for_csv_payload(self):
125+
invalid_batch_imms_payload = copy.deepcopy(VALID_BATCH_IMMUNISATION)
126+
invalid_batch_imms_payload["NHS_NUMBER"] = "12345678901"
127+
invalid_batch_imms_payload["RECORDED_DATE"] = "20240137"
128+
invalid_batch_imms_payload["PERSON_DOB"] = ""
129+
invalid_batch_imms_payload["DOSE_AMOUNT"] = "6.789"
130+
invalid_batch_imms_payload["BATCH_NUMBER"] = ""
131+
132+
result = self.batch_dq_checker.run_checks(invalid_batch_imms_payload)
133+
134+
self.assertEqual([], result.missing_fields.optional_fields)
135+
self.assertEqual(["PERSON_DOB"], result.missing_fields.mandatory_fields)
136+
self.assertEqual(["BATCH_NUMBER"], result.missing_fields.required_fields)
137+
138+
# Fields which are subject to validation and are also empty will appear in both the completeness and validity
139+
# checks e.g. PERSON_DOB
140+
self.assertEqual(["NHS_NUMBER", "PERSON_DOB", "DOSE_AMOUNT"], result.invalid_fields)
141+
self.assertIsNone(result.timeliness.recorded_timeliness_days)
142+
self.assertEqual(785550, result.timeliness.ingested_timeliness_seconds)
143+
144+
def test_run_checks_returns_correct_output_for_invalid_data_for_fhir_payload(self):
145+
invalid_fhir_imms_payload = copy.deepcopy(VALID_FHIR_IMMUNISATION)
146+
invalid_fhir_imms_payload["contained"][1]["identifier"][0]["value"] = "12345678901"
147+
invalid_fhir_imms_payload["recorded"] = "2024-01-37"
148+
del invalid_fhir_imms_payload["contained"][1]["birthDate"]
149+
invalid_fhir_imms_payload["doseQuantity"]["value"] = "6.789"
150+
invalid_fhir_imms_payload["lotNumber"] = ""
151+
152+
result = self.fhir_json_dq_checker.run_checks(invalid_fhir_imms_payload)
153+
154+
self.assertEqual([], result.missing_fields.optional_fields)
155+
# Worth noting that due to the use of the fhir converter, invalid dates will be mapped to an empty string hence
156+
# will also show up here where they would not in batch validation
157+
self.assertEqual(["PERSON_DOB", "RECORDED_DATE"], result.missing_fields.mandatory_fields)
158+
self.assertEqual(["BATCH_NUMBER"], result.missing_fields.required_fields)
159+
160+
# Fields which are subject to validation and are also empty will appear in both the completeness and validity
161+
# checks e.g. PERSON_DOB
162+
self.assertEqual(["NHS_NUMBER", "PERSON_DOB", "DOSE_AMOUNT"], result.invalid_fields)
163+
self.assertIsNone(result.timeliness.recorded_timeliness_days)
164+
self.assertEqual(785550, result.timeliness.ingested_timeliness_seconds)

0 commit comments

Comments
 (0)