Skip to content

Commit 839fd21

Browse files
committed
Add dq reporter
1 parent f674511 commit 839fd21

File tree

1 file changed

+59
-0
lines changed

1 file changed

+59
-0
lines changed
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import json
2+
import uuid
3+
from dataclasses import asdict, dataclass
4+
5+
from botocore.exceptions import ClientError
6+
7+
from common.clients import get_s3_client, logger
8+
from common.data_quality.checker import DataQualityChecker
9+
from common.data_quality.completeness import MissingFields
10+
11+
12+
@dataclass
class DataQualityReport:
    """Flat record describing the data quality findings for one immunisation.

    DataQualityReporter.generate_and_send_report builds an instance from the
    output of DataQualityChecker.run_checks and serialises it via
    dataclasses.asdict, so every field name here is also a key of the
    uploaded JSON document.
    """

    # Unique identifier of the report; the reporter also uses it to derive the S3 object key.
    data_quality_report_id: uuid.UUID
    # Populated from the checker output's validation_datetime. The camelCase name is emitted as-is.
    validationDate: str
    # Populated from the checker output's missing_fields.
    completeness: MissingFields
    # Populated from the checker output's invalid_fields.
    validity: list[str]
    # Populated from timeliness.recorded_timeliness_days of the checker output.
    timeliness_recorded_days: int
    # Populated from timeliness.ingested_timeliness_seconds of the checker output.
    timeliness_ingested_seconds: int
20+
21+
22+
class DataQualityReporter:
    """Generates and sends a data quality report to the relevant S3 bucket.

    Each report is uploaded as a JSON object whose key is a freshly generated
    UUID followed by ".json". A ClientError raised by the upload is logged
    rather than propagated to the caller.
    """

    def __init__(self, is_batch_csv: bool, bucket: str):
        """Create a reporter bound to a single destination bucket.

        Args:
            is_batch_csv: Forwarded unchanged to DataQualityChecker.
            bucket: Name of the S3 bucket the reports are written to.
        """
        self.s3_client = get_s3_client()
        self.bucket = bucket
        self.dq_checker = DataQualityChecker(is_batch_csv=is_batch_csv)

    @staticmethod
    def _json_default(value: object) -> str:
        """Encode the values the json module cannot handle natively.

        Only uuid.UUID is converted (to its canonical string form). Anything
        else still raises TypeError so that unexpected types are not silently
        stringified.
        """
        if isinstance(value, uuid.UUID):
            return str(value)
        raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")

    @classmethod
    def _serialise_report(cls, dq_report: "DataQualityReport") -> str:
        """Return the JSON document for a report dataclass instance."""
        # asdict() leaves the UUID identifier as a uuid.UUID object, which json.dumps rejects unless a default
        # hook converts it.
        return json.dumps(asdict(dq_report), default=cls._json_default)

    def generate_and_send_report(self, immunisation: dict) -> None:
        """Formats and sends a data quality report to the S3 bucket.

        Args:
            immunisation: The immunisation record passed to the data quality checker.

        Returns:
            None. A ClientError from the upload is logged and swallowed.
        """
        dq_output = self.dq_checker.run_checks(immunisation)
        event_id = uuid.uuid4()
        file_key = f"{event_id}.json"

        # Build report
        dq_report = DataQualityReport(
            data_quality_report_id=event_id,
            validationDate=dq_output.validation_datetime,
            completeness=dq_output.missing_fields,
            validity=dq_output.invalid_fields,
            timeliness_recorded_days=dq_output.timeliness.recorded_timeliness_days,
            timeliness_ingested_seconds=dq_output.timeliness.ingested_timeliness_seconds,
        )
        report_body = self._serialise_report(dq_report)

        # Send to S3 bucket
        try:
            self.s3_client.put_object(
                Bucket=self.bucket, Key=file_key, Body=report_body, ContentType="application/json"
            )
        except ClientError as error:
            # We only log the error here because we want the data quality checks to have minimal impact on the API's
            # functionality. This should only happen in the case of AWS infrastructure issues.
            # NOTE(review): only ClientError is handled; any other exception raised by the client would still
            # propagate to the caller - confirm that is intended.
            logger.error("error whilst sending data quality for report id: %s with error: %s", file_key, str(error))
        else:
            logger.info("data quality report sent successfully for report id: %s", file_key)

0 commit comments

Comments
 (0)