Skip to content

Commit d780012

Browse files
Adding subject assertion util
1 parent 67ec692 commit d780012

File tree

4 files changed

+277
-0
lines changed

4 files changed

+277
-0
lines changed

classes/subject.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from classes.sdd_reason_for_change_type import SDDReasonForChangeType
1111
from classes.ss_reason_for_change_type import SSReasonForChangeType
1212
from classes.ssdd_reason_for_change_type import SSDDReasonForChangeType
13+
import pandas as pd
1314

1415

1516
@dataclass
@@ -1241,3 +1242,102 @@ def __str__(self):
12411242
f"datestamp={self.datestamp}"
12421243
f"]"
12431244
)
1245+
1246+
@staticmethod
def from_dataframe_row(row: pd.Series) -> "Subject":
    """
    Builds a Subject from a single pandas DataFrame row.

    Date and datetime columns are converted to Python ``date`` / ``datetime``
    objects. Every other column is passed through exactly as ``row.get``
    returns it, so a column missing from the row yields None for that field.

    Args:
        row (pd.Series): One row of a query result.

    Returns:
        Subject: A Subject populated from the mapped columns.
    """

    def _to_date(
        raw: Optional[Union[pd.Timestamp, str, datetime, date]],
    ) -> Optional[date]:
        """
        Converts a value to a Python date, or returns None when it cannot be converted.

        Args:
            raw: The value to convert (pandas.Timestamp, string, datetime, date, or a null value).

        Returns:
            Optional[date]: The converted date, or None.
        """
        if pd.isnull(raw):
            return None
        # pd.Timestamp is checked before datetime because it is a datetime subclass.
        if isinstance(raw, pd.Timestamp):
            return raw.to_pydatetime().date()
        if isinstance(raw, str):
            # Only the leading "YYYY-MM-DD" part is parsed; malformed text counts as missing.
            try:
                parsed = datetime.strptime(raw[:10], "%Y-%m-%d")
            except Exception:
                return None
            return parsed.date()
        if isinstance(raw, datetime):
            return raw.date()
        return raw if isinstance(raw, date) else None

    def _to_datetime(
        raw: Optional[Union[pd.Timestamp, str, datetime, date]],
    ) -> Optional[datetime]:
        """
        Converts a value to a Python datetime, or returns None when it cannot be converted.

        Args:
            raw: The value to convert (pandas.Timestamp, string, datetime, or a null value).

        Returns:
            Optional[datetime]: The converted datetime, or None.
        """
        if pd.isnull(raw):
            return None
        if isinstance(raw, pd.Timestamp):
            return raw.to_pydatetime()
        if isinstance(raw, str):
            # Only the first 19 characters ("YYYY-MM-DD HH:MM:SS") are considered.
            head = raw[:19]
            for pattern in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S"):
                try:
                    return datetime.strptime(head, pattern)
                except Exception:
                    continue
            return None
        return raw if isinstance(raw, datetime) else None

    # Subject field -> column name, for values copied without conversion.
    plain_columns = {
        "screening_subject_id": "screening_subject_id",
        "nhs_number": "subject_nhs_number",
        "surname": "person_family_name",
        "forename": "person_given_name",
        "screening_status_id": "screening_status_id",
        "screening_status_change_reason_id": "ss_reason_for_change_id",
        "screening_due_date_change_reason_id": "sdd_reason_for_change_id",
        "surveillance_due_date_change_reason_id": "surveillance_sdd_rsn_change_id",
        "lynch_due_date_change_reason_id": "lynch_sdd_reason_for_change_id",
    }

    # Subject field -> column name, for values converted to a date.
    date_columns = {
        "screening_status_change_date": "screening_status_change_date",
        "screening_due_date": "screening_due_date",
        "screening_due_date_change_date": "sdd_change_date",
        "calculated_screening_due_date": "calculated_sdd",
        "surveillance_screening_due_date": "surveillance_screen_due_date",
        "calculated_surveillance_due_date": "calculated_ssdd",
        "surveillance_due_date_change_date": "surveillance_sdd_change_date",
        "lynch_due_date": "lynch_screening_due_date",
        "lynch_due_date_change_date": "lynch_sdd_change_date",
        "calculated_lynch_due_date": "lynch_calculated_sdd",
        "date_of_birth": "date_of_birth",
        "date_of_death": "date_of_death",
    }

    subject_kwargs = {
        field: row.get(column) for field, column in plain_columns.items()
    }
    subject_kwargs["datestamp"] = _to_datetime(row.get("datestamp"))
    for field, column in date_columns.items():
        subject_kwargs[field] = _to_date(row.get(column))

    return Subject(**subject_kwargs)
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# Utility Guide: Subject Assertion Utility
2+
3+
This guide explains the purpose and usage of the `subject_assertion` utility found in [`utils/subject_assertion.py`](../../utils/subject_assertion.py).
4+
It is designed to assert that a subject with a given NHS number matches specified criteria in the database, and provides detailed logging when criteria do not match.
5+
6+
---
7+
8+
## Table of Contents
9+
10+
- [Utility Guide: Subject Assertion Utility](#utility-guide-subject-assertion-utility)
11+
- [Table of Contents](#table-of-contents)
12+
- [Overview](#overview)
13+
- [Required Arguments](#required-arguments)
14+
- [How It Works](#how-it-works)
15+
- [Example Usage](#example-usage)
16+
- [Behaviour Details](#behaviour-details)
17+
- [Best Practices](#best-practices)
18+
- [Reference](#reference)
19+
20+
---
21+
22+
## Overview
23+
24+
The `subject_assertion` function is used to verify that a subject in the database matches a set of criteria.
25+
If the subject does not match all criteria, the function will iteratively remove criteria (except NHS number) and retry, logging any criteria that caused the assertion to fail.
26+
27+
---
28+
29+
## Required Arguments
30+
31+
- `nhs_number` (`str`): The NHS number of the subject to check.
32+
- `criteria` (`dict`): A dictionary of criteria to match against the subject's attributes.
33+
34+
---
35+
36+
## How It Works
37+
38+
1. The function first checks if the subject with the given NHS number matches all provided criteria.
39+
2. If not, it removes one criterion at a time (except NHS number) and retries the assertion.
40+
3. This process continues until either a match is found or all criteria (except NHS number) have been removed.
41+
4. If a match is found only after removing criteria, the failed criteria are logged.
42+
5. The function returns `True` only if all criteria match on the first attempt; otherwise, it returns `False`.
43+
44+
---
45+
46+
## Example Usage
47+
48+
Below are examples of how to use `subject_assertion` in your tests:
49+
50+
```python
51+
import pytest
52+
from utils.subject_assertion import subject_assertion
53+
54+
pytestmark = [pytest.mark.utils_local]
55+
56+
def test_subject_assertion_true():
57+
nhs_number = "9233639266"
58+
criteria = {"screening status": "Inactive", "subject age": "> 28"}
59+
assert subject_assertion(nhs_number, criteria) is True
60+
```
61+
62+
See `tests_utils/test_subject_assertion_util.py` for more examples.
63+
64+
---
65+
66+
## Behaviour Details
67+
68+
- The function always keeps the NHS number criterion.
69+
- If a match is found only after removing criteria, the failed criteria are logged in the format:
70+
- `Key: 'key1' - Value: 'value1'` (one line per criterion, listed under a `Subject Assertion Failed` / `Failed criteria:` heading)
71+
- The function will only return `True` if all criteria match on the first attempt.
72+
73+
---
74+
75+
## Best Practices
76+
77+
- Use this utility to validate subject data in database-driven tests.
78+
- Review logs for failed criteria to diagnose why assertions did not pass.
79+
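- Pass the NHS number through the `nhs_number` argument; the utility adds it to the criteria itself, so it does not need to appear in the `criteria` dictionary.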
80+
81+
---
82+
83+
## Reference
84+
85+
- [`utils/subject_assertion.py`](../../utils/subject_assertion.py)
86+
- [`tests_utils/test_subject_assertion_util.py`](../../tests_utils/test_subject_assertion_util.py)
87+
- [SubjectSelectionQueryBuilder Utility Guide](SubjectSelectionQueryBuilder.md)
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import pytest
2+
from utils.subject_assertion import subject_assertion
3+
4+
pytestmark = [pytest.mark.utils_local]
5+
6+
7+
def test_subject_assertion_true():
    """The assertion returns True when every criterion matches the subject."""
    result = subject_assertion(
        "9233639266",
        {"screening status": "Inactive", "subject age": "> 28"},
    )
    assert result is True
11+
12+
13+
def test_subject_assertion_false():
    """The assertion returns False when none of the criteria match the subject."""
    result = subject_assertion(
        "9233639266",
        {"screening status": "Call", "subject age": "< 28"},
    )
    assert result is False
17+
18+
19+
def test_subject_assertion_false_with_some_true():
    """The assertion returns False when only some of the criteria match the subject."""
    result = subject_assertion(
        "9233639266",
        {"screening status": "Inactive", "subject age": "< 28"},
    )
    assert result is False

utils/subject_assertion.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
from utils.oracle.subject_selection_query_builder import SubjectSelectionQueryBuilder
2+
from utils.oracle.oracle import OracleDB
3+
from classes.subject import Subject
4+
from classes.user import User
5+
import logging
6+
7+
8+
def subject_assertion(nhs_number: str, criteria: dict) -> bool:
    """
    Asserts that a subject with the given NHS number exists and matches the provided criteria.

    The subject is first loaded by NHS number alone, then queried again with the
    full set of criteria. If that query does not return the subject, each criterion
    is left out in turn (the NHS number is always kept) and the query is retried,
    stopping at the first retry that returns the subject. The criteria tried are
    logged as failed.

    Args:
        nhs_number (str): The NHS number of the subject to find.
        criteria (dict): A dictionary of criteria to match against the subject's attributes.
            The dictionary is not modified.

    Returns:
        bool: True if the subject matches all of the provided criteria on the first
        attempt, False if it does not.

    Raises:
        IndexError: If the lookup by NHS number returns no rows.
    """
    user = User()
    builder = SubjectSelectionQueryBuilder()

    # Load the subject by NHS number alone so the later queries can be built against it.
    query, bind_vars = builder.build_subject_selection_query(
        criteria={"nhs number": nhs_number},
        user=user,
        subject=Subject(),
        subjects_to_retrieve=1,
    )
    subject_df = OracleDB().execute_query(query, bind_vars)
    subject = Subject.from_dataframe_row(subject_df.iloc[0])

    # Work on a copy: adding the NHS number must not leak into the caller's dictionary.
    full_criteria = {**criteria, "nhs number": nhs_number}

    query, bind_vars = builder.build_subject_selection_query(
        criteria=full_criteria,
        user=user,
        subject=subject,
        subjects_to_retrieve=1,
    )
    df = OracleDB().execute_query(query, bind_vars)
    if nhs_number in df["subject_nhs_number"].values:
        return True

    # Try removing criteria one by one (except nhs number)
    failed_criteria = []
    removable_keys = [name for name in full_criteria if name != "nhs number"]
    for removed_key in removable_keys:
        # Distinct comprehension names are essential here: reusing the loop
        # variable's name shadows it and filters out every criterion.
        reduced_criteria = {
            name: value
            for name, value in full_criteria.items()
            if name != removed_key
        }
        query, bind_vars = builder.build_subject_selection_query(
            criteria=reduced_criteria,
            user=user,
            subject=subject,
            subjects_to_retrieve=1,
        )
        df = OracleDB().execute_query(query, bind_vars)

        # NOTE(review): every criterion tried up to and including the one whose
        # removal produces a match is reported, as before - confirm that this is
        # the intended reporting.
        failed_criteria.append((removed_key, full_criteria[removed_key]))
        if nhs_number in df["subject_nhs_number"].values:
            break

    if failed_criteria:
        log_message = "Subject Assertion Failed\nFailed criteria:\n" + "\n".join(
            f"Key: '{name}' - Value: '{value}'" for name, value in failed_criteria
        )
        logging.error(log_message)
    return False

0 commit comments

Comments
 (0)