Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 112 additions & 0 deletions classes/subject.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from classes.sdd_reason_for_change_type import SDDReasonForChangeType
from classes.ss_reason_for_change_type import SSReasonForChangeType
from classes.ssdd_reason_for_change_type import SSDDReasonForChangeType
import pandas as pd


@dataclass
Expand Down Expand Up @@ -1241,3 +1242,114 @@ def __str__(self):
f"datestamp={self.datestamp}"
f"]"
)

@staticmethod
def from_dataframe_row(row: pd.Series) -> "Subject":
    """
    Builds a Subject from a single pandas DataFrame row.

    Identifier and name columns are copied across as they are, the
    `datestamp` column is converted with `Subject.parse_datetime` and every
    date column is converted with `Subject.parse_date`.
    Only fields present in the SQL query are populated.

    Args:
        row (pd.Series): The DataFrame row holding the subject's columns.

    Returns:
        Subject: The Subject populated from the row.
    """
    # (Subject field, DataFrame column) pairs whose values are passed through untouched
    passthrough_columns = (
        ("screening_subject_id", "screening_subject_id"),
        ("nhs_number", "subject_nhs_number"),
        ("surname", "person_family_name"),
        ("forename", "person_given_name"),
        ("screening_status_id", "screening_status_id"),
        ("screening_status_change_reason_id", "ss_reason_for_change_id"),
        ("screening_due_date_change_reason_id", "sdd_reason_for_change_id"),
        ("surveillance_due_date_change_reason_id", "surveillance_sdd_rsn_change_id"),
        ("lynch_due_date_change_reason_id", "lynch_sdd_reason_for_change_id"),
    )
    # (Subject field, DataFrame column) pairs whose values are converted to dates
    date_columns = (
        ("screening_status_change_date", "screening_status_change_date"),
        ("screening_due_date", "screening_due_date"),
        ("screening_due_date_change_date", "sdd_change_date"),
        ("calculated_screening_due_date", "calculated_sdd"),
        ("surveillance_screening_due_date", "surveillance_screen_due_date"),
        ("calculated_surveillance_due_date", "calculated_ssdd"),
        ("surveillance_due_date_change_date", "surveillance_sdd_change_date"),
        ("lynch_due_date", "lynch_screening_due_date"),
        ("lynch_due_date_change_date", "lynch_sdd_change_date"),
        ("calculated_lynch_due_date", "lynch_calculated_sdd"),
        ("date_of_birth", "date_of_birth"),
        ("date_of_death", "date_of_death"),
    )

    subject_kwargs = {
        field_name: row.get(column_name)
        for field_name, column_name in passthrough_columns
    }
    subject_kwargs["datestamp"] = Subject.parse_datetime(row.get("datestamp"))
    for field_name, column_name in date_columns:
        subject_kwargs[field_name] = Subject.parse_date(row.get(column_name))

    return Subject(**subject_kwargs)

@staticmethod
def parse_date(
    val: Optional[Union[pd.Timestamp, str, datetime, date]],
) -> Optional[date]:
    """
    Converts a value to a Python date object if possible.

    Args:
        val: The value to convert (can be pandas.Timestamp, string, datetime, date, or None).
            Strings must start with a date in YYYY-MM-DD format; anything after the
            first 10 characters (e.g. a time part) is ignored.

    Returns:
        Optional[date]: The converted date object, or None if the value is null,
        is of an unsupported type, or is a string that cannot be parsed.
    """
    if pd.isnull(val):
        return None
    # datetime is checked before date because a datetime is also a date and the
    # time part has to be dropped. pandas.Timestamp subclasses datetime, so it is
    # handled by the same branch.
    if isinstance(val, datetime):
        return val.date()
    if isinstance(val, date):
        return val
    if isinstance(val, str):
        try:
            return datetime.strptime(val[:10], "%Y-%m-%d").date()
        except ValueError:
            # strptime raises ValueError for text that is not a YYYY-MM-DD date
            return None
    return None

@staticmethod
def parse_datetime(
    val: Optional[Union[pd.Timestamp, str, datetime, date]],
) -> Optional[datetime]:
    """
    Converts a value to a Python datetime object if possible.

    Args:
        val: The value to convert (can be pandas.Timestamp, string, datetime, date, or None).
            Strings may be "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DDTHH:MM:SS" (anything after
            the first 19 characters, e.g. fractional seconds, is ignored) or a date-only
            "YYYY-MM-DD". A plain date or date-only string is converted to midnight
            of that day.

    Returns:
        Optional[datetime]: The converted datetime object, or None if the value is null,
        is of an unsupported type, or is a string that cannot be parsed.
    """
    if pd.isnull(val):
        return None
    if isinstance(val, pd.Timestamp):
        return val.to_pydatetime()
    # datetime is checked before date because a datetime is also a date
    if isinstance(val, datetime):
        return val
    if isinstance(val, date):
        # The signature accepts plain dates, so convert them rather than dropping them
        return datetime(val.year, val.month, val.day)
    if isinstance(val, str):
        for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S"):
            try:
                return datetime.strptime(val[:19], fmt)
            except ValueError:
                continue
        # The date-only form is matched against the whole string so that a
        # partial timestamp (e.g. "2024-03-05 10:11") is not silently truncated.
        try:
            return datetime.strptime(val, "%Y-%m-%d")
        except ValueError:
            return None
    return None
87 changes: 87 additions & 0 deletions docs/utility-guides/SubjectAssertion.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# Utility Guide: Subject Assertion Utility

This guide explains the purpose and usage of the `subject_assertion` utility found in [`utils/subject_assertion.py`](../../utils/subject_assertion.py).
It is designed to assert that a subject with a given NHS number matches specified criteria in the database, and provides detailed logging when criteria do not match.

---

## Table of Contents

- [Utility Guide: Subject Assertion Utility](#utility-guide-subject-assertion-utility)
- [Table of Contents](#table-of-contents)
- [Overview](#overview)
- [Required Arguments](#required-arguments)
- [How It Works](#how-it-works)
- [Example Usage](#example-usage)
- [Behaviour Details](#behaviour-details)
- [Best Practices](#best-practices)
- [Reference](#reference)

---

## Overview

The `subject_assertion` function is used to verify that a subject in the database matches a set of criteria.
If the subject does not match all criteria, the function checks each criterion individually (always together with the NHS number) and logs every criterion that caused the assertion to fail.

---

## Required Arguments

- `nhs_number` (`str`): The NHS number of the subject to check.
- `criteria` (`dict`): A dictionary of criteria to match against the subject's attributes.

---

## How It Works

1. The function first checks if the subject with the given NHS number matches all provided criteria.
2. If not, it checks each criterion on its own (together with the NHS number) by re-running the query with just that criterion.
3. This process continues until all criteria have been checked.
4. Every criterion that does not match on its own is logged as a failed criterion; if every criterion matches individually, the combination is logged as invalid or conflicting.
5. The function returns `True` only if all criteria match on the first attempt; otherwise, it returns `False`.

---

## Example Usage

Below are examples of how to use `subject_assertion` in your tests:

```python
import pytest
from utils.subject_assertion import subject_assertion

pytestmark = [pytest.mark.utils_local]

def test_subject_assertion_true():
nhs_number = "9233639266"
criteria = {"screening status": "Inactive", "subject age": "> 28"}
assert subject_assertion(nhs_number, criteria) is True
```

See `tests_utils/test_subject_assertion_util.py` for more examples.

---

## Behaviour Details

- The function always keeps the NHS number criterion.
- If one or more criteria do not match when checked individually, the failed criteria are logged in the format:
- Failed criteria: Key: 'key1', Value: 'value1'
- The function will only return `True` if all criteria match on the first attempt.

---

## Best Practices

- Use this utility to validate subject data in database-driven tests.
- Review logs for failed criteria to diagnose why assertions did not pass.
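- Always pass the NHS number through the `nhs_number` argument; the function adds it to the criteria for you, so it does not need to appear in the `criteria` dictionary.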

---

## Reference

- [`utils/subject_assertion.py`](../../utils/subject_assertion.py)
- [`tests_utils/test_subject_assertion_util.py`](../../tests_utils/test_subject_assertion_util.py)
- [SubjectSelectionQueryBuilder Utility Guide](SubjectSelectionQueryBuilder.md)
29 changes: 29 additions & 0 deletions tests_utils/test_subject_assertion_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import pytest
from utils.subject_assertion import subject_assertion

# Apply the `utils_local` marker to every test in this module.
pytestmark = [pytest.mark.utils_local]

# NHS number of the subject that every test in this module asserts against.
nhs_number = "9233639266"


def test_subject_assertion_true():
    """Checks that subject_assertion returns True for this set of criteria."""
    outcome = subject_assertion(
        nhs_number,
        {"screening status": "Inactive", "subject age": "> 28"},
    )
    assert outcome is True


def test_subject_assertion_false():
    """Checks that subject_assertion returns False for this set of criteria."""
    outcome = subject_assertion(
        nhs_number,
        {"screening status": "Call", "subject age": "< 28"},
    )
    assert outcome is False


def test_subject_assertion_false_with_some_true():
    """Checks that subject_assertion returns False for this larger set of criteria."""
    mixed_criteria = dict(
        [
            ("screening status", "Inactive"),
            ("subject age", "> 28"),
            ("latest episode type", "FOBT"),
            ("latest episode status", "Open"),
            ("latest episode has referral date", "Past"),
            ("latest episode has diagnosis date", "No"),
            ("latest episode diagnosis date reason", "NULL"),
        ]
    )
    outcome = subject_assertion(nhs_number, mixed_criteria)
    assert outcome is False
75 changes: 75 additions & 0 deletions utils/subject_assertion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from utils.oracle.subject_selection_query_builder import SubjectSelectionQueryBuilder
from utils.oracle.oracle import OracleDB
from classes.subject import Subject
from classes.user import User
import logging


def _subject_matches_criteria(
    builder: SubjectSelectionQueryBuilder,
    criteria: dict,
    user: User,
    subject: Subject,
    nhs_number: str,
) -> bool:
    """Runs the subject selection query for `criteria` and reports whether `nhs_number` is in the result."""
    subject_nhs_number_string = "subject_nhs_number"
    query, bind_vars = builder.build_subject_selection_query(
        criteria=criteria,
        user=user,
        subject=subject,
        subjects_to_retrieve=1,
    )
    df = OracleDB().execute_query(query, bind_vars)
    # A result without the NHS number column is treated as "no match" rather
    # than raising KeyError.
    return (
        subject_nhs_number_string in df.columns
        and nhs_number in df[subject_nhs_number_string].values
    )


def subject_assertion(nhs_number: str, criteria: dict) -> bool:
    """
    Asserts that a subject with the given NHS number exists and matches the provided criteria.

    The subject is first loaded using the NHS number alone. All criteria are then
    checked together; if that fails, each criterion is checked on its own (always
    alongside the NHS number) so that the criteria which do not match can be logged.
    The `criteria` dictionary passed in is not modified.

    Args:
        nhs_number (str): The NHS number of the subject to find.
        criteria (dict): A dictionary of criteria to match against the subject's attributes.
    Returns:
        bool: True if the subject matches the provided criteria, False if it does not
        (including when no subject is found for the NHS number).
    """
    nhs_number_string = "nhs number"
    user = User()
    builder = SubjectSelectionQueryBuilder()

    # Load the subject using the NHS number alone
    query, bind_vars = builder.build_subject_selection_query(
        criteria={nhs_number_string: nhs_number},
        user=user,
        subject=Subject(),
        subjects_to_retrieve=1,
    )
    subject_df = OracleDB().execute_query(query, bind_vars)
    if subject_df.empty:
        # Without this guard `iloc[0]` raises IndexError instead of reporting a failed assertion
        logging.error(
            "Subject Assertion Failed: No subject found for the supplied NHS number."
        )
        return False
    subject = Subject.from_dataframe_row(subject_df.iloc[0])

    # Work on a copy so the caller's dictionary is left untouched
    all_criteria = {**criteria, nhs_number_string: nhs_number}

    # Check all criteria together first
    if _subject_matches_criteria(builder, all_criteria, user, subject, nhs_number):
        return True

    # Check each criterion independently (the NHS number is always kept)
    failed_criteria = [
        (key, value)
        for key, value in all_criteria.items()
        if key != nhs_number_string
        and not _subject_matches_criteria(
            builder,
            {nhs_number_string: nhs_number, key: value},
            user,
            subject,
            nhs_number,
        )
    ]

    if failed_criteria:
        # Uses the "Key: '...', Value: '...'" format described in the utility guide
        log_message = "Subject Assertion Failed\nFailed criteria:\n" + "\n".join(
            f"Key: '{key}', Value: '{value}'" for key, value in failed_criteria
        )
        logging.error(log_message)
    else:
        # Every criterion matches on its own, so only the combination fails
        logging.error(
            "Subject Assertion Failed: Criteria combination is invalid or conflicting."
        )

    return False