Feature/bcss 21355 subject assertion util (#125)

adrianoaru-nhs · web-flow · commit a973edd49a10 · 2025-08-28T13:24:33.000+01:00
## Description  Adding a new util to manage assertions on subjects. This is very similar to how the Selenium tests do it, except that if the subject does not match the criteria, it checks each criterion individually to see what is causing the error and logs it: ``` ERROR root:subject_assertion.py:69 Subject Assertion Failed Failed criteria: latest episode type, FOBT latest episode status, Open latest episode has referral date, Past latest episode has diagnosis date, No latest episode diagnosis date reason, NULL ``` ## Context  Allows us to perform assertions on subjects using a common util. ## Type of changes  - [x] Refactoring (non-breaking change) - [x] New feature (non-breaking change which adds functionality) - [ ] Breaking change (fix or feature that would change existing functionality) - [ ] Bug fix (non-breaking change which fixes an issue) ## Checklist  - [x] I am familiar with the [contributing guidelines](https://github.com/nhs-england-tools/playwright-python-blueprint/blob/main/CONTRIBUTING.md) - [x] I have followed the code style of the project - [x] I have added tests to cover my changes (where appropriate) - [x] I have updated the documentation accordingly - [ ] This PR is a result of pair or mob programming --- ## Sensitive Information Declaration To ensure the utmost confidentiality and protect your and others' privacy, we kindly ask you NOT to include [PII (Personal Identifiable Information) / PID (Personal Identifiable Data)](https://digital.nhs.uk/data-and-information/keeping-data-safe-and-benefitting-the-public) or any other sensitive data in this PR (Pull Request) and the codebase changes. We will remove any PR that does contain any sensitive information. We really appreciate your cooperation in this matter. - [x] I confirm that neither PII/PID nor sensitive data are included in this PR and the codebase changes.
diff --git a/classes/subject.py b/classes/subject.py
@@ -10,6 +10,8 @@
 from classes.sdd_reason_for_change_type import SDDReasonForChangeType
 from classes.ss_reason_for_change_type import SSReasonForChangeType
 from classes.ssdd_reason_for_change_type import SSDDReasonForChangeType
+from utils.date_time_utils import DateTimeUtils
+import pandas as pd
 
 
 @dataclass
@@ -1241,3 +1243,62 @@ def __str__(self):
             f"datestamp={self.datestamp}"
             f"]"
         )
+
+    @staticmethod
+    def from_dataframe_row(row: pd.Series) -> "Subject":
+        """
+        Builds a Subject from a single pandas DataFrame row.
+
+        Every value is looked up with ``row.get``, so a column that is missing
+        from the row produces ``None`` for the corresponding field.
+        Date columns are converted with ``DateTimeUtils.parse_date`` and the
+        ``datestamp`` column with ``DateTimeUtils.parse_datetime``; the remaining
+        columns are passed to the Subject unchanged.
+
+        Args:
+            row (pd.Series): The DataFrame row to read the subject values from.
+
+        Returns:
+            Subject: A Subject populated with only the fields listed in this method.
+        """
+        # Subject attribute -> source column, copied across without conversion.
+        plain_columns = {
+            "screening_subject_id": "screening_subject_id",
+            "nhs_number": "subject_nhs_number",
+            "surname": "person_family_name",
+            "forename": "person_given_name",
+            "screening_status_id": "screening_status_id",
+            "screening_status_change_reason_id": "ss_reason_for_change_id",
+            "screening_due_date_change_reason_id": "sdd_reason_for_change_id",
+            "surveillance_due_date_change_reason_id": "surveillance_sdd_rsn_change_id",
+            "lynch_due_date_change_reason_id": "lynch_sdd_reason_for_change_id",
+        }
+        # Subject attribute -> source column, converted to a date.
+        date_columns = {
+            "screening_status_change_date": "screening_status_change_date",
+            "screening_due_date": "screening_due_date",
+            "screening_due_date_change_date": "sdd_change_date",
+            "calculated_screening_due_date": "calculated_sdd",
+            "surveillance_screening_due_date": "surveillance_screen_due_date",
+            "calculated_surveillance_due_date": "calculated_ssdd",
+            "surveillance_due_date_change_date": "surveillance_sdd_change_date",
+            "lynch_due_date": "lynch_screening_due_date",
+            "lynch_due_date_change_date": "lynch_sdd_change_date",
+            "calculated_lynch_due_date": "lynch_calculated_sdd",
+            "date_of_birth": "date_of_birth",
+            "date_of_death": "date_of_death",
+        }
+
+        subject_kwargs = {
+            attribute: row.get(column) for attribute, column in plain_columns.items()
+        }
+        # datestamp is the only column that keeps its time component.
+        subject_kwargs["datestamp"] = DateTimeUtils.parse_datetime(row.get("datestamp"))
+        for attribute, column in date_columns.items():
+            subject_kwargs[attribute] = DateTimeUtils.parse_date(row.get(column))
+
+        return Subject(**subject_kwargs)
diff --git a/docs/utility-guides/SubjectAssertion.md b/docs/utility-guides/SubjectAssertion.md
@@ -0,0 +1,87 @@
+# Utility Guide: Subject Assertion Utility
+
+This guide explains the purpose and usage of the `subject_assertion` utility found in [`utils/subject_assertion.py`](../../utils/subject_assertion.py).
+It is designed to assert that a subject with a given NHS number matches specified criteria in the database, and provides detailed logging when criteria do not match.
+
+---
+
+## Table of Contents
+
+- [Utility Guide: Subject Assertion Utility](#utility-guide-subject-assertion-utility)
+  - [Table of Contents](#table-of-contents)
+  - [Overview](#overview)
+  - [Required Arguments](#required-arguments)
+  - [How It Works](#how-it-works)
+  - [Example Usage](#example-usage)
+  - [Behaviour Details](#behaviour-details)
+  - [Best Practices](#best-practices)
+  - [Reference](#reference)
+
+---
+
+## Overview
+
+The `subject_assertion` function is used to verify that a subject in the database matches a set of criteria.
+If the subject does not match all criteria, the function then checks each criterion individually (except NHS number), logging every criterion that caused the assertion to fail.
+
+---
+
+## Required Arguments
+
+- `nhs_number` (`str`): The NHS number of the subject to check.
+- `criteria` (`dict`): A dictionary of criteria to match against the subject's attributes.
+
+---
+
+## How It Works
+
+1. The function first checks if the subject with the given NHS number matches all provided criteria.
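+2. If not, it checks each criterion on its own (combined only with the NHS number) to see whether the subject matches it.
+3. This process continues until all criteria have been checked.
+4. Every criterion that does not match on its own is logged as a failed criterion. If each criterion matches individually, an error is logged stating that the criteria combination is invalid or conflicting.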
+5. The function returns `True` only if all criteria match on the first attempt; otherwise, it returns `False`.
+
+---
+
+## Example Usage
+
+Below are examples of how to use `subject_assertion` in your tests:
+
+```python
+import pytest
+from utils.subject_assertion import subject_assertion
+
+pytestmark = [pytest.mark.utils_local]
+
+def test_subject_assertion_true():
+    nhs_number = "9233639266"
+    criteria = {"screening status": "Inactive", "subject age": "> 28"}
+    assert subject_assertion(nhs_number, criteria) is True
+```
+
+See `tests_utils/test_subject_assertion_util.py` for more examples.
+
+---
+
+## Behaviour Details
+
+- The function always keeps the NHS number criterion.
+- If the subject does not match all criteria, every criterion that fails on its own is logged under a `Failed criteria:` heading, one per line, in the format:
+  - `key1, value1`
+- The function will only return `True` if all criteria match on the first attempt.
+
+---
+
+## Best Practices
+
+- Use this utility to validate subject data in database-driven tests.
+- Review logs for failed criteria to diagnose why assertions did not pass.
+- Always provide the NHS number through the `nhs_number` argument; the function adds it to the criteria itself.
+
+---
+
+## Reference
+
+- [`utils/subject_assertion.py`](../../utils/subject_assertion.py)
+- [`tests_utils/test_subject_assertion_util.py`](../../tests_utils/test_subject_assertion_util.py)
+- [SubjectSelectionQueryBuilder Utility Guide](SubjectSelectionQueryBuilder.md)
diff --git a/tests_utils/test_subject_assertion_util.py b/tests_utils/test_subject_assertion_util.py
@@ -0,0 +1,29 @@
+import pytest
+from utils.subject_assertion import subject_assertion
+
+# Apply the "utils_local" marker to every test in this module.
+pytestmark = [pytest.mark.utils_local]
+
+# NHS number passed to subject_assertion by every test in this module.
+nhs_number = "9233639266"
+
+
+def test_subject_assertion_true():
+    """Checks that subject_assertion returns True for the 'Inactive' / '> 28' criteria."""
+    result = subject_assertion(
+        nhs_number,
+        {"screening status": "Inactive", "subject age": "> 28"},
+    )
+    assert result is True
+
+
+def test_subject_assertion_false():
+    """Checks that subject_assertion returns False for the 'Call' / '< 28' criteria."""
+    result = subject_assertion(
+        nhs_number,
+        {"screening status": "Call", "subject age": "< 28"},
+    )
+    assert result is False
+
+
+def test_subject_assertion_false_with_some_true():
+    """
+    Checks that subject_assertion returns False when the criteria used by the
+    True test are combined with additional latest-episode criteria.
+    """
+    # Same two criteria as test_subject_assertion_true.
+    criteria = {"screening status": "Inactive", "subject age": "> 28"}
+    # Additional latest-episode criteria layered on top.
+    criteria.update(
+        {
+            "latest episode type": "FOBT",
+            "latest episode status": "Open",
+            "latest episode has referral date": "Past",
+            "latest episode has diagnosis date": "No",
+            "latest episode diagnosis date reason": "NULL",
+        }
+    )
+    result = subject_assertion(nhs_number, criteria)
+    assert result is False
diff --git a/utils/date_time_utils.py b/utils/date_time_utils.py
@@ -1,5 +1,6 @@
-from datetime import datetime, timedelta
-from typing import Optional
+from datetime import datetime, timedelta, date
+from typing import Optional, Union
+import pandas as pd
 import random
 
 
@@ -152,3 +153,59 @@ def generate_unique_weekday_date(start_year: int = 2025) -> str:
             base_date += timedelta(days=1)
 
         return base_date.strftime("%d/%m/%Y")
+
+    @staticmethod
+    def parse_date(
+        val: Optional[Union[pd.Timestamp, str, datetime, date]],
+    ) -> Optional[date]:
+        """
+        Turns a value into a Python date object where that is possible.
+
+        Strings are read from their first 10 characters in "YYYY-MM-DD" format;
+        timestamps and datetimes have their time component dropped.
+
+        Args:
+            val: The value to convert (can be pandas.Timestamp, string, datetime, date, or None).
+
+        Returns:
+            Optional[date]: The resulting date, or None when the value is null,
+            cannot be parsed, or is of an unsupported type.
+        """
+        if pd.isnull(val):
+            return None
+
+        if isinstance(val, str):
+            try:
+                parsed = datetime.strptime(val[:10], "%Y-%m-%d")
+            except Exception:
+                return None
+            return parsed.date()
+
+        if isinstance(val, pd.Timestamp):
+            return val.to_pydatetime().date()
+
+        # datetime must be tested before date because datetime is a subclass of date.
+        if isinstance(val, datetime):
+            return val.date()
+
+        return val if isinstance(val, date) else None
+
+    @staticmethod
+    def parse_datetime(
+        val: Optional[Union[pd.Timestamp, str, datetime, date]],
+    ) -> Optional[datetime]:
+        """
+        Converts a value to a Python datetime object if possible.
+
+        Args:
+            val: The value to convert (can be pandas.Timestamp, string, datetime, date, or None).
+                Strings are parsed from their first 19 characters as either
+                "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DDTHH:MM:SS".
+                A plain date is converted to midnight at the start of that day.
+
+        Returns:
+            Optional[datetime]: The converted datetime object, or None if conversion fails.
+        """
+        if pd.isnull(val):
+            return None
+        if isinstance(val, pd.Timestamp):
+            return val.to_pydatetime()
+        if isinstance(val, str):
+            for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S"):
+                try:
+                    return datetime.strptime(val[:19], fmt)
+                except ValueError:
+                    # Not this format; try the next one.
+                    continue
+            return None
+        if isinstance(val, datetime):
+            return val
+        if isinstance(val, date):
+            # The signature accepts plain dates, so promote them to midnight
+            # instead of silently discarding the value.
+            return datetime.combine(val, datetime.min.time())
+        return None
diff --git a/utils/subject_assertion.py b/utils/subject_assertion.py
@@ -0,0 +1,75 @@
+from utils.oracle.subject_selection_query_builder import SubjectSelectionQueryBuilder
+from utils.oracle.oracle import OracleDB
+from classes.subject import Subject
+from classes.user import User
+import logging
+
+
+def subject_assertion(nhs_number: str, criteria: dict) -> bool:
+    """
+    Asserts that a subject with the given NHS number exists and matches the provided criteria.
+
+    The subject is first looked up by NHS number alone. All criteria are then checked
+    together; if that check fails, each criterion is checked on its own (together with
+    the NHS number) and every criterion that does not match is logged as an error.
+    The caller's criteria dictionary is not modified.
+
+    Args:
+        nhs_number (str): The NHS number of the subject to find.
+        criteria (dict): A dictionary of criteria to match against the subject's attributes.
+    Returns:
+        bool: True if the subject matches the provided criteria, False if it does not
+        (including when no subject is found for the NHS number).
+    """
+    nhs_number_string = "nhs number"
+    subject_nhs_number_string = "subject_nhs_number"
+    user = User()
+    builder = SubjectSelectionQueryBuilder()
+
+    def run_selection(selection_criteria: dict, selection_subject: Subject):
+        """Builds and executes the subject selection query for the given criteria."""
+        query, bind_vars = builder.build_subject_selection_query(
+            criteria=selection_criteria,
+            user=user,
+            subject=selection_subject,
+            subjects_to_retrieve=1,
+        )
+        return OracleDB().execute_query(query, bind_vars)
+
+    def subject_found(df) -> bool:
+        """Returns True if the query result contains the NHS number being asserted."""
+        # A result without the NHS number column is treated as "no match".
+        return (
+            subject_nhs_number_string in df.columns
+            and nhs_number in df[subject_nhs_number_string].values
+        )
+
+    # Look the subject up by NHS number alone to populate the Subject object.
+    subject_df = run_selection({nhs_number_string: nhs_number}, Subject())
+    if subject_df.empty:
+        # Avoids an IndexError from iloc[0] when the subject does not exist.
+        logging.error(
+            "Subject Assertion Failed: No subject found for the given NHS number."
+        )
+        return False
+    subject = Subject.from_dataframe_row(subject_df.iloc[0])
+
+    # Work on a copy so the caller's dictionary is left untouched.
+    all_criteria = {**criteria, nhs_number_string: nhs_number}
+
+    # Check all criteria together first
+    if subject_found(run_selection(all_criteria, subject)):
+        return True
+
+    # Check each criterion independently
+    failed_criteria = []
+    for key, value in all_criteria.items():
+        if key == nhs_number_string:
+            continue
+        single_criteria = {nhs_number_string: nhs_number, key: value}
+        if not subject_found(run_selection(single_criteria, subject)):
+            failed_criteria.append((key, value))
+
+    if failed_criteria:
+        log_message = "Subject Assertion Failed\nFailed criteria:\n" + "\n".join(
+            [f"{key}, {value}" for key, value in failed_criteria]
+        )
+        logging.error(log_message)
+    else:
+        # Every criterion matched alone, so only the combination fails.
+        logging.error(
+            "Subject Assertion Failed: Criteria combination is invalid or conflicting."
+        )
+
+    return False