NHSDigital · adrianoaru-nhs · Aug 28, 2025 · Aug 28, 2025 · Aug 28, 2025 · Aug 28, 2025
diff --git a/classes/subject.py b/classes/subject.py
@@ -10,6 +10,7 @@
 from classes.sdd_reason_for_change_type import SDDReasonForChangeType
 from classes.ss_reason_for_change_type import SSReasonForChangeType
 from classes.ssdd_reason_for_change_type import SSDDReasonForChangeType
+import pandas as pd
 
 
 @dataclass
@@ -1241,3 +1242,114 @@ def __str__(self):
             f"datestamp={self.datestamp}"
             f"]"
         )
+
+    @staticmethod
+    def from_dataframe_row(row: pd.Series) -> "Subject":
+        """
+        Populates a Subject object from a pandas DataFrame row.
+        Handles type conversions for dates and datetimes.
+        Only fields present in the SQL query are populated.
+        """
+
+        field_map = {
+            "screening_subject_id": row.get("screening_subject_id"),
+            "nhs_number": row.get("subject_nhs_number"),
+            "surname": row.get("person_family_name"),
+            "forename": row.get("person_given_name"),
+            "datestamp": Subject.parse_datetime(row.get("datestamp")),
+            "screening_status_id": row.get("screening_status_id"),
+            "screening_status_change_reason_id": row.get("ss_reason_for_change_id"),
+            "screening_status_change_date": Subject.parse_date(
+                row.get("screening_status_change_date")
+            ),
+            "screening_due_date": Subject.parse_date(row.get("screening_due_date")),
+            "screening_due_date_change_reason_id": row.get("sdd_reason_for_change_id"),
+            "screening_due_date_change_date": Subject.parse_date(
+                row.get("sdd_change_date")
+            ),
+            "calculated_screening_due_date": Subject.parse_date(
+                row.get("calculated_sdd")
+            ),
+            "surveillance_screening_due_date": Subject.parse_date(
+                row.get("surveillance_screen_due_date")
+            ),
+            "calculated_surveillance_due_date": Subject.parse_date(
+                row.get("calculated_ssdd")
+            ),
+            "surveillance_due_date_change_reason_id": row.get(
+                "surveillance_sdd_rsn_change_id"
+            ),
+            "surveillance_due_date_change_date": Subject.parse_date(
+                row.get("surveillance_sdd_change_date")
+            ),
+            "lynch_due_date": Subject.parse_date(row.get("lynch_screening_due_date")),
+            "lynch_due_date_change_reason_id": row.get(
+                "lynch_sdd_reason_for_change_id"
+            ),
+            "lynch_due_date_change_date": Subject.parse_date(
+                row.get("lynch_sdd_change_date")
+            ),
+            "calculated_lynch_due_date": Subject.parse_date(
+                row.get("lynch_calculated_sdd")
+            ),
+            "date_of_birth": Subject.parse_date(row.get("date_of_birth")),
+            "date_of_death": Subject.parse_date(row.get("date_of_death")),
+        }
+
+        return Subject(**field_map)
+
+    @staticmethod
+    def parse_date(
+        val: Optional[Union[pd.Timestamp, str, datetime, date]],
+    ) -> Optional[date]:
+        """
+        Converts a value to a Python date object if possible.
+
+        Args:
+            val: The value to convert (can be pandas.Timestamp, string, datetime, date, or None).
+
+        Returns:
+            Optional[date]: The converted date object, or None if conversion fails.
+        """
+        if pd.isnull(val):
+            return None
+        if isinstance(val, pd.Timestamp):
+            return val.to_pydatetime().date()
+        if isinstance(val, str):
+            try:
+                return datetime.strptime(val[:10], "%Y-%m-%d").date()
+            except Exception:
+                return None
+        if isinstance(val, datetime):
+            return val.date()
+        if isinstance(val, date):
+            return val
+        return None
+
+    @staticmethod
+    def parse_datetime(
+        val: Optional[Union[pd.Timestamp, str, datetime, date]],
+    ) -> Optional[datetime]:
+        """
+        Converts a value to a Python datetime object if possible.
+
+        Args:
+            val: The value to convert (can be pandas.Timestamp, string, datetime, or None).
+
+        Returns:
+            Optional[datetime]: The converted datetime object, or None if conversion fails.
+        """
+        if pd.isnull(val):
+            return None
+        if isinstance(val, pd.Timestamp):
+            return val.to_pydatetime()
+        if isinstance(val, str):
+            for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S"):
+                try:
+                    return datetime.strptime(val[:19], fmt)
+                except Exception:
+                    continue
+            return None
+        if isinstance(val, datetime):
+            return val
+        return None
diff --git a/docs/utility-guides/SubjectAssertion.md b/docs/utility-guides/SubjectAssertion.md
@@ -0,0 +1,87 @@
+# Utility Guide: Subject Assertion Utility
+
+This guide explains the purpose and usage of the `subject_assertion` utility found in [`utils/subject_assertion.py`](../../utils/subject_assertion.py).
+It is designed to assert that a subject with a given NHS number matches specified criteria in the database, and provides detailed logging when criteria do not match.
+
+---
+
+## Table of Contents
+
+- [Utility Guide: Subject Assertion Utility](#utility-guide-subject-assertion-utility)
+  - [Table of Contents](#table-of-contents)
+  - [Overview](#overview)
+  - [Required Arguments](#required-arguments)
+  - [How It Works](#how-it-works)
+  - [Example Usage](#example-usage)
+  - [Behaviour Details](#behaviour-details)
+  - [Best Practices](#best-practices)
+  - [Reference](#reference)
+
+---
+
+## Overview
+
+The `subject_assertion` function is used to verify that a subject in the database matches a set of criteria.
+If the subject does not match all criteria, the function checks each criterion individually (always together with the NHS number) and logs every criterion that the subject fails to match.
+
+---
+
+## Required Arguments
+
+- `nhs_number` (`str`): The NHS number of the subject to check.
+- `criteria` (`dict`): A dictionary of criteria to match against the subject's attributes.
+
+---
+
+## How It Works
+
+1. The function first retrieves the subject by NHS number alone and builds a `Subject` object from the returned row.
+2. It then checks whether the subject matches all provided criteria together; if so, it returns `True`.
+3. If not, it checks each criterion independently, each time combined only with the NHS number.
+4. Every criterion that fails on its own is logged; if no single criterion fails, the function logs that the criteria combination is invalid or conflicting.
+5. The function returns `True` only if all criteria match on the first attempt; otherwise, it returns `False`.
+
+---
+
+## Example Usage
+
+Below are examples of how to use `subject_assertion` in your tests:
+
+```python
+import pytest
+from utils.subject_assertion import subject_assertion
+
+pytestmark = [pytest.mark.utils_local]
+
+def test_subject_assertion_true():
+    nhs_number = "9233639266"
+    criteria = {"screening status": "Inactive", "subject age": "> 28"}
+    assert subject_assertion(nhs_number, criteria) is True
+```
+
+See `tests_utils/test_subject_assertion_util.py` for more examples.
+
+---
+
+## Behaviour Details
+
+- The function always keeps the NHS number criterion.
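+- If the combined check fails, each criterion that fails on its own is logged, one per line, under the heading `Subject Assertion Failed` / `Failed criteria:` in the format:
+  - `key1, value1`
+- If every criterion passes on its own but the combination does not, the log reads `Subject Assertion Failed: Criteria combination is invalid or conflicting.`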
+- The function will only return `True` if all criteria match on the first attempt.
+
+---
+
+## Best Practices
+
+- Use this utility to validate subject data in database-driven tests.
+- Review logs for failed criteria to diagnose why assertions did not pass.
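+- Always provide the NHS number through the `nhs_number` argument; the utility adds it to the criteria itself, so it does not need to appear in the `criteria` dictionary.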
+
+---
+
+## Reference
+
+- [`utils/subject_assertion.py`](../../utils/subject_assertion.py)
+- [`tests_utils/test_subject_assertion_util.py`](../../tests_utils/test_subject_assertion_util.py)
+- [SubjectSelectionQueryBuilder Utility Guide](SubjectSelectionQueryBuilder.md)
diff --git a/tests_utils/test_subject_assertion_util.py b/tests_utils/test_subject_assertion_util.py
@@ -0,0 +1,29 @@
+import pytest
+from utils.subject_assertion import subject_assertion
+
+# Applies the `utils_local` pytest marker to every test in this module.
+pytestmark = [pytest.mark.utils_local]
+
+# NHS number passed to subject_assertion by every test in this module.
+nhs_number = "9233639266"
+
+
+def test_subject_assertion_true():
+    criteria = {"screening status": "Inactive", "subject age": "> 28"}
+    assert subject_assertion(nhs_number, criteria) is True
+
+
+def test_subject_assertion_false():
+    criteria = {"screening status": "Call", "subject age": "< 28"}
+    assert subject_assertion(nhs_number, criteria) is False
+
+
+def test_subject_assertion_false_with_some_true():
+    criteria = {
+        "screening status": "Inactive",
+        "subject age": "> 28",
+        "latest episode type": "FOBT",
+        "latest episode status": "Open",
+        "latest episode has referral date": "Past",
+        "latest episode has diagnosis date": "No",
+        "latest episode diagnosis date reason": "NULL",
+    }
+    assert subject_assertion(nhs_number, criteria) is False
diff --git a/utils/subject_assertion.py b/utils/subject_assertion.py
@@ -0,0 +1,75 @@
+from utils.oracle.subject_selection_query_builder import SubjectSelectionQueryBuilder
+from utils.oracle.oracle import OracleDB
+from classes.subject import Subject
+from classes.user import User
+import logging
+
+
+def subject_assertion(nhs_number: str, criteria: dict) -> bool:
+    """
+    Asserts that a subject with the given NHS number exists and matches the provided criteria.
+    Args:
+        nhs_number (str): The NHS number of the subject to find.
+        criteria (dict): A dictionary of criteria to match against the subject's attributes.
+    Returns:
+        bool: True if the subject matches the provided criteria, False if it does not.
+    """
+    nhs_number_string = "nhs number"
+    subject_nhs_number_string = "subject_nhs_number"
+    nhs_no_criteria = {nhs_number_string: nhs_number}
+    subject = Subject()
+    user = User()
+    builder = SubjectSelectionQueryBuilder()
+
+    query, bind_vars = builder.build_subject_selection_query(
+        criteria=nhs_no_criteria,
+        user=user,
+        subject=subject,
+        subjects_to_retrieve=1,
+    )
+
+    subject_df = OracleDB().execute_query(query, bind_vars)
+    subject = Subject.from_dataframe_row(subject_df.iloc[0])
+
+    criteria[nhs_number_string] = nhs_number
+
+    # Check all criteria together first
+    query, bind_vars = builder.build_subject_selection_query(
+        criteria=criteria,
+        user=user,
+        subject=subject,
+        subjects_to_retrieve=1,
+    )
+    df = OracleDB().execute_query(query, bind_vars)
+    if nhs_number in df[subject_nhs_number_string].values:
+        return True
+
+    # Check each criterion independently
+    failed_criteria = []
+    criteria_keys = [key for key in criteria if key != nhs_number_string]
+    for key in criteria_keys:
+        single_criteria = {nhs_number_string: nhs_number, key: criteria[key]}
+        query, bind_vars = builder.build_subject_selection_query(
+            criteria=single_criteria,
+            user=user,
+            subject=subject,
+            subjects_to_retrieve=1,
+        )
+        df = OracleDB().execute_query(query, bind_vars)
+        if (
+            subject_nhs_number_string not in df.columns
+            or nhs_number not in df[subject_nhs_number_string].values
+        ):
+            failed_criteria.append((key, criteria[key]))
+
+    if failed_criteria:
+        log_message = "Subject Assertion Failed\nFailed criteria:\n" + "\n".join(
+            [f"{key}, {value}" for key, value in failed_criteria]
+        )
+        logging.error(log_message)
+    else:
+        logging.error(
+            "Subject Assertion Failed: Criteria combination is invalid or conflicting."
+        )
+
+    return False