Skip to content

Commit 00c0b07

Browse files
Merge pull request #782 from NHSDigital/DTOSS-11710-sequential-extracts
[DTOSS-11710] Add sequential check for Extract
2 parents 2b4b044 + a9e9b4d commit 00c0b07

File tree

2 files changed

+96
-1
lines changed

2 files changed

+96
-1
lines changed

manage_breast_screening/notifications/management/commands/create_appointments.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@
2323
logger = getLogger(__name__)
2424

2525

26+
class ExtractValidationError(Exception):
    """Raised when an incoming extract's ID does not follow the latest stored extract."""
28+
2629
class Command(BaseCommand):
2730
"""
2831
Django Admin command which reads NBSS appointment data from Azure blob storage
@@ -56,9 +59,11 @@ def handle(self, *args, **options):
5659
).readall()
5760

5861
data_frame = self.raw_data_to_data_frame(blob_content)
62+
63+
self.validate_extract(blob.name, blob_content)
5964

6065
extract = self.create_extract(blob.name, blob_content)
61-
66+
6267
for idx, row in data_frame.iterrows():
6368
if self.is_not_holding_clinic(row):
6469
clinic, clinic_created = self.find_or_create_clinic(row)
@@ -72,13 +77,33 @@ def handle(self, *args, **options):
7277
extract.appointments.add(appt) if appt is not None else None
7378

7479
logger.info("Processed %s rows from %s", len(data_frame), blob.name)
80+
81+
def validate_extract(self, filename: str, raw_data: str) -> None:
    """
    Check that an incoming extract directly follows the latest stored one.

    The BSO code is the part of the blob's file name before the first "_",
    and the extract ID is the second field of the pipe-delimited header
    (the first line of ``raw_data``). If an Extract already exists for that
    BSO code, the incoming ID must equal the highest stored
    ``sequence_number`` plus one. When no Extract exists for the BSO code,
    any ID is accepted.

    Raises:
        ExtractValidationError: the extract ID is not sequential.
        ValueError: the header does not have exactly five fields, or the
            extract ID is not an integer.
        IndexError: ``filename`` has no "/" separated directory prefix.
    """
    bso_code = filename.split("/")[1].split("_")[0]

    # Only the header line is needed, so stop splitting after the first newline
    # instead of splitting the whole payload.
    header = raw_data.split("\n", 1)[0]
    # Unpacking all five fields keeps the implicit check on the header's shape.
    _type_id, extract_id, _start_date, _start_time, _record_count = header.split("|")
    formatted_extract_id = int(extract_id.replace('"', "").replace("\r", ""))

    latest_extract = (
        Extract.objects.filter(bso_code=bso_code).order_by("sequence_number").last()
    )
    if not latest_extract:
        # First extract seen for this BSO code: nothing to compare against.
        return

    if formatted_extract_id != latest_extract.sequence_number + 1:
        raise ExtractValidationError(
            "Extract ID %s is not sequential to last extract ID %s."
            % (formatted_extract_id, latest_extract.sequence_number)
        )
7599

76100
def create_extract(self, filename: str, raw_data: str) -> Extract:
77101
bso_code = filename.split("/")[1].split("_")[0]
78102
type_id, extract_id, start_date, start_time, record_count = raw_data.split(
79103
"\n"
80104
)[0].split("|")
81105
formatted_extract_id = int(extract_id.replace('"', "").replace("\r", ""))
106+
82107
formatted_record_count = int(record_count.replace('"', "").replace("\r", ""))
83108

84109
return Extract.objects.create(

manage_breast_screening/notifications/tests/management/commands/test_create_appointments.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,25 @@ def stored_blob_data(prefix_dir: str, filenames: list[str]):
5959
mock_blob_contents
6060
)
6161
yield
62+
63+
@contextmanager
def mocked_blob_storage_contents(prefix_dir: str, filename: str, contents: str):
    """
    Mock blob storage so it lists one blob whose download returns ``contents``.

    Enters ``mocked_blob_storage()`` and configures the container client it
    hands out: ``list_blobs`` returns a single ``BlobProperties`` mock named
    ``"{prefix_dir}/{filename}"``, and ``readall()`` on the downloaded blob
    returns ``contents``.
    """
    with mocked_blob_storage() as mock_blob_storage:
        mock_container_client = (
            mock_blob_storage.return_value.find_or_create_container.return_value
        )
        mock_blob = Mock(spec=BlobProperties)
        mock_blob.name = f"{prefix_dir}/{filename}"

        mock_container_client.list_blobs.return_value = [mock_blob]
        # side_effect must be a list: Mock iterates any iterable side_effect, so a
        # bare string would make readall() return one character per call.
        mock_container_client.get_blob_client().download_blob().readall.side_effect = [
            contents
        ]
        yield
6381

6482
@pytest.mark.django_db
6583
class TestCreateAppointments:
@@ -391,3 +409,55 @@ def test_errors_with_wrong_format_data(self):
391409
Command().handle(**{"date_str": today_dirname})
392410

393411
assert Extract.objects.count() == 0
412+
413+
def test_extract_id_not_sequential_previous(self, mock_insights_logger):
414+
""" Test that handling an extract whose ID is lower than the stored extract's ID raises a CommandError """
415+
today_dirname = datetime.now().strftime("%Y-%m-%d")
416+
417+
first_filename = f"{today_dirname}/{VALID_DATA_FILE}"
418+
419+
raw_data = '"NBSSAPPT_HDR"|"00000013"|"20250128"|"170922"|"000001"'
420+
421+
previous_raw_data = '"NBSSAPPT_HDR"|"00000012"|"20250128"|"170922"|"000001"'
422+
423+
# add previous extract so we can test the non sequential extract
424+
Command().create_extract(first_filename, raw_data)
425+
426+
# NOTE(review): validate_extract looks up earlier extracts by the file name prefix before the first "_" ("ANOTHER" here) - confirm VALID_DATA_FILE shares that prefix
with mocked_blob_storage_contents(today_dirname, "ANOTHER_FILE_NAME.dat", previous_raw_data):
427+
with pytest.raises(CommandError) as error:
428+
Command().handle(**{"date_str": today_dirname})
429+
# NOTE(review): if this assert is indented inside the pytest.raises block it is never reached once handle() raises - confirm it is dedented
assert str(error.value) == "Extract ID 12 is not sequential to last extract ID 13."
430+
431+
assert Extract.objects.count() == 1
432+
433+
434+
def test_extract_same_bso_and_extract_id(self, mock_insights_logger):
435+
""" Test that handling an extract whose header repeats the extract ID of a stored extract raises a CommandError """
436+
today_dirname = datetime.now().strftime("%Y-%m-%d")
437+
438+
filename = f"{today_dirname}/{VALID_DATA_FILE}"
439+
raw_data = '"NBSSAPPT_HDR"|"00000013"|"20250128"|"170922"|"000001"'
440+
Command().create_extract(filename, raw_data)
441+
442+
same_bso_and_extract_id_raw_data = '"NBSSAPPT_HDR"|"00000013"|"20250128"|"170922"|"000001"'
443+
444+
# NOTE(review): the BSO code comes from the file name prefix before the first "_" ("ANOTHER" here) - confirm VALID_DATA_FILE shares that prefix, otherwise the BSO codes differ
with mocked_blob_storage_contents(today_dirname, "ANOTHER_FILE_NAME.dat", same_bso_and_extract_id_raw_data):
445+
with pytest.raises(CommandError) as error:
446+
Command().handle(**{"date_str": today_dirname})
447+
# NOTE(review): if this assert is indented inside the pytest.raises block it is never reached once handle() raises - confirm it is dedented
assert str(error.value) == "Extract ID 13 is not sequential to last extract ID 13."
448+
449+
def test_extract_not_sequential_skipped_extract(self, mock_insights_logger):
450+
""" Test that handling an extract which skips ahead of the next expected extract ID raises a CommandError """
451+
today_dirname = datetime.now().strftime("%Y-%m-%d")
452+
filename = f"{today_dirname}/{VALID_DATA_FILE}"
453+
454+
raw_data = '"NBSSAPPT_HDR"|"00000013"|"20250128"|"170922"|"000001"'
455+
456+
Command().create_extract(filename, raw_data)
457+
458+
skip_extract_raw_data = '"NBSSAPPT_HDR"|"00000025"|"20250128"|"170922"|"000001"'
459+
460+
# NOTE(review): validate_extract looks up earlier extracts by the file name prefix before the first "_" ("ANOTHER" here) - confirm VALID_DATA_FILE shares that prefix
with mocked_blob_storage_contents(today_dirname, "ANOTHER_FILE_NAME.dat", skip_extract_raw_data):
461+
with pytest.raises(CommandError) as error:
462+
Command().handle(**{"date_str": today_dirname})
463+
# NOTE(review): if this assert is indented inside the pytest.raises block it is never reached once handle() raises - confirm it is dedented
assert str(error.value) == "Extract ID 25 is not sequential to last extract ID 13."

0 commit comments

Comments
 (0)