Skip to content

Commit b24a95a

Browse files
authored
Merge pull request #798 from NHSDigital/DTOSS11769
Remove retries for failed but recoverable message batches
2 parents 9655825 + e4e1406 commit b24a95a

File tree

12 files changed

+56
-628
lines changed

12 files changed

+56
-628
lines changed

infrastructure/modules/container-apps/jobs.tf

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -28,17 +28,6 @@ locals {
2828
job_short_name = "smb"
2929
job_container_args = "send_message_batch"
3030
}
31-
retry_failed_message_batch = {
32-
# cron_expression = "0,30 9-12 * * 1-5"
33-
cron_expression = null
34-
environment_variables = {
35-
API_OAUTH_TOKEN_URL = var.api_oauth_token_url
36-
NHS_NOTIFY_API_MESSAGE_BATCH_URL = var.nhs_notify_api_message_batch_url
37-
RETRY_QUEUE_NAME = "notifications-message-batch-retries"
38-
}
39-
job_short_name = "rmb"
40-
job_container_args = "retry_failed_message_batch"
41-
}
4231
save_message_status = {
4332
# cron_expression = "0,30 * * * *"
4433
cron_expression = null
@@ -69,7 +58,6 @@ locals {
6958
collect_metrics = {
7059
cron_expression = "*/5 * * * *"
7160
environment_variables = {
72-
RETRY_QUEUE_NAME = "notifications-message-batch-retries"
7361
STATUS_UPDATES_QUEUE_NAME = "notifications-message-status-updates"
7462
ENVIRONMENT = var.environment
7563
}

manage_breast_screening/notifications/management/commands/collect_metrics.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,13 @@
1111
class Command(BaseCommand):
1212
def handle(self, *args, **options):
1313
try:
14-
# Set queue_size metrics
15-
for queue in [Queue.RetryMessageBatches(), Queue.MessageStatusUpdates()]:
16-
Metrics().set_gauge_value(
17-
f"queue_size_{queue.queue_name}",
18-
"messages",
19-
"Queue length",
20-
queue.get_message_count(),
21-
)
14+
queue = Queue.MessageStatusUpdates()
15+
Metrics().set_gauge_value(
16+
f"queue_size_{queue.queue_name}",
17+
"messages",
18+
"Queue length",
19+
queue.get_message_count(),
20+
)
2221

2322
except Exception as e:
2423
logger.error(e, exc_info=True)

manage_breast_screening/notifications/management/commands/helpers/message_batch_helpers.py

Lines changed: 11 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import json
21
import logging
32
import re
43
from datetime import datetime
@@ -15,7 +14,6 @@
1514
from manage_breast_screening.notifications.services.application_insights_logging import (
1615
ApplicationInsightsLogging,
1716
)
18-
from manage_breast_screening.notifications.services.queue import Queue
1917

2018
logger = logging.getLogger(__name__)
2119

@@ -100,23 +98,14 @@ def process_validation_errors(message_batch: MessageBatch, retry_count: int = 0)
10098

10199
message.save()
102100

103-
message_batch.status = MessageBatchStatusChoices.FAILED_RECOVERABLE.value
101+
message_batch.status = MessageBatchStatusChoices.FAILED_UNRECOVERABLE.value
104102
message_batch.save()
105103

106-
logger.info(
107-
"Adding MessageBatch %s to retry queue after validation failure",
104+
logger.error(
105+
"MessageBatch %s failed to send.",
108106
message_batch.id,
109107
)
110108

111-
Queue.RetryMessageBatches().add(
112-
json.dumps(
113-
{
114-
"message_batch_id": str(message_batch.id),
115-
"retry_count": retry_count,
116-
}
117-
)
118-
)
119-
120109
@staticmethod
121110
def process_unrecoverable_batch(message_batch: MessageBatch):
122111
message_batch.status = MessageBatchStatusChoices.FAILED_UNRECOVERABLE.value
@@ -132,19 +121,15 @@ def process_unrecoverable_batch(message_batch: MessageBatch):
132121

133122
@staticmethod
134123
def process_recoverable_batch(message_batch: MessageBatch, retry_count: int):
135-
message_batch.status = MessageBatchStatusChoices.FAILED_RECOVERABLE.value
124+
message_batch.status = MessageBatchStatusChoices.FAILED_UNRECOVERABLE.value
136125
message_batch.save()
137126

138-
logger.info(
139-
"Adding MessageBatch %s to retry queue after recoverable failure",
140-
message_batch.id,
141-
)
127+
for message in message_batch.messages.all():
128+
message.status = MessageStatusChoices.FAILED.value
129+
message.sent_at = datetime.now(tz=ZONE_INFO)
130+
message.save()
142131

143-
Queue.RetryMessageBatches().add(
144-
json.dumps(
145-
{
146-
"message_batch_id": str(message_batch.id),
147-
"retry_count": retry_count,
148-
}
149-
)
132+
logger.error(
133+
"MessageBatch %s failed to send.",
134+
message_batch.id,
150135
)

manage_breast_screening/notifications/management/commands/retry_failed_message_batch.py

Lines changed: 0 additions & 93 deletions
This file was deleted.

manage_breast_screening/notifications/services/queue.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,3 @@ def MessageStatusUpdates(cls):
6969
"STATUS_UPDATES_QUEUE_NAME", "notifications-message-status-updates"
7070
)
7171
)
72-
73-
@classmethod
74-
def RetryMessageBatches(cls):
75-
return cls(os.getenv("RETRY_QUEUE_NAME", "notifications-message-batch-retries"))

manage_breast_screening/notifications/tests/end_to_end/test_end_to_end.py

Lines changed: 3 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,6 @@
1414
from manage_breast_screening.notifications.management.commands.create_appointments import (
1515
Command as CreateAppointments,
1616
)
17-
from manage_breast_screening.notifications.management.commands.retry_failed_message_batch import (
18-
Command as RetryFailedMessageBatch,
19-
)
2017
from manage_breast_screening.notifications.management.commands.save_message_status import (
2118
Command as SaveMessageStatus,
2219
)
@@ -29,11 +26,9 @@
2926
from manage_breast_screening.notifications.models import (
3027
Appointment,
3128
ChannelStatus,
32-
Message,
3329
MessageBatch,
3430
MessageBatchStatusChoices,
3531
MessageStatus,
36-
MessageStatusChoices,
3732
)
3833
from manage_breast_screening.notifications.services.queue import Queue
3934
from manage_breast_screening.notifications.tests.integration.helpers import Helpers
@@ -67,7 +62,6 @@ def setup_environment(self):
6762
f"{os.path.dirname(os.path.realpath(__file__))}/ABC_20250302091221_APPT_100.dat"
6863
)
6964
Queue.MessageStatusUpdates().client.clear_messages()
70-
Queue.RetryMessageBatches().client.clear_messages()
7165

7266
@pytest.mark.django_db
7367
def test_all_commands_in_sequence(self, mock_jwt_encode):
@@ -88,55 +82,11 @@ def test_all_commands_in_sequence(self, mock_jwt_encode):
8882

8983
assert appointments.count() == 3
9084

91-
# Respond with a 408 recoverable error
92-
self.env.set(
93-
"NHS_NOTIFY_API_MESSAGE_BATCH_URL",
94-
"http://localhost:8888/message/batch/recoverable-error",
95-
)
96-
97-
SendMessageBatch().handle()
98-
99-
failed_batches = MessageBatch.objects.filter(
100-
status=MessageBatchStatusChoices.FAILED_RECOVERABLE.value
101-
)
102-
103-
assert failed_batches.count() == 1
104-
105-
messages = Message.objects.filter(
106-
batch=failed_batches.first(),
107-
)
108-
109-
assert messages.count() == 3
110-
111-
# Respond with a 400 validation error
112-
self.env.set(
113-
"NHS_NOTIFY_API_MESSAGE_BATCH_URL",
114-
"http://localhost:8888/message/batch/validation-error",
115-
)
116-
117-
RetryFailedMessageBatch().handle()
118-
119-
failed_batches = MessageBatch.objects.filter(
120-
status=MessageBatchStatusChoices.FAILED_RECOVERABLE.value
121-
)
122-
123-
assert failed_batches.first().messages.count() == 2
124-
125-
invalid_messages = Message.objects.filter(
126-
status=MessageStatusChoices.FAILED.value
127-
).all()
128-
assert invalid_messages.count() == 1
129-
assert (
130-
invalid_messages.first().nhs_notify_errors[0]["code"]
131-
== "CM_INVALID_NHS_NUMBER"
132-
)
133-
134-
# Respond with 201 success
13585
self.env.set(
13686
"NHS_NOTIFY_API_MESSAGE_BATCH_URL", "http://localhost:8888/message/batch"
13787
)
13888

139-
RetryFailedMessageBatch().handle()
89+
SendMessageBatch().handle()
14090

14191
sent_batches = MessageBatch.objects.filter(
14292
status=MessageBatchStatusChoices.SENT.value
@@ -145,11 +95,10 @@ def test_all_commands_in_sequence(self, mock_jwt_encode):
14595
assert sent_batches.count() == 1
14696
sent_batch = sent_batches.first()
14797
sent_messages = sent_batch.messages.all()
148-
assert sent_messages.count() == 2
98+
assert sent_messages.count() == 3
14999
appointments_from_sent_messages = [m.appointment for m in sent_messages]
150100

151-
assert appointments_from_sent_messages == [appointments[1], appointments[2]]
152-
assert len(Queue.RetryMessageBatches().peek()) == 0
101+
assert appointments_from_sent_messages == list(appointments)
153102

154103
self.make_callbacks(sent_messages)
155104

0 commit comments

Comments
 (0)