Skip to content

Commit 7abd009

Browse files
authored
[Service Bus] Retry when throttled with short try (Azure#50394)
* [Service Bus] Retry when throttled with short try. The focus of these changes is to improve the retry logic in the Service Bus SDK when throttling occurs. Behavior has been adjusted to allow short `TryTimeout` configurations to consume retries while waiting for service throttling to clear, rather than triggering an immediate `ServiceBusy` exception.
1 parent ecf5de7 commit 7abd009

File tree

4 files changed

+175
-8
lines changed

4 files changed

+175
-8
lines changed

sdk/servicebus/Azure.Messaging.ServiceBus/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212

1313
### Other Changes
1414

15+
- Updated retry policy behavior when the service is throttling and the `TryTimeout` is shorter than the standard throttling time of 30 seconds. Previously, the operation was immediately canceled with a server busy exception. With these changes, the operation will begin consuming retry attempts while throttling until either the server busy state is cleared or all configured retry attempts are exhausted. ([#50121](https://github.com/Azure/azure-sdk-for-net/issues/50121))
16+
1517
## 7.19.0 (2025-04-08)
1618

1719
### Features Added

sdk/servicebus/Azure.Messaging.ServiceBus/src/Core/BasicRetryPolicy.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ internal class BasicRetryPolicy : ServiceBusRetryPolicy
2929
private static readonly ThreadLocal<Random> RandomNumberGenerator = new ThreadLocal<Random>(() => new Random(Interlocked.Increment(ref s_randomSeed)), false);
3030

3131
/// <summary>The maximum number of seconds allowed for a <see cref="TimeSpan" />.</summary>
32-
private static double MaximumTimeSpanSeconds = TimeSpan.MaxValue.TotalSeconds;
32+
private static readonly double MaximumTimeSpanSeconds = TimeSpan.MaxValue.TotalSeconds;
3333

3434
/// <summary>
3535
/// The set of options responsible for configuring the retry

sdk/servicebus/Azure.Messaging.ServiceBus/src/Primitives/ServiceBusRetryPolicy.cs

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ public abstract class ServiceBusRetryPolicy
3535
private const int ServerNotBusyState = 0; // default value of serverBusy
3636
private const int ServerBusyState = 1;
3737

38+
/// <summary>A generic retriable busy exception to use for delay calculations.</summary>
39+
private static readonly ServiceBusException DefaultServiceBusyException = new ServiceBusException(Resources.DefaultServerBusyException, ServiceBusFailureReason.ServiceBusy);
40+
3841
/// <summary>
3942
/// Determines whether or not the server returned a busy error.
4043
/// </summary>
@@ -130,17 +133,42 @@ internal async ValueTask<TResult> RunOperation<T1, TResult>(
130133
bool logTimeoutRetriesAsVerbose = false)
131134
{
132135
var failedAttemptCount = 0;
136+
var tryTimeout = CalculateTryTimeout(0);
133137

134-
TimeSpan tryTimeout = CalculateTryTimeout(0);
135138
if (IsServerBusy && tryTimeout < ServerBusyBaseSleepTime)
136139
{
137-
// We are in a server busy state before we start processing.
138-
// Since ServerBusyBaseSleepTime > remaining time for the operation, we don't wait for the entire Sleep time.
139-
await Task.Delay(tryTimeout, cancellationToken).ConfigureAwait(false);
140-
throw new ServiceBusException(
141-
ServerBusyExceptionMessage,
142-
ServiceBusFailureReason.ServiceBusy);
140+
while (IsServerBusy && !cancellationToken.IsCancellationRequested)
141+
{
142+
// If we are in a server busy state, we will wait for the try timeout.
143+
144+
await Task.Delay(tryTimeout, cancellationToken).ConfigureAwait(false);
145+
146+
// If the server is still busy, consider this a retry attempt and wait for the
147+
// calculated retry delay before trying again.
148+
149+
if (IsServerBusy)
150+
{
151+
++failedAttemptCount;
152+
var delay = CalculateRetryDelay(DefaultServiceBusyException, failedAttemptCount);
153+
154+
if (delay.HasValue)
155+
{
156+
Logger.RunOperationExceptionEncountered(DefaultServiceBusyException.ToString());
157+
158+
await Task.Delay(delay.Value, cancellationToken).ConfigureAwait(false);
159+
tryTimeout = CalculateTryTimeout(failedAttemptCount);
160+
}
161+
else
162+
{
163+
// If there are no retries left, then fail because the server is busy.
164+
throw new ServiceBusException(
165+
ServerBusyExceptionMessage,
166+
ServiceBusFailureReason.ServiceBusy);
167+
}
168+
}
169+
}
143170
}
171+
144172
while (!cancellationToken.IsCancellationRequested)
145173
{
146174
if (IsServerBusy)

sdk/servicebus/Azure.Messaging.ServiceBus/tests/Primitives/ServiceBusRetryPolicyTests.cs

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,67 @@ await policy.RunOperation((state, timeout, token) =>
102102
Assert.That(policy.IsServerBusy, Is.False);
103103
}
104104

105+
/// <summary>
///   Verifies that, when the policy is already in the server busy state and the try
///   timeout is shorter than the base sleep time, <c>RunOperation</c> consumes retry
///   attempts while waiting and then fails with a
///   <see cref="ServiceBusFailureReason.ServiceBusy" /> exception once no retries
///   remain, without ever invoking the operation.
/// </summary>
[Test]
public void RunOperationServerBusyTryTimeoutShorterThanBaseWaitTimeThrowsAfterRetries()
{
    var policy = new CustomServerBusyMockRetryPolicy
    {
        ServerBusyBaseSleepTime = TimeSpan.FromSeconds(30)
    };

    // Set the server busy state before running the operation.
    policy.SetServerBusyForTest();

    var operationCallCount = 0;

    // The operation should never be reached while the policy stays busy; it only
    // records invocations so that expectation can be asserted below.
    Func<object, TimeSpan, CancellationToken, ValueTask<bool>> operation = (state, timeout, token) =>
    {
        operationCallCount++;
        return new ValueTask<bool>(true);
    };

    // Should throw after retries are exhausted.
    var ex = Assert.ThrowsAsync<ServiceBusException>(async () =>
    {
        await policy.RunOperation(operation, null, null, CancellationToken.None);
    });

    Assert.That(ex.Reason, Is.EqualTo(ServiceBusFailureReason.ServiceBusy));

    // Every granted retry plus the final call that reports "no retries left".
    Assert.That(policy.CalculateRetryDelayCallCount, Is.EqualTo(CustomServerBusyMockRetryPolicy.MaxRetries + 1));

    // The failure is raised from the busy-wait path, before the operation is attempted.
    Assert.That(operationCallCount, Is.EqualTo(0));
}
134+
135+
/// <summary>
///   Verifies that, when the server busy state clears before the retry attempts are
///   exhausted, <c>RunOperation</c> goes on to invoke the operation exactly once and
///   returns its result.
/// </summary>
[Test]
public async Task RunOperationServerBusyResolvesBeforeRetriesInvokesOperation()
{
    var invocationCount = 0;
    var wasInvoked = false;

    var policy = new ServerBusyResolvesMockRetryPolicy
    {
        ServerBusyBaseSleepTime = TimeSpan.FromMilliseconds(10)
    };

    // Start from a busy policy so the busy-wait path is exercised first.
    policy.SetServerBusyForTest();

    // Records that it ran and how often, then completes synchronously with success.
    Func<object, TimeSpan, CancellationToken, ValueTask<bool>> operation = (state, timeout, token) =>
    {
        wasInvoked = true;
        ++invocationCount;

        return new ValueTask<bool>(true);
    };

    // The mock clears the busy state partway through, so the call is expected to succeed.
    var result = await policy.RunOperation(operation, null, null, CancellationToken.None);

    Assert.That(result, Is.True);
    Assert.That(wasInvoked, Is.True);
    Assert.That(invocationCount, Is.EqualTo(1));
    Assert.That(policy.CalculateRetryDelayCallCount, Is.EqualTo(ServerBusyResolvesMockRetryPolicy.BusyResolveAfterRetries + 1));
}
164+
165+
// Private test helper classes as nested types
105166
private class MockServiceBusRetryPolicy : ServiceBusRetryPolicy
106167
{
107168
public override TimeSpan CalculateTryTimeout(int attemptCount)
@@ -114,5 +175,81 @@ public override TimeSpan CalculateTryTimeout(int attemptCount)
114175
return null;
115176
}
116177
}
178+
179+
/// <summary>
///   A retry policy for tests that can be forced into the server busy state and
///   grants <see cref="MaxRetries" /> retry delays before reporting that no
///   retries remain.
/// </summary>
private class CustomServerBusyMockRetryPolicy : ServiceBusRetryPolicy
{
    /// <summary>The number of retry delays granted before <c>CalculateRetryDelay</c> returns <c>null</c>.</summary>
    public const int MaxRetries = 3;

    /// <summary>The name of the private base class field that holds the server busy state.</summary>
    private const string ServerBusyStateFieldName = "_serverBusyState";

    /// <summary>The value written to the busy state field to mark the server as busy.</summary>
    private const int BusyStateValue = 1;

    /// <summary>The number of times <c>CalculateRetryDelay</c> has been called.</summary>
    public int CalculateRetryDelayCallCount { get; private set; }

    /// <summary>
    ///   Forces the policy into the server busy state by writing the private
    ///   busy state field of <see cref="ServiceBusRetryPolicy" /> via reflection.
    /// </summary>
    /// <exception cref="InvalidOperationException">The busy state field could not be found on the base class.</exception>
    public void SetServerBusyForTest()
    {
        var field = typeof(ServiceBusRetryPolicy).GetField(
            ServerBusyStateFieldName,
            System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance);

        // Fail with a descriptive message rather than a NullReferenceException if the
        // private field is ever renamed or removed.
        if (field == null)
        {
            throw new InvalidOperationException(
                $"The private field '{ServerBusyStateFieldName}' was not found on {nameof(ServiceBusRetryPolicy)}; the test helper needs to be updated.");
        }

        field.SetValue(this, BusyStateValue);
    }

    /// <summary>
    ///   Returns a fixed, short try timeout regardless of the attempt count.
    /// </summary>
    public override TimeSpan CalculateTryTimeout(int attemptCount)
    {
        // The value only needs to be shorter than the base sleep time to select the
        // busy-wait path. It is awaited for real on each attempt, so it is kept in
        // the millisecond range to avoid multi-second test runs.
        return TimeSpan.FromMilliseconds(10);
    }

    /// <summary>
    ///   Grants a short delay for the first <see cref="MaxRetries" /> calls and
    ///   returns <c>null</c> afterwards to signal that no retries remain.
    /// </summary>
    public override TimeSpan? CalculateRetryDelay(Exception lastException, int attemptCount)
    {
        // The call counter doubles as the retry counter; both advance once per call.
        ++CalculateRetryDelayCallCount;

        if (CalculateRetryDelayCallCount <= MaxRetries)
        {
            return TimeSpan.FromMilliseconds(10);
        }

        return null;
    }
}
212+
213+
/// <summary>
///   A retry policy for tests that starts out busy and clears the server busy
///   state itself after <see cref="BusyResolveAfterRetries" /> retry delays have
///   been granted.
/// </summary>
private class ServerBusyResolvesMockRetryPolicy : ServiceBusRetryPolicy
{
    /// <summary>The number of retry delays granted before the busy state is cleared.</summary>
    public const int BusyResolveAfterRetries = 2;

    /// <summary>The name of the private base class field that holds the server busy state.</summary>
    private const string ServerBusyStateFieldName = "_serverBusyState";

    /// <summary>Tracks whether this policy has already cleared the busy state once.</summary>
    private bool _serverBusyCleared = false;

    /// <summary>The number of times <c>CalculateRetryDelay</c> has been called.</summary>
    public int CalculateRetryDelayCallCount { get; private set; }

    /// <summary>
    ///   Writes the private busy state field of <see cref="ServiceBusRetryPolicy" />
    ///   via reflection.
    /// </summary>
    /// <param name="isBusy"><c>true</c> to write 1 (busy); <c>false</c> to write 0 (not busy).</param>
    /// <exception cref="InvalidOperationException">The busy state field could not be found on the base class.</exception>
    public void SetServerBusyForTest(bool isBusy = true)
    {
        var field = typeof(ServiceBusRetryPolicy).GetField(
            ServerBusyStateFieldName,
            System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance);

        // Fail with a descriptive message rather than a NullReferenceException if the
        // private field is ever renamed or removed.
        if (field == null)
        {
            throw new InvalidOperationException(
                $"The private field '{ServerBusyStateFieldName}' was not found on {nameof(ServiceBusRetryPolicy)}; the test helper needs to be updated.");
        }

        field.SetValue(this, isBusy ? 1 : 0);
    }

    /// <summary>
    ///   Returns a fixed one millisecond try timeout regardless of the attempt count.
    /// </summary>
    public override TimeSpan CalculateTryTimeout(int attemptCount)
    {
        return TimeSpan.FromMilliseconds(1);
    }

    /// <summary>
    ///   Grants a one millisecond delay for the first <see cref="BusyResolveAfterRetries" />
    ///   calls; on the next call clears the busy state and grants one more delay;
    ///   returns <c>null</c> on any call after that.
    /// </summary>
    public override TimeSpan? CalculateRetryDelay(Exception lastException, int attemptCount)
    {
        // The call counter doubles as the retry counter; both advance once per call.
        var callNumber = ++CalculateRetryDelayCallCount;

        if (callNumber <= BusyResolveAfterRetries)
        {
            return TimeSpan.FromMilliseconds(1);
        }

        // Simulate the server busy condition resolving, exactly once.
        if (!_serverBusyCleared)
        {
            SetServerBusyForTest(false);
            _serverBusyCleared = true;

            return TimeSpan.FromMilliseconds(1);
        }

        return null;
    }
}
117254
}
118255
}

0 commit comments

Comments
 (0)