-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Nats consumer retry and replicas #9975
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -111,6 +111,7 @@ public async Task Initialize(CancellationToken cancellationToken = default) | |
| var streamConfig = new StreamConfig(this._options.StreamName, [$"{this._providerName}.>"]) | ||
| { | ||
| Retention = StreamConfigRetention.Workqueue, | ||
| NumReplicas = this._options.NumReplicas, | ||
| SubjectTransform = new SubjectTransform | ||
|
Comment on lines
111
to
115
|
||
| { | ||
| Src = $"{this._providerName}.*.*", | ||
|
|
@@ -125,6 +126,28 @@ public async Task Initialize(CancellationToken cancellationToken = default) | |
| { | ||
| // ignore, stream already exists | ||
| } | ||
| catch (NatsJSApiException e) when (e.Error.ErrCode == 10058) | ||
| { | ||
| // Stream exists with different config — attempt in-place update | ||
| // (safe for NumReplicas changes; NATS allows replica count upgrades) | ||
| this._logger.LogInformation( | ||
| "Stream {Stream} exists with different config — updating.", | ||
| this._options.StreamName); | ||
|
|
||
| var streamConfig = new StreamConfig(this._options.StreamName, [$"{this._providerName}.>"]) | ||
| { | ||
| Retention = StreamConfigRetention.Workqueue, | ||
| NumReplicas = this._options.NumReplicas, | ||
| SubjectTransform = new SubjectTransform | ||
| { | ||
| Src = $"{this._providerName}.*.*", | ||
| Dest = | ||
| @$"{this._providerName}.{{{{partition({this._options.PartitionCount},1,2)}}}}.{{{{wildcard(1)}}}}.{{{{wildcard(2)}}}}" | ||
| } | ||
| }; | ||
|
|
||
| await this._natsContext.UpdateStreamAsync(streamConfig, cancellationToken); | ||
| } | ||
|
|
||
| this._logger.LogTrace( | ||
| "Initialized to NATS JetStream stream {Stream} on server {NatsServer}", | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -38,10 +38,30 @@ internal sealed class NatsStreamConsumer( | |
| { | ||
| if (this._consumer is null) | ||
| { | ||
| this._logger.LogError( | ||
| "Internal NATS Consumer is not initialized. Provider: {Provider} | Stream: {Stream} | Partition: {Partition}.", | ||
| provider, stream, partition); | ||
| return ([], 0); | ||
| // Lazy retry: attempt re-initialization on each poll cycle. | ||
| // This handles transient failures during initial Initialize() | ||
| // (leader election, timeout, network blip). | ||
| try | ||
| { | ||
| this._logger.LogWarning( | ||
| "NATS Consumer not initialized — attempting re-initialization. Provider: {Provider} | Stream: {Stream} | Partition: {Partition}.", | ||
| provider, stream, partition); | ||
|
|
||
| await Initialize(cancellationToken); | ||
|
Comment on lines
+46
to
+50
|
||
| } | ||
| catch (Exception ex) | ||
| { | ||
| this._logger.LogWarning(ex, | ||
| "NATS Consumer re-initialization failed. Provider: {Provider} | Stream: {Stream} | Partition: {Partition}. Will retry on next poll.", | ||
| provider, stream, partition); | ||
| return ([], 0); | ||
| } | ||
|
|
||
| // If still null after retry, bail (next poll will retry again) | ||
| if (this._consumer is null) | ||
| { | ||
| return ([], 0); | ||
| } | ||
| } | ||
|
|
||
| var batchCount = messageCount > 0 && messageCount < batchSize ? messageCount : batchSize; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,129 @@ | ||
| using NATS.Client.Core; | ||
| using NATS.Client.JetStream; | ||
| using NATS.Client.JetStream.Models; | ||
| using Orleans.Runtime; | ||
| using Orleans.Streaming.NATS; | ||
| using TestExtensions; | ||
| using Xunit; | ||
|
|
||
| namespace NATS.Tests; | ||
|
|
||
| [TestCategory("NATS")] | ||
| public sealed class NatsOptionsTests | ||
| { | ||
| [Fact] | ||
| public void DefaultNumReplicas_ShouldBeOne() | ||
| { | ||
| var options = new NatsOptions(); | ||
|
|
||
| Assert.Equal(1, options.NumReplicas); | ||
| } | ||
|
|
||
| [Theory] | ||
| [InlineData(0)] | ||
| [InlineData(-1)] | ||
| [InlineData(-100)] | ||
| public void Validator_InvalidNumReplicas_ShouldThrow(int numReplicas) | ||
| { | ||
| var options = new NatsOptions | ||
| { | ||
| StreamName = "test-stream", | ||
| NumReplicas = numReplicas | ||
| }; | ||
|
|
||
| var validator = new NatsStreamOptionsValidator(options, "test-provider"); | ||
|
|
||
| Assert.Throws<OrleansConfigurationException>(validator.ValidateConfiguration); | ||
| } | ||
|
|
||
| [Theory] | ||
| [InlineData(1)] | ||
| [InlineData(3)] | ||
| [InlineData(5)] | ||
| public void Validator_ValidNumReplicas_ShouldNotThrow(int numReplicas) | ||
| { | ||
| var options = new NatsOptions | ||
| { | ||
| StreamName = "test-stream", | ||
| NumReplicas = numReplicas | ||
| }; | ||
|
|
||
| var validator = new NatsStreamOptionsValidator(options, "test-provider"); | ||
|
|
||
| validator.ValidateConfiguration(); | ||
| } | ||
|
|
||
| [Fact] | ||
| public void Validator_MissingStreamName_ShouldThrow() | ||
| { | ||
| var options = new NatsOptions | ||
| { | ||
| StreamName = null!, | ||
| NumReplicas = 1 | ||
| }; | ||
|
|
||
| var validator = new NatsStreamOptionsValidator(options, "test-provider"); | ||
|
|
||
| Assert.Throws<OrleansConfigurationException>(validator.ValidateConfiguration); | ||
| } | ||
|
|
||
| [Fact] | ||
| public void Validator_EmptyStreamName_ShouldThrow() | ||
| { | ||
| var options = new NatsOptions | ||
| { | ||
| StreamName = " ", | ||
| NumReplicas = 1 | ||
| }; | ||
|
|
||
| var validator = new NatsStreamOptionsValidator(options, "test-provider"); | ||
|
|
||
| Assert.Throws<OrleansConfigurationException>(validator.ValidateConfiguration); | ||
| } | ||
|
|
||
| [SkippableFact] | ||
| public async Task NumReplicas_IsAppliedToJetStreamConfig() | ||
| { | ||
| if (!NatsTestConstants.IsNatsAvailable) | ||
| { | ||
| throw new SkipException("Nats Server is not available"); | ||
| } | ||
|
|
||
| var streamName = $"test-replicas-{Guid.NewGuid()}"; | ||
| await using var natsConnection = new NatsConnection(); | ||
| var natsContext = new NatsJSContext(natsConnection); | ||
|
|
||
| await natsConnection.ConnectAsync(); | ||
|
|
||
| try | ||
| { | ||
| var streamConfig = new StreamConfig(streamName, [$"test-replicas-provider.>"]) | ||
| { | ||
| Retention = StreamConfigRetention.Workqueue, | ||
| NumReplicas = 1 | ||
| }; | ||
|
Comment on lines
+100
to
+104
|
||
|
|
||
| var stream = await natsContext.CreateStreamAsync(streamConfig); | ||
| var info = stream.Info; | ||
|
|
||
| Assert.Equal(1, info.Config.NumReplicas); | ||
| } | ||
| finally | ||
| { | ||
| try | ||
| { | ||
| var stream = await natsContext.GetStreamAsync(streamName); | ||
| await stream.DeleteAsync(); | ||
| } | ||
| catch (NatsJSApiException) | ||
| { | ||
| // Ignore cleanup errors | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // NOTE: Testing NumReplicas > 1 (e.g. R3) requires a multi-node NATS JetStream | ||
| // cluster. A single NATS node only supports NumReplicas = 1. R3 integration | ||
| // testing should be done in a CI environment with a 3-node cluster configured | ||
| // via docker-compose or similar infrastructure. | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The XML docs state
NumReplicas"must be an odd number" and "cannot exceed the number of NATS nodes", but the validator only enforces>= 1. Either relax the docs to match what is actually enforced, or add validation for the odd-number requirement (and consider how to handle/enforce the upper bound, if possible).