From eb975a4a61edc464da681d7009855c228cfcd31f Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Thu, 26 Jun 2025 11:03:27 -0700 Subject: [PATCH 01/74] asdf --- .../RPC/CommandInvoker.cs | 19 +++++++++++++++++++ .../RPC/ExtendedResponse.cs | 2 +- .../RPC/StreamingExtendedResponse.cs | 14 ++++++++++++++ 3 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 dotnet/src/Azure.Iot.Operations.Protocol/RPC/StreamingExtendedResponse.cs diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandInvoker.cs b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandInvoker.cs index 250f40fc1d..90e965603c 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandInvoker.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandInvoker.cs @@ -685,6 +685,25 @@ public async Task> InvokeCommandAsync(TReq request, Comm } } + /// + /// Invoke the specified command. + /// + /// The payload of command request. + /// The metadata of the command request. + /// + /// The topic token replacement map to use in addition to . If this map + /// contains any keys that also has, then values specified in this map will take precedence. + /// + /// How long to wait for a command response. Note that each command executor also has a configurable timeout value that may be shorter than this value. + /// Cancellation token. + /// The command response including the command response metadata + public IAsyncEnumerable> InvokeStreamingCommandAsync(TReq request, CommandRequestMetadata? metadata = null, Dictionary? additionalTopicTokenMap = null, TimeSpan? commandTimeout = default, CancellationToken cancellationToken = default) + { + // user shouldn't have to do the stitching. We do it. Ordering concerns, though? + throw new NotImplementedException(); + } + + /// /// Dispose this object and the underlying mqtt client. 
/// diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/ExtendedResponse.cs b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/ExtendedResponse.cs index d639b76a0e..d6ce35c8cc 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/ExtendedResponse.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/ExtendedResponse.cs @@ -7,7 +7,7 @@ namespace Azure.Iot.Operations.Protocol.RPC { - public struct ExtendedResponse + public class ExtendedResponse where TResp : class { // These two user properties are used to communicate application level errors in an RPC response message. Code is mandatory, but data is optional. diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/StreamingExtendedResponse.cs b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/StreamingExtendedResponse.cs new file mode 100644 index 0000000000..52aefe8ab6 --- /dev/null +++ b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/StreamingExtendedResponse.cs @@ -0,0 +1,14 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +using System; +using System.Text.Json; +using System.Text.Json.Nodes; + +namespace Azure.Iot.Operations.Protocol.RPC +{ + public class StreamingExtendedResponse : ExtendedResponse + where TResp : class + { + } +} From 4d271628bbe443014d245202bb3f33088665ac58 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Mon, 30 Jun 2025 11:57:03 -0700 Subject: [PATCH 02/74] noodling so far --- doc/dev/adr/0023-rpc-streaming.md | 12 + .../AkriSystemProperties.cs | 8 +- .../RPC/BlockingConcurrentQueue.cs | 99 +++++++ .../RPC/CommandInvoker.cs | 250 +++--------------- .../RPC/StreamingExtendedResponse.cs | 3 + 5 files changed, 155 insertions(+), 217 deletions(-) create mode 100644 doc/dev/adr/0023-rpc-streaming.md create mode 100644 dotnet/src/Azure.Iot.Operations.Protocol/RPC/BlockingConcurrentQueue.cs diff --git a/doc/dev/adr/0023-rpc-streaming.md b/doc/dev/adr/0023-rpc-streaming.md new file mode 100644 index 0000000000..d0fd652ddf --- /dev/null +++ b/doc/dev/adr/0023-rpc-streaming.md @@ -0,0 +1,12 @@ +# ADR 22: RPC Streaming + +## Context + + +## Decision + +## Sample model + +## Error cases + + - RPC executor dies after sending X out of Y responses. Just time out waiting on X+1'th reply? 
\ No newline at end of file diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/AkriSystemProperties.cs b/dotnet/src/Azure.Iot.Operations.Protocol/AkriSystemProperties.cs index 19e2ffb597..62e5b1462e 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/AkriSystemProperties.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/AkriSystemProperties.cs @@ -76,6 +76,11 @@ public static class AkriSystemProperties // TODO remove this once akri service is code gen'd to expect srcId instead of invId internal const string CommandInvokerId = ReservedPrefix + "invId"; + /// + /// Inidicates that an RPC request expects the executor to + /// + internal const string IsStreamingCommand = ReservedPrefix + "stream"; + internal static bool IsReservedUserProperty(string name) { return name.Equals(Timestamp, StringComparison.Ordinal) @@ -88,7 +93,8 @@ internal static bool IsReservedUserProperty(string name) || name.Equals(SupportedMajorProtocolVersions, StringComparison.Ordinal) || name.Equals(RequestedProtocolVersion, StringComparison.Ordinal) || name.Equals(SourceId, StringComparison.Ordinal) - || name.Equals(CommandInvokerId, StringComparison.Ordinal); + || name.Equals(CommandInvokerId, StringComparison.Ordinal) + || name.Equals(IsStreamingCommand, StringComparison.Ordinal); } } } diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/BlockingConcurrentQueue.cs b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/BlockingConcurrentQueue.cs new file mode 100644 index 0000000000..14363c7700 --- /dev/null +++ b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/BlockingConcurrentQueue.cs @@ -0,0 +1,99 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +using Azure.Iot.Operations.Protocol; +using System; +using System.Collections.Concurrent; +using System.Threading; + +namespace Azure.Iot.Operations.Protocol.RPC +{ + /// + /// A blocking queue that is thread safe. + /// + /// The type of all the elements in the blocking queue. 
+ /// Note that this is a copy of the "BlockingConcurrentDelayableQueue" defined in the MQTT package, but without the "delayable" feature + internal class BlockingConcurrentQueue : IDisposable + { + private readonly ConcurrentQueue _queue; + private readonly ManualResetEventSlim _gate; + + public BlockingConcurrentQueue() + { + _queue = new ConcurrentQueue(); + _gate = new ManualResetEventSlim(false); + } + + /// + /// Delete all entries from this queue. + /// + public void Clear() + { + _queue.Clear(); + } + + public int Count => _queue.Count; + + /// + /// Enqueue the provided item. + /// + /// The item to enqueue. + public void Enqueue(T item) + { + _queue.Enqueue(item); + _gate.Set(); + } + + /// + /// Block until there is a first element in the queue and that element is ready to be dequeued then dequeue and + /// return that element. + /// + /// Cancellation token. + /// The first element in the queue. + public T Dequeue(CancellationToken cancellationToken = default) + { + while (true) + { + if (_queue.IsEmpty) + { + _gate.Reset(); + _gate.Wait(cancellationToken); + cancellationToken.ThrowIfCancellationRequested(); + continue; + } + else + { + if (_queue.TryPeek(out T? item) + && _queue.TryDequeue(out T? dequeuedItem)) + { + return dequeuedItem; + } + else + { + _gate.Reset(); + _gate.Wait(cancellationToken); + cancellationToken.ThrowIfCancellationRequested(); + continue; + } + } + } + } + + /// + /// Wakeup any blocking calls not because a new element was added to the queue, but because + /// one or more elements in the queue is now ready. + /// + /// + /// Generally, this method should be called every time an item in this queue is marked as ready. 
+ /// + public void Signal() + { + _gate.Set(); + } + + public void Dispose() + { + _gate.Dispose(); + } + } +} diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandInvoker.cs b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandInvoker.cs index 90e965603c..06a726ff3c 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandInvoker.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandInvoker.cs @@ -1,9 +1,11 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. +using Azure.Iot.Operations.Protocol.; using Azure.Iot.Operations.Protocol.Events; using Azure.Iot.Operations.Protocol.Models; using System; +using System.Collections.Concurrent; using System.Collections.Generic; using System.Diagnostics; using System.Globalization; @@ -218,7 +220,7 @@ internal async Task SubscribeAsNeededAsync(string responseTopicFilter, Cancellat Trace.TraceInformation($"Subscribed to topic filter '{responseTopicFilter}' for command invoker '{_commandName}'"); } - private async Task MessageReceivedCallbackAsync(MqttApplicationMessageReceivedEventArgs args) + private Task MessageReceivedCallbackAsync(MqttApplicationMessageReceivedEventArgs args) { if (args.ApplicationMessage.CorrelationData != null && GuidExtensions.TryParseBytes(args.ApplicationMessage.CorrelationData, out Guid? requestGuid)) { @@ -228,154 +230,18 @@ private async Task MessageReceivedCallbackAsync(MqttApplicationMessageReceivedEv { if (!_requestIdMap.TryGetValue(requestGuidString, out responsePromise)) { - return; + return Task.CompletedTask; } } args.AutoAcknowledge = true; if (MqttTopicProcessor.DoesTopicMatchFilter(args.ApplicationMessage.Topic, responsePromise.ResponseTopic)) { - // Assume a protocol version of 1.0 if no protocol version was specified - string? 
responseProtocolVersion = args.ApplicationMessage.UserProperties?.FirstOrDefault(p => p.Name == AkriSystemProperties.ProtocolVersion)?.Value; - if (!ProtocolVersion.TryParseProtocolVersion(responseProtocolVersion, out ProtocolVersion? protocolVersion)) - { - AkriMqttException akriException = new($"Received a response with an unparsable protocol version number: {responseProtocolVersion}") - { - Kind = AkriMqttErrorKind.UnsupportedVersion, - IsShallow = false, - IsRemote = false, - CommandName = _commandName, - CorrelationId = requestGuid, - SupportedMajorProtocolVersions = _supportedMajorProtocolVersions, - ProtocolVersion = responseProtocolVersion, - }; - - SetExceptionSafe(responsePromise.CompletionSource, akriException); - return; - } - - if (!_supportedMajorProtocolVersions.Contains(protocolVersion!.MajorVersion)) - { - AkriMqttException akriException = new($"Received a response with an unsupported protocol version number: {responseProtocolVersion}") - { - Kind = AkriMqttErrorKind.UnsupportedVersion, - IsShallow = false, - IsRemote = false, - CommandName = _commandName, - CorrelationId = requestGuid, - SupportedMajorProtocolVersions = _supportedMajorProtocolVersions, - ProtocolVersion = responseProtocolVersion, - }; - - SetExceptionSafe(responsePromise.CompletionSource, akriException); - return; - } - - MqttUserProperty? statusProperty = args.ApplicationMessage.UserProperties?.FirstOrDefault(p => p.Name == AkriSystemProperties.Status); - - if (!TryValidateResponseHeaders(statusProperty, requestGuidString, out AkriMqttErrorKind errorKind, out string message, out string? headerName, out string? 
headerValue)) - { - AkriMqttException akriException = new(message) - { - Kind = errorKind, - IsShallow = false, - IsRemote = false, - HeaderName = headerName, - HeaderValue = headerValue, - CommandName = _commandName, - CorrelationId = requestGuid, - }; - - SetExceptionSafe(responsePromise.CompletionSource, akriException); - return; - } - - int statusCode = int.Parse(statusProperty!.Value, CultureInfo.InvariantCulture); - - if (statusCode is not ((int)CommandStatusCode.OK) and not ((int)CommandStatusCode.NoContent)) - { - MqttUserProperty? invalidNameProperty = args.ApplicationMessage.UserProperties?.FirstOrDefault(p => p.Name == AkriSystemProperties.InvalidPropertyName); - MqttUserProperty? invalidValueProperty = args.ApplicationMessage.UserProperties?.FirstOrDefault(p => p.Name == AkriSystemProperties.InvalidPropertyValue); - bool isApplicationError = (args.ApplicationMessage.UserProperties?.TryGetProperty(AkriSystemProperties.IsApplicationError, out string? isAppError) ?? false) && isAppError?.ToLower(CultureInfo.InvariantCulture) != "false"; - string? statusMessage = args.ApplicationMessage.UserProperties?.FirstOrDefault(p => p.Name == AkriSystemProperties.StatusMessage)?.Value; - - errorKind = StatusCodeToErrorKind((CommandStatusCode)statusCode, isApplicationError, invalidNameProperty != null, invalidValueProperty != null); - AkriMqttException akriException = new(statusMessage ?? "Error condition identified by remote service") - { - Kind = errorKind, - IsShallow = false, - IsRemote = true, - HeaderName = UseHeaderFields(errorKind) ? invalidNameProperty?.Value : null, - HeaderValue = UseHeaderFields(errorKind) ? invalidValueProperty?.Value : null, - PropertyName = UsePropertyFields(errorKind) ? invalidNameProperty?.Value : null, - PropertyValue = UsePropertyFields(errorKind) ? invalidValueProperty?.Value : null, - TimeoutName = UseTimeoutFields(errorKind) ? invalidNameProperty?.Value : null, - TimeoutValue = UseTimeoutFields(errorKind) ? 
GetAsTimeSpan(invalidValueProperty?.Value) : null, - CommandName = _commandName, - CorrelationId = requestGuid, - }; - - if (errorKind == AkriMqttErrorKind.UnsupportedVersion) - { - MqttUserProperty? supportedMajorVersions = args.ApplicationMessage.UserProperties?.FirstOrDefault(p => p.Name == AkriSystemProperties.SupportedMajorProtocolVersions); - MqttUserProperty? requestProtocolVersion = args.ApplicationMessage.UserProperties?.FirstOrDefault(p => p.Name == AkriSystemProperties.RequestedProtocolVersion); - - if (requestProtocolVersion != null) - { - akriException.ProtocolVersion = requestProtocolVersion.Value; - } - else - { - Trace.TraceWarning("Command executor failed to provide the request's protocol version"); - } - - if (supportedMajorVersions != null - && ProtocolVersion.TryParseFromString(supportedMajorVersions!.Value, out int[]? versions)) - { - akriException.SupportedMajorProtocolVersions = versions; - } - else - { - Trace.TraceWarning("Command executor failed to provide the supported major protocol versions"); - } - } - - SetExceptionSafe(responsePromise.CompletionSource, akriException); - return; - } - - TResp response; - CommandResponseMetadata responseMetadata; - try - { - response = _serializer.FromBytes(args.ApplicationMessage.Payload, args.ApplicationMessage.ContentType, args.ApplicationMessage.PayloadFormatIndicator); - responseMetadata = new CommandResponseMetadata(args.ApplicationMessage); - } - catch (Exception ex) - { - SetExceptionSafe(responsePromise.CompletionSource, ex); - return; - } - - if (responseMetadata.Timestamp != null) - { - await _applicationContext.ApplicationHlc.UpdateWithOtherAsync(responseMetadata.Timestamp); - } - else - { - Trace.TraceInformation($"No timestamp present in command response metadata."); - } - - ExtendedResponse extendedResponse = new() { Response = response, ResponseMetadata = responseMetadata }; - - if (!responsePromise.CompletionSource.TrySetResult(extendedResponse)) - { - Trace.TraceWarning("Failed to 
complete the command response promise. This may be because the operation was cancelled or finished with exception."); - } + responsePromise.Responses.Enqueue(args.ApplicationMessage); } } - return; + return Task.CompletedTask; } private static bool TryValidateResponseHeaders( @@ -473,6 +339,20 @@ private static bool UsePropertyFields(AkriMqttErrorKind errorKind) /// Cancellation token. /// The command response including the command response metadata public async Task> InvokeCommandAsync(TReq request, CommandRequestMetadata? metadata = null, Dictionary? additionalTopicTokenMap = null, TimeSpan? commandTimeout = default, CancellationToken cancellationToken = default) + { + IAsyncEnumerable> response = InvokeCommandAsync(false, request, metadata, additionalTopicTokenMap, commandTimeout, cancellationToken); + var enumerator = response.GetAsyncEnumerator(cancellationToken); + await enumerator.MoveNextAsync(); + return enumerator.Current; + } + + public IAsyncEnumerable> InvokeStreamingCommandAsync(TReq request, CommandRequestMetadata? metadata = null, Dictionary? additionalTopicTokenMap = null, TimeSpan? commandTimeout = default, CancellationToken cancellationToken = default) + { + // user shouldn't have to do the stitching. We do it. Ordering concerns, though? + return InvokeCommandAsync(true, request, metadata, additionalTopicTokenMap, commandTimeout, cancellationToken); + } + + private async IAsyncEnumerable> InvokeCommandAsync(bool isStreaming, TReq request, CommandRequestMetadata? metadata = null, Dictionary? additionalTopicTokenMap = null, TimeSpan? 
commandTimeout = default, CancellationToken cancellationToken = default) { cancellationToken.ThrowIfCancellationRequested(); ObjectDisposedException.ThrowIf(_isDisposed, this); @@ -557,6 +437,11 @@ public async Task> InvokeCommandAsync(TReq request, Comm requestMessage.AddUserProperty("$partition", clientId); requestMessage.AddUserProperty(AkriSystemProperties.SourceId, clientId); + if (isStreaming) + { + requestMessage.AddUserProperty(AkriSystemProperties.IsStreamingCommand, "true"); + } + // TODO remove this once akri service is code gen'd to expect srcId instead of invId requestMessage.AddUserProperty(AkriSystemProperties.CommandInvokerId, clientId); @@ -614,65 +499,17 @@ public async Task> InvokeCommandAsync(TReq request, Comm }; } - ExtendedResponse extendedResponse; - try - { - extendedResponse = await WallClock.WaitAsync(responsePromise.CompletionSource.Task, reifiedCommandTimeout, cancellationToken).ConfigureAwait(false); - if (responsePromise.CompletionSource.Task.IsFaulted) - { - throw responsePromise.CompletionSource.Task.Exception?.InnerException - ?? new AkriMqttException($"Command '{_commandName}' failed with unknown exception") - { - Kind = AkriMqttErrorKind.UnknownError, - IsShallow = false, - IsRemote = false, - CommandName = _commandName, - CorrelationId = requestGuid, - }; - } - } - catch (TimeoutException e) - { - SetCanceledSafe(responsePromise.CompletionSource); + //TODO operationCancelled and timeout exceptions were deleted to accomodate IAsyncEnumerable. Catch them elsewhere? + // https://github.com/dotnet/roslyn/issues/39583#issuecomment-728097630 workaround? 
+ StreamingExtendedResponse extendedResponse; - throw new AkriMqttException($"Command '{_commandName}' timed out while waiting for a response", e) - { - Kind = AkriMqttErrorKind.Timeout, - IsShallow = false, - IsRemote = false, - TimeoutName = nameof(commandTimeout), - TimeoutValue = reifiedCommandTimeout, - CommandName = _commandName, - CorrelationId = requestGuid, - }; - } - catch (OperationCanceledException e) + // "do while" since every command should have at least one intended response, but streaming commands may have more + do { - SetCanceledSafe(responsePromise.CompletionSource); - - throw new AkriMqttException($"Command '{_commandName}' was cancelled while waiting for a response", e) - { - Kind = AkriMqttErrorKind.Cancellation, - IsShallow = false, - IsRemote = false, - CommandName = _commandName, - CorrelationId = requestGuid, - }; - } + extendedResponse = await WallClock.WaitAsync(responsePromise.Responses.TryDequeue(out MqttApplicationMessage? responseMessage), reifiedCommandTimeout, cancellationToken).ConfigureAwait(false); + yield return extendedResponse; + } while (extendedResponse.StreamingResponseId != null && !extendedResponse.IsLastResponse); - return extendedResponse; - } - catch (ArgumentException ex) - { - throw new AkriMqttException(ex.Message) - { - Kind = AkriMqttErrorKind.ConfigurationInvalid, - IsShallow = true, - IsRemote = false, - PropertyName = ex.ParamName, - CommandName = _commandName, - CorrelationId = requestGuid, - }; } finally { @@ -685,25 +522,6 @@ public async Task> InvokeCommandAsync(TReq request, Comm } } - /// - /// Invoke the specified command. - /// - /// The payload of command request. - /// The metadata of the command request. - /// - /// The topic token replacement map to use in addition to . If this map - /// contains any keys that also has, then values specified in this map will take precedence. - /// - /// How long to wait for a command response. 
Note that each command executor also has a configurable timeout value that may be shorter than this value. - /// Cancellation token. - /// The command response including the command response metadata - public IAsyncEnumerable> InvokeStreamingCommandAsync(TReq request, CommandRequestMetadata? metadata = null, Dictionary? additionalTopicTokenMap = null, TimeSpan? commandTimeout = default, CancellationToken cancellationToken = default) - { - // user shouldn't have to do the stitching. We do it. Ordering concerns, though? - throw new NotImplementedException(); - } - - /// /// Dispose this object and the underlying mqtt client. /// @@ -809,7 +627,7 @@ private sealed class ResponsePromise(string responseTopic) { public string ResponseTopic { get; } = responseTopic; - public TaskCompletionSource> CompletionSource { get; } = new TaskCompletionSource>(); + public BlockingConcurrentQueue Responses { get; } } } } diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/StreamingExtendedResponse.cs b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/StreamingExtendedResponse.cs index 52aefe8ab6..a84fca5a1e 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/StreamingExtendedResponse.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/StreamingExtendedResponse.cs @@ -10,5 +10,8 @@ namespace Azure.Iot.Operations.Protocol.RPC public class StreamingExtendedResponse : ExtendedResponse where TResp : class { + public string? 
StreamingResponseId { get; set; } + + public bool IsLastResponse { get; set; } } } From 0163c767d101949681ce3db996f31a3184c3e0a6 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Mon, 30 Jun 2025 11:59:08 -0700 Subject: [PATCH 03/74] sadf --- .../RPC/BlockingConcurrentQueue.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/BlockingConcurrentQueue.cs b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/BlockingConcurrentQueue.cs index 14363c7700..5fee7ddccc 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/BlockingConcurrentQueue.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/BlockingConcurrentQueue.cs @@ -5,6 +5,7 @@ using System; using System.Collections.Concurrent; using System.Threading; +using System.Threading.Tasks; namespace Azure.Iot.Operations.Protocol.RPC { @@ -50,7 +51,7 @@ public void Enqueue(T item) /// /// Cancellation token. /// The first element in the queue. - public T Dequeue(CancellationToken cancellationToken = default) + public Task DequeueAsync(CancellationToken cancellationToken = default) { while (true) { From 73b12d7576d9c5975c7993b6d00094bc9306f382 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Mon, 30 Jun 2025 12:02:45 -0700 Subject: [PATCH 04/74] maybe --- .../RPC/BlockingConcurrentQueue.cs | 17 +++++++---------- .../RPC/CommandInvoker.cs | 2 +- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/BlockingConcurrentQueue.cs b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/BlockingConcurrentQueue.cs index 5fee7ddccc..3f929ffaf7 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/BlockingConcurrentQueue.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/BlockingConcurrentQueue.cs @@ -17,12 +17,12 @@ namespace Azure.Iot.Operations.Protocol.RPC internal class BlockingConcurrentQueue : IDisposable { private readonly ConcurrentQueue _queue; - private readonly ManualResetEventSlim _gate; + private 
readonly SemaphoreSlim _gate; public BlockingConcurrentQueue() { _queue = new ConcurrentQueue(); - _gate = new ManualResetEventSlim(false); + _gate = new(0, 1); } /// @@ -42,7 +42,7 @@ public void Clear() public void Enqueue(T item) { _queue.Enqueue(item); - _gate.Set(); + _gate.Release(); } /// @@ -51,15 +51,13 @@ public void Enqueue(T item) /// /// Cancellation token. /// The first element in the queue. - public Task DequeueAsync(CancellationToken cancellationToken = default) + public async Task DequeueAsync(CancellationToken cancellationToken = default) { while (true) { if (_queue.IsEmpty) { - _gate.Reset(); - _gate.Wait(cancellationToken); - cancellationToken.ThrowIfCancellationRequested(); + await _gate.WaitAsync(cancellationToken); continue; } else @@ -71,8 +69,7 @@ public Task DequeueAsync(CancellationToken cancellationToken = default) } else { - _gate.Reset(); - _gate.Wait(cancellationToken); + await _gate.WaitAsync(cancellationToken); cancellationToken.ThrowIfCancellationRequested(); continue; } @@ -89,7 +86,7 @@ public Task DequeueAsync(CancellationToken cancellationToken = default) /// public void Signal() { - _gate.Set(); + _gate.Release(); } public void Dispose() diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandInvoker.cs b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandInvoker.cs index 06a726ff3c..49153a4f6c 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandInvoker.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandInvoker.cs @@ -506,7 +506,7 @@ private async IAsyncEnumerable> InvokeCommandAs // "do while" since every command should have at least one intended response, but streaming commands may have more do { - extendedResponse = await WallClock.WaitAsync(responsePromise.Responses.TryDequeue(out MqttApplicationMessage? 
responseMessage), reifiedCommandTimeout, cancellationToken).ConfigureAwait(false); + MqttApplicationMessage mqttMessage = await WallClock.WaitAsync(responsePromise.Responses.Dequeue(cancellationToken), reifiedCommandTimeout, cancellationToken).ConfigureAwait(false); yield return extendedResponse; } while (extendedResponse.StreamingResponseId != null && !extendedResponse.IsLastResponse); From 408010492f8f54d74a47bc95ae29a32ab4f8e6fc Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Mon, 30 Jun 2025 14:05:52 -0700 Subject: [PATCH 05/74] doc --- doc/dev/adr/0023-rpc-streaming.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/dev/adr/0023-rpc-streaming.md b/doc/dev/adr/0023-rpc-streaming.md index d0fd652ddf..c7692d317a 100644 --- a/doc/dev/adr/0023-rpc-streaming.md +++ b/doc/dev/adr/0023-rpc-streaming.md @@ -9,4 +9,6 @@ ## Error cases - - RPC executor dies after sending X out of Y responses. Just time out waiting on X+1'th reply? \ No newline at end of file + - RPC executor dies after sending X out of Y responses. Just time out waiting on X+1'th reply? + - RPC executor doesn't support streaming but receives a streaming request + - timeout per response vs overall? \ No newline at end of file From 4ac40037cc66675bfd5bef84018e4439e22c2eed Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Mon, 7 Jul 2025 11:44:42 -0700 Subject: [PATCH 06/74] more --- .../RPC/CommandInvoker.cs | 21 ++++--------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandInvoker.cs b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandInvoker.cs index 49153a4f6c..556a5d2e13 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandInvoker.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandInvoker.cs @@ -1,15 +1,11 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
-using Azure.Iot.Operations.Protocol.; using Azure.Iot.Operations.Protocol.Events; using Azure.Iot.Operations.Protocol.Models; using System; -using System.Collections.Concurrent; using System.Collections.Generic; using System.Diagnostics; -using System.Globalization; -using System.Linq; using System.Text; using System.Threading; using System.Threading.Tasks; @@ -506,7 +502,10 @@ private async IAsyncEnumerable> InvokeCommandAs // "do while" since every command should have at least one intended response, but streaming commands may have more do { - MqttApplicationMessage mqttMessage = await WallClock.WaitAsync(responsePromise.Responses.Dequeue(cancellationToken), reifiedCommandTimeout, cancellationToken).ConfigureAwait(false); + MqttApplicationMessage mqttMessage = await WallClock.WaitAsync(responsePromise.Responses.DequeueAsync(cancellationToken), reifiedCommandTimeout, cancellationToken).ConfigureAwait(false); + + //TODO mqtt message to command response + yield return extendedResponse; } while (extendedResponse.StreamingResponseId != null && !extendedResponse.IsLastResponse); @@ -556,18 +555,6 @@ protected virtual async ValueTask DisposeAsyncCore(bool disposing) _mqttClient.ApplicationMessageReceivedAsync -= MessageReceivedCallbackAsync; - lock (_requestIdMapLock) - { - foreach (KeyValuePair responsePromise in _requestIdMap) - { - if (responsePromise.Value != null && responsePromise.Value.CompletionSource != null) - { - SetCanceledSafe(responsePromise.Value.CompletionSource); - } - } - _requestIdMap.Clear(); - } - try { if (_subscribedTopics.Count > 0) From af7c829d65b22dff2c68002d99ee5a30bc87f17c Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Mon, 7 Jul 2025 11:47:30 -0700 Subject: [PATCH 07/74] asdf --- .../RPC/CommandInvoker.cs | 144 +++++++++++++++++- 1 file changed, 141 insertions(+), 3 deletions(-) diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandInvoker.cs b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandInvoker.cs index 
556a5d2e13..36e272c159 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandInvoker.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandInvoker.cs @@ -1,8 +1,6 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. -using Azure.Iot.Operations.Protocol.Events; -using Azure.Iot.Operations.Protocol.Models; using System; using System.Collections.Generic; using System.Diagnostics; @@ -10,6 +8,8 @@ using System.Threading; using System.Threading.Tasks; using System.Xml; +using Azure.Iot.Operations.Protocol.Events; +using Azure.Iot.Operations.Protocol.Models; namespace Azure.Iot.Operations.Protocol.RPC { @@ -506,6 +506,144 @@ private async IAsyncEnumerable> InvokeCommandAs //TODO mqtt message to command response + // Assume a protocol version of 1.0 if no protocol version was specified + string? responseProtocolVersion = mqttMessage.UserProperties?.FirstOrDefault(p => p.Name == AkriSystemProperties.ProtocolVersion)?.Value; + if (!ProtocolVersion.TryParseProtocolVersion(responseProtocolVersion, out ProtocolVersion? 
protocolVersion)) + { + AkriMqttException akriException = new($"Received a response with an unparsable protocol version number: {responseProtocolVersion}") + { + Kind = AkriMqttErrorKind.UnsupportedVersion, + IsShallow = false, + IsRemote = false, + CommandName = _commandName, + CorrelationId = requestGuid, + SupportedMajorProtocolVersions = _supportedMajorProtocolVersions, + ProtocolVersion = responseProtocolVersion, + }; + + SetExceptionSafe(responsePromise.CompletionSource, akriException); + return; + } + + if (!_supportedMajorProtocolVersions.Contains(protocolVersion!.MajorVersion)) + { + AkriMqttException akriException = new($"Received a response with an unsupported protocol version number: {responseProtocolVersion}") + { + Kind = AkriMqttErrorKind.UnsupportedVersion, + IsShallow = false, + IsRemote = false, + CommandName = _commandName, + CorrelationId = requestGuid, + SupportedMajorProtocolVersions = _supportedMajorProtocolVersions, + ProtocolVersion = responseProtocolVersion, + }; + + SetExceptionSafe(responsePromise.CompletionSource, akriException); + return; + } + + MqttUserProperty? statusProperty = mqttMessage.UserProperties?.FirstOrDefault(p => p.Name == AkriSystemProperties.Status); + + if (!TryValidateResponseHeaders(statusProperty, requestGuidString, out AkriMqttErrorKind errorKind, out string message, out string? headerName, out string? headerValue)) + { + AkriMqttException akriException = new(message) + { + Kind = errorKind, + IsShallow = false, + IsRemote = false, + HeaderName = headerName, + HeaderValue = headerValue, + CommandName = _commandName, + CorrelationId = requestGuid, + }; + + SetExceptionSafe(responsePromise.CompletionSource, akriException); + return; + } + + int statusCode = int.Parse(statusProperty!.Value, CultureInfo.InvariantCulture); + + if (statusCode is not ((int)CommandStatusCode.OK) and not ((int)CommandStatusCode.NoContent)) + { + MqttUserProperty? 
invalidNameProperty = mqttMessage.UserProperties?.FirstOrDefault(p => p.Name == AkriSystemProperties.InvalidPropertyName); + MqttUserProperty? invalidValueProperty = mqttMessage.UserProperties?.FirstOrDefault(p => p.Name == AkriSystemProperties.InvalidPropertyValue); + bool isApplicationError = (mqttMessage.UserProperties?.TryGetProperty(AkriSystemProperties.IsApplicationError, out string? isAppError) ?? false) && isAppError?.ToLower(CultureInfo.InvariantCulture) != "false"; + string? statusMessage = mqttMessage.UserProperties?.FirstOrDefault(p => p.Name == AkriSystemProperties.StatusMessage)?.Value; + + errorKind = StatusCodeToErrorKind((CommandStatusCode)statusCode, isApplicationError, invalidNameProperty != null, invalidValueProperty != null); + AkriMqttException akriException = new(statusMessage ?? "Error condition identified by remote service") + { + Kind = errorKind, + IsShallow = false, + IsRemote = true, + HeaderName = UseHeaderFields(errorKind) ? invalidNameProperty?.Value : null, + HeaderValue = UseHeaderFields(errorKind) ? invalidValueProperty?.Value : null, + PropertyName = UsePropertyFields(errorKind) ? invalidNameProperty?.Value : null, + PropertyValue = UsePropertyFields(errorKind) ? invalidValueProperty?.Value : null, + TimeoutName = UseTimeoutFields(errorKind) ? invalidNameProperty?.Value : null, + TimeoutValue = UseTimeoutFields(errorKind) ? GetAsTimeSpan(invalidValueProperty?.Value) : null, + CommandName = _commandName, + CorrelationId = requestGuid, + }; + + if (errorKind == AkriMqttErrorKind.UnsupportedVersion) + { + MqttUserProperty? supportedMajorVersions = mqttMessage.UserProperties?.FirstOrDefault(p => p.Name == AkriSystemProperties.SupportedMajorProtocolVersions); + MqttUserProperty? 
requestProtocolVersion = mqttMessage.UserProperties?.FirstOrDefault(p => p.Name == AkriSystemProperties.RequestedProtocolVersion); + + if (requestProtocolVersion != null) + { + akriException.ProtocolVersion = requestProtocolVersion.Value; + } + else + { + Trace.TraceWarning("Command executor failed to provide the request's protocol version"); + } + + if (supportedMajorVersions != null + && ProtocolVersion.TryParseFromString(supportedMajorVersions!.Value, out int[]? versions)) + { + akriException.SupportedMajorProtocolVersions = versions; + } + else + { + Trace.TraceWarning("Command executor failed to provide the supported major protocol versions"); + } + } + + SetExceptionSafe(responsePromise.CompletionSource, akriException); + return; + } + + TResp response; + CommandResponseMetadata responseMetadata; + try + { + response = _serializer.FromBytes(mqttMessage.Payload, mqttMessage.ContentType, .PayloadFormatIndicator); + responseMetadata = new CommandResponseMetadata(mqttMessage); + } + catch (Exception ex) + { + SetExceptionSafe(responsePromise.CompletionSource, ex); + return; + } + + if (responseMetadata.Timestamp != null) + { + await _applicationContext.ApplicationHlc.UpdateWithOtherAsync(responseMetadata.Timestamp, cancellationToken: cancellationToken); + } + else + { + Trace.TraceInformation($"No timestamp present in command response metadata."); + } + + extendedResponse = new() { Response = response, ResponseMetadata = responseMetadata }; + + if (!responsePromise.CompletionSource.TrySetResult(extendedResponse)) + { + Trace.TraceWarning("Failed to complete the command response promise. 
This may be because the operation was cancelled or finished with exception."); + } + yield return extendedResponse; } while (extendedResponse.StreamingResponseId != null && !extendedResponse.IsLastResponse); @@ -614,7 +752,7 @@ private sealed class ResponsePromise(string responseTopic) { public string ResponseTopic { get; } = responseTopic; - public BlockingConcurrentQueue Responses { get; } + public BlockingConcurrentQueue Responses { get; } = new(); } } } From 215eb64e36dd8aabb6b911380aff49365bb6e90d Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Thu, 17 Jul 2025 10:24:27 -0700 Subject: [PATCH 08/74] asdf --- doc/dev/adr/0023-rpc-streaming.md | 14 ---------- doc/dev/adr/0025-rpc-streaming.md | 44 +++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 14 deletions(-) delete mode 100644 doc/dev/adr/0023-rpc-streaming.md create mode 100644 doc/dev/adr/0025-rpc-streaming.md diff --git a/doc/dev/adr/0023-rpc-streaming.md b/doc/dev/adr/0023-rpc-streaming.md deleted file mode 100644 index c7692d317a..0000000000 --- a/doc/dev/adr/0023-rpc-streaming.md +++ /dev/null @@ -1,14 +0,0 @@ -# ADR 22: RPC Streaming - -## Context - - -## Decision - -## Sample model - -## Error cases - - - RPC executor dies after sending X out of Y responses. Just time out waiting on X+1'th reply? - - RPC executor doesn't support streaming but receives a streaming request - - timeout per response vs overall? \ No newline at end of file diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md new file mode 100644 index 0000000000..04f8ca54fa --- /dev/null +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -0,0 +1,44 @@ +# ADR 25: RPC Streaming + +## Context + +Users have expressed a desire to allow more than one response per RPC invocation. 
This would enable scenarios like: + +- Execute long-running commands while still being responsive +- + +## Requirements + - Allow for an arbitrary number of command responses + +## Non-requirements + - Different payloads per command response + - Allow for a separate command executor to "take over" execution of a command mid-stream + - ???? Allow command executor to determine mid stream how many responses are needed? Or should the first response outline exactly how many responses? + +## Decision + +RPC response includes a "is streaming and there is at least one more response" flag. If that flag isn't present in the initial RPC response, then the invoker can assume it is not streaming. + +How to announce from invoker side that it supports streaming? + - Can't do protocol version unless all SDKs are at par. + - Protocol exists regardless of SDK support though, so protocol version 2.0 could just mandate streaming support + - We need a document that tracks what each protocol version supports + + + +## Sample model + +## Questions + +- Do we need the invoker to announce in its request that it supports streaming responses? + - Yes. Otherwise, a legacy invoker may receive the first of a stream of requests and ignore the others. Would rather throw an exception along the lines of "Cannot invoke this command because it requires streaming support" +- Do we maintain separate APIs for invoking a streaming method vs non-streaming? + - return Task vs IAsyncEnumerable seems useful + +## Error cases + + - RPC executor dies after sending X out of Y responses. Just time out waiting on X+1'th reply? + - RPC executor doesn't support streaming but receives a streaming request + - RPC invoker tries to invoke a command that the executor requires streaming on + - timeout per response vs overall? 
+ \ No newline at end of file From 615ba263c55cd50c2641bece75a562ee0de9211c Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Fri, 18 Jul 2025 16:20:48 -0700 Subject: [PATCH 09/74] thoughts --- doc/dev/adr/0025-rpc-streaming.md | 135 +++++++++++++++--- .../RPC/CommandExecutor.cs | 17 ++- .../RPC/StreamingExtendedResponse.cs | 14 ++ 3 files changed, 148 insertions(+), 18 deletions(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 04f8ca54fa..545aa38aa3 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -8,37 +8,138 @@ Users have expressed a desire to allow more than one response per RPC invocation - ## Requirements - - Allow for an arbitrary number of command responses + - Allow for an arbitrary number of command responses for a single command invocation + - The total number of responses does not need to be known before the first response is sent + - When exposed to the user, each response includes an index of where it was in the stream + - Allow for multiple separate commands to be streamed simultaneously + - Even the same command can be executed in parallel to itself? ## Non-requirements - - Different payloads per command response - - Allow for a separate command executor to "take over" execution of a command mid-stream - - ???? Allow command executor to determine mid stream how many responses are needed? Or should the first response outline exactly how many responses? + - Different payload shapes per command response ## Decision -RPC response includes a "is streaming and there is at least one more response" flag. If that flag isn't present in the initial RPC response, then the invoker can assume it is not streaming. +Our command invoker base class will now include a new method ```InvokeCommandWithStreaming``` to go with the existing ```InvokeCommand``` method. -How to announce from invoker side that it supports streaming? - - Can't do protocol version unless all SDKs are at par. 
- - Protocol exists regardless of SDK support though, so protocol version 2.0 could just mandate streaming support - - We need a document that tracks what each protocol version supports +This new method will take the same parameters as ```InvokeCommand``` but will return an asynchronously iterable list (or callback depending on language?) of command response objects. - +```csharp +public abstract class CommandInvoker + where TReq : class + where TResp : class +{ + // Single response + public Task> InvokeCommandAsync(TReq request, ...) {...} -## Sample model + // Many responses, responses may be staggered + public IAsyncEnumerable> InvokeStreamingCommandAsync(TReq request, ...) {...} +} +``` -## Questions +Additionally, this new method will return an extended version of the ```ExtendedResponse``` wrapper that will include the streaming-specific information about each response: -- Do we need the invoker to announce in its request that it supports streaming responses? - - Yes. Otherwise, a legacy invoker may receive the first of a stream of requests and ignore the others. Would rather throw an exception along the lines of "Cannot invoke this command because it requires streaming support" -- Do we maintain separate APIs for invoking a streaming method vs non-streaming? - - return Task vs IAsyncEnumerable seems useful +```csharp +public class StreamingExtendedResponse : ExtendedResponse + where TResp : class +{ + /// + /// An optional Id for this response (relative to the other responses in this response stream) + /// + /// + /// Users are allowed to provide Ids for each response, only for specific responses, or for none of the responses. + /// + public string? StreamingResponseId { get; set; } + + /// + /// The index of this response relative to the other responses in this response stream. Starts at 0. + /// + public int StreamingResponseIndex { get; set; } + + /// + /// If true, this response is the final response in this response stream. 
+ /// +public bool IsLastResponse { get; set; } +} +``` + +On the executor side, we will define a separate callback that executes whenever a streaming command is invoked. Instead of returning the single response, this callback will return the asynchronously iterable list of responses. Importantly, this iterable may still be added to by the user after this callback has finished. + +```csharp +public abstract class CommandExecutor : IAsyncDisposable + where TReq : class + where TResp : class +{ + /// + /// The callback to execute each time a non-streaming command request is received. + /// + /// + /// This callback may be null if this command executor only supports commands that stream responses. + /// + public Func, CancellationToken, Task>>? OnCommandReceived { get; set; } + + /// + /// The callback to execute each time a command request that expects streamed responses is received. + /// + /// + /// The callback provides the request itself and requires the user to return one to many responses. This callback may be null + /// if this command executor doesn't have any streaming commands. + /// + public Func, CancellationToken, Task>>>? OnStreamingCommandReceived { get; set; } +} + +``` + +With this design, commands that use streaming are defined at codegen time. Codegen layer changes will be defined in a separate ADR, though. + +## Example with code gen + +TODO which existing client works well for long-running commands? Mem mon ("Report usage for 10 seconds at 1 second intervals")? + +### MQTT layer implementation + +#### Command invoker side + +- The command invoker's request message will include an MQTT user property with name "isStream" and value "true". 
+ - Otherwise, the request message will look the same as a non-streaming RPC request +- The command invoker will listen for command responses with the correlation data that matches the invoked method's correlation data until it receives a response with the "isLastResp" flag +- The command invoker will acknowledge all messages it receives that match the correlation data of the command request + +#### Command executor side + +- All command responses will use the same MQTT message correlation data as the request provided so that the invoker can map responses to the appropriate command invocation. +- The final command response will include an MQTT user property "isLastResp" with value "true" to signal that it is the final response in the stream. +- A streaming command is allowed to have a single response. If the stream only has one response, it should include both the "isStream" and "isLastResp" flags set. +- All **completed** streamed command responses will be added to the command response cache + - If we cache incompleted commands, will the cache hit just wait on cache additions to get the remaining responses? + - Separate cache for data structure purposes? + +### Protocol version update + +This feature is not backwards compatible (old invoker can't communicate with new executor that may try to stream a response), so it requires a bump in our RPC protocol version from "1.0" to "2.0". + +TODO: Start defining a doc in our repo that defines what features are present in what protocol version. 
+ +## Alternative designs considered + + - Allow the command executor to decide at run time of each command if it will stream responses + - This would force users to call the ```InvokeCommandWithStreaming``` API on the command invoker side and that returned object isn't as easy to use for single responses + - Treat streaming RPC as a separate protocol from RPC, give it its own client like ```CommandInvoker``` and ```TelemetrySender``` + - There is a lot of code re-use between RPC and streaming RPC so this would make implementation very inconvenient + - This would introduce another protocol to version. Future RPC changes would likely be relevant to RPC streaming anyways, so this feels redundant. ## Error cases - RPC executor dies after sending X out of Y responses. Just time out waiting on X+1'th reply? - RPC executor doesn't support streaming but receives a streaming request + - RPC executor responds with "NotSupportedVersion" error code - RPC invoker tries to invoke a command that the executor requires streaming on - timeout per response vs overall? - \ No newline at end of file + + ## Open Questions + +- Do we need to include response index user property on each streamed response? + - MQTT message ordering suggests this information can just be inferred by the command invoker +- Command timeout/cancellation tokens in single vs streaming? +- When to ack the streaming request? + - In normal RPC, request is Ack'd only after the method finishes invocation, but this would likely clog up Acks since streaming requests can take a while. + - Ack after first response is generated? 
\ No newline at end of file diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandExecutor.cs b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandExecutor.cs index fd687e0924..823fc9e556 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandExecutor.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandExecutor.cs @@ -49,7 +49,22 @@ public abstract class CommandExecutor : IAsyncDisposable /// public TimeSpan ExecutionTimeout { get; set; } - public required Func, CancellationToken, Task>> OnCommandReceived { get; set; } + /// + /// The callback to execute each time a non-streaming command request is received. + /// + /// + /// This callback may be null if this command executor only supports commands that stream responses. + /// + public Func, CancellationToken, Task>>? OnCommandReceived { get; set; } + + /// + /// The callback to execute each time a command request that expects streamed responses is received. + /// + /// + /// The callback provides the request itself and requires the user to return one to many responses. This callback may be null + /// if this command executor doesn't have any streaming commands. + /// + public Func, CancellationToken, Task>>>? OnStreamingCommandReceived { get; set; } public string? 
ExecutorId { get; init; } diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/StreamingExtendedResponse.cs b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/StreamingExtendedResponse.cs index a84fca5a1e..84b36bf385 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/StreamingExtendedResponse.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/StreamingExtendedResponse.cs @@ -10,8 +10,22 @@ namespace Azure.Iot.Operations.Protocol.RPC public class StreamingExtendedResponse : ExtendedResponse where TResp : class { + /// + /// An optional Id for this response (relative to the other responses in this response stream) + /// + /// + /// Users are allowed to provide Ids for each response, only for specific responses, or for none of the responses. + /// public string? StreamingResponseId { get; set; } + /// + /// The index of this response relative to the other responses in this response stream. Starts at 0. + /// + public int StreamingResponseIndex { get; set; } + + /// + /// If true, this response is the final response in this response stream. + /// public bool IsLastResponse { get; set; } } } From ad17af3410f0ff03682a1173f684e9f66a7430c9 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Fri, 18 Jul 2025 16:33:48 -0700 Subject: [PATCH 10/74] more --- doc/dev/adr/0025-rpc-streaming.md | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 545aa38aa3..46e599ebfb 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -99,18 +99,23 @@ TODO which existing client works well for long-running commands? Mem mon ("Repor #### Command invoker side -- The command invoker's request message will include an MQTT user property with name "isStream" and value "true". +- The command invoker's request message will include an MQTT user property with name "__isStream" and value "true". 
- Otherwise, the request message will look the same as a non-streaming RPC request -- The command invoker will listen for command responses with the correlation data that matches the invoked method's correlation data until it receives a response with the "isLastResp" flag +- The command invoker will listen for command responses with the correlation data that matches the invoked method's correlation data until it receives a response with the "__isLastResp" flag - The command invoker will acknowledge all messages it receives that match the correlation data of the command request #### Command executor side - All command responses will use the same MQTT message correlation data as the request provided so that the invoker can map responses to the appropriate command invocation. -- The final command response will include an MQTT user property "isLastResp" with value "true" to signal that it is the final response in the stream. -- A streaming command is allowed to have a single response. If the stream only has one response, it should include both the "isStream" and "isLastResp" flags set. +- Each streamed response will contain an MQTT user property with name "__streamRespId" and value equal to that response's streaming response Id. +- The final command response will include an MQTT user property "__isLastResp" with value "true" to signal that it is the final response in the stream. +- A streaming command is allowed to have a single response. If the stream only has one response, it should include both the "__isStream" and "__isLastResp" flags set. - All **completed** streamed command responses will be added to the command response cache - If we cache incompleted commands, will the cache hit just wait on cache additions to get the remaining responses? + - Cache exists for de-duplication, and we want that even for long-running RPC, right? 
+ - Re-sending previous responses would potentially get picked up by the original invoker twice + - Enforced unique stream response Ids would help de-dup on the invoker side + - Needless traffic here though - Separate cache for data structure purposes? ### Protocol version update From a2c2f6ec0a8ad3c2a68de946cc88fadcc6f5023a Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Tue, 22 Jul 2025 14:34:49 -0700 Subject: [PATCH 11/74] notes --- doc/dev/adr/0025-rpc-streaming.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 46e599ebfb..6f75fa06c2 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -17,6 +17,10 @@ Users have expressed a desire to allow more than one response per RPC invocation ## Non-requirements - Different payload shapes per command response +## State of the art + +What does gRPC do? + ## Decision Our command invoker base class will now include a new method ```InvokeCommandWithStreaming``` to go with the existing ```InvokeCommand``` method. From 6bf1bccb8d8857a09d0aa8a2779e570423b31f4c Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Tue, 22 Jul 2025 15:09:38 -0700 Subject: [PATCH 12/74] caching --- doc/dev/adr/0025-rpc-streaming.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 6f75fa06c2..431c397d28 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -117,9 +117,6 @@ TODO which existing client works well for long-running commands? Mem mon ("Repor - All **completed** streamed command responses will be added to the command response cache - If we cache incompleted commands, will the cache hit just wait on cache additions to get the remaining responses? - Cache exists for de-duplication, and we want that even for long-running RPC, right? 
- - Re-sending previous responses would potentially get picked up by the original invoker twice - - Enforced unique stream response Ids would help de-dup on the invoker side - - Needless traffic here though - Separate cache for data structure purposes? ### Protocol version update From 180d85efa1b7944f047022a0b691500561346507 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Thu, 24 Jul 2025 12:01:08 -0700 Subject: [PATCH 13/74] more thoughts --- doc/dev/adr/0025-rpc-streaming.md | 44 ++++++++++++++++++++----------- doc/reference/error-model.md | 6 +++++ 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 431c397d28..de0af64978 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -103,21 +103,27 @@ TODO which existing client works well for long-running commands? Mem mon ("Repor #### Command invoker side -- The command invoker's request message will include an MQTT user property with name "__isStream" and value "true". - - Otherwise, the request message will look the same as a non-streaming RPC request -- The command invoker will listen for command responses with the correlation data that matches the invoked method's correlation data until it receives a response with the "__isLastResp" flag +- The command invoker's request message will include an MQTT user property with name "__streamResp" and value "true". 
+ - Executor needs to know if it can stream the response, and this is the flag that tells it that +- The command invoker will listen for command responses with the correlation data that matches the invoked method's correlation data until it receives a response with the "__isLastResp" flag set to "true" - The command invoker will acknowledge all messages it receives that match the correlation data of the command request #### Command executor side -- All command responses will use the same MQTT message correlation data as the request provided so that the invoker can map responses to the appropriate command invocation. -- Each streamed response will contain an MQTT user property with name "__streamRespId" and value equal to that response's streaming response Id. -- The final command response will include an MQTT user property "__isLastResp" with value "true" to signal that it is the final response in the stream. -- A streaming command is allowed to have a single response. If the stream only has one response, it should include both the "__isStream" and "__isLastResp" flags set. -- All **completed** streamed command responses will be added to the command response cache - - If we cache incompleted commands, will the cache hit just wait on cache additions to get the remaining responses? - - Cache exists for de-duplication, and we want that even for long-running RPC, right? - - Separate cache for data structure purposes? +- The command executor receives a command with "__streamResp" flag set to "true" + - The command is given to the application layer in a way that allows the application to return at least one response + - All command responses will use the same MQTT message correlation data as the request provided so that the invoker can map responses to the appropriate command invocation. 
+ - Each streamed response must contain an MQTT user property with name "__streamIndex" and value equal to the index of this response relative to the other responses (0 for the first response, 1 for the second response, etc.) + - Each streamed response may contain an MQTT user property with name "__streamRespId" and value equal to that response's streaming response Id. This is an optional and user-provided value. + - The final command response will include an MQTT user property "__isLastResp" with value "true" to signal that it is the final response in the stream. + - A streaming command is allowed to have a single response. It must include the "__isLastResp" flag in that first/final response + - All **completed** streamed command responses will be added to the command response cache + - If we cache incompleted commands, will the cache hit just wait on cache additions to get the remaining responses? + - Cache exists for de-duplication, and we want that even for long-running RPC, right? + - Separate cache for data structure purposes? 
+ +- The command executor receives a command **without** "__streamResp" flag set to "true" + - The command must be responded to without streaming ### Protocol version update @@ -127,7 +133,7 @@ TODO: Start defining a doc in our repo that defines what features are present in ## Alternative designs considered - - Allow the command executor to decide at run time of each command if it will stream responses + - Allow the command executor to decide at run time of each command if it will stream responses independent of the command invoker's request - This would force users to call the ```InvokeCommandWithStreaming``` API on the command invoker side and that returned object isn't as easy to use for single responses - Treat streaming RPC as a separate protocol from RPC, give it its own client like ```CommandInvoker``` and ```TelemetrySender``` - There is a lot of code re-use between RPC and streaming RPC so this would make implementation very inconvenient @@ -135,10 +141,16 @@ TODO: Start defining a doc in our repo that defines what features are present in ## Error cases - - RPC executor dies after sending X out of Y responses. Just time out waiting on X+1'th reply? - - RPC executor doesn't support streaming but receives a streaming request - - RPC executor responds with "NotSupportedVersion" error code - - RPC invoker tries to invoke a command that the executor requires streaming on + - RPC executor dies before sending the final stream response. 
+ - Command invoker throws time out exception waiting on the next response + - RPC executor receives command request with "__streamResp", but that executor doesn't understand streaming requests because it uses an older protocol version + - Command executor responds with "not supported protocol" error code + - RPC executor receives command request with "__streamResp", and the executor understands that it is a streaming request (protocol versions align) but that particular command doesn't support streaming + - RPC executor treats it like a non-streaming command, but adds the "__isLastResp" flag to the one and only response + - RPC invoker tries to invoke a non-streaming command that the executor requires streaming on + - Atypical case since codegen will prevent this + - But, for the sake of non-codegen users, a new error code "StreamingRequired" would be returned by the executor + - Or should this just be "invalid header" error since the executor expects the "__streamResp" header? - timeout per response vs overall? 
## Open Questions diff --git a/doc/reference/error-model.md b/doc/reference/error-model.md index 4172cfce55..0c3b0d8b70 100644 --- a/doc/reference/error-model.md +++ b/doc/reference/error-model.md @@ -151,6 +151,7 @@ public enum AkriMqttErrorKind ExecutionException, MqttError, UnsupportedVersion, + StreamingRequired } ``` @@ -263,6 +264,7 @@ public enum AkriMqttErrorKind { EXECUTION_EXCEPTION, MQTT_ERROR, UNSUPPORTED_VERSION, + STREAMING_REQUIRED, } ``` @@ -327,6 +329,7 @@ pub enum AIOProtocolErrorKind { ExecutionException, MqttError, UnsupportedVersion, + StreamingRequired, } ``` @@ -400,6 +403,7 @@ const { ExecutionError MqttError UnsupportedVersion + StreamingRequired } ``` @@ -455,6 +459,7 @@ class AkriMqttErrorKind(Enum): EXECUTION_EXCEPTION = 10 MQTT_ERROR = 11 UNSUPPORTED_VERSION = 12 + STREAMING_REQUIRED = 13 ``` The Akri.Mqtt error type is defined as follows: @@ -570,6 +575,7 @@ The following table lists the HTTP status codes, conditions on other fields in t | 400 | Bad Request | false | no | | invalid payload | | 408 | Request Timeout | false | yes | yes | timeout | | 415 | Unsupported Media Type | false | yes | yes | invalid header | +| 452 | Streaming Required | false | no | no | streaming required | | 500 | Internal Server Error | false | no | | unknown error | | 500 | Internal Server Error | false | yes | | internal logic error | | 500 | Internal Server Error | true | maybe | | execution error | From a1d2e2121e55e28a1248df2c4a9f36a3964e996e Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Fri, 25 Jul 2025 15:27:58 -0700 Subject: [PATCH 14/74] no new error code, some gRPC notes --- doc/dev/adr/0025-rpc-streaming.md | 24 +++++++++++++++--------- doc/reference/error-model.md | 6 ------ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index de0af64978..f4348ec1e2 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -16,10 +16,19 
@@ Users have expressed a desire to allow more than one response per RPC invocation ## Non-requirements - Different payload shapes per command response + - "Client Streaming" RPC (multiples requests -> One command response) + - Bi-directional streaming RPC (multiples requests -> multiple responses) + - Allow for invoker to cancel streamed responses mid-stream ## State of the art -What does gRPC do? +gRPC supports these patterns for RPC: +- Unary RPC (1 request message, 1 response message) +- Server streaming RPC (1 request message, many response messages) +- Client streaming RPC (many request messages, one response message) +- Bi-directional streaming RPC (many request messages, many response messages) + +gRPC relies on the HTTP streaming protocol to delineate each message in the stream and to indicate the end of the stream. ## Decision @@ -117,17 +126,14 @@ TODO which existing client works well for long-running commands? Mem mon ("Repor - Each streamed response may contain an MQTT user property with name "__streamRespId" and value equal to that response's streaming response Id. This is an optional and user-provided value. - The final command response will include an MQTT user property "__isLastResp" with value "true" to signal that it is the final response in the stream. - A streaming command is allowed to have a single response. It must include the "__isLastResp" flag in that first/final response - - All **completed** streamed command responses will be added to the command response cache - - If we cache incompleted commands, will the cache hit just wait on cache additions to get the remaining responses? - - Cache exists for de-duplication, and we want that even for long-running RPC, right? - - Separate cache for data structure purposes? 
+ - Cache is only updated once the stream has completed and it is updated to include all of the responses (in order) for the command so they can be re-played if the streaming command is invoked again by the same client - The command executor receives a command **without** "__streamResp" flag set to "true" - The command must be responded to without streaming ### Protocol version update -This feature is not backwards compatible (old invoker can't communicate with new executor that may try to stream a response), so it requires a bump in our RPC protocol version from "1.0" to "2.0". +This feature is not backwards compatible (new invoker can't initiate what it believes is a streaming RPC call on an old executor), so it requires a bump in our RPC protocol version from "1.0" to "2.0". TODO: Start defining a doc in our repo that defines what features are present in what protocol version. @@ -149,9 +155,9 @@ TODO: Start defining a doc in our repo that defines what features are present in - RPC executor treats it like a non-streaming command, but adds the "__isLastResp" flag to the one and only response - RPC invoker tries to invoke a non-streaming command that the executor requires streaming on - Atypical case since codegen will prevent this - - But, for the sake of non-codegen users, a new error code "StreamingRequired" would be returned by the executor - - Or should this just be "invalid header" error since the executor expects the "__streamResp" header? - - timeout per response vs overall? + - But, for the sake of non-codegen users, executor returns "invalid header" error pointing to the "__streamResp" header + - Invoker understands that, if the "invalid header" value is "__streamResp", it attempted to invoke a streaming method + - timeout per response vs overall? Both? 
## Open Questions diff --git a/doc/reference/error-model.md b/doc/reference/error-model.md index 0c3b0d8b70..4172cfce55 100644 --- a/doc/reference/error-model.md +++ b/doc/reference/error-model.md @@ -151,7 +151,6 @@ public enum AkriMqttErrorKind ExecutionException, MqttError, UnsupportedVersion, - StreamingRequired } ``` @@ -264,7 +263,6 @@ public enum AkriMqttErrorKind { EXECUTION_EXCEPTION, MQTT_ERROR, UNSUPPORTED_VERSION, - STREAMING_REQUIRED, } ``` @@ -329,7 +327,6 @@ pub enum AIOProtocolErrorKind { ExecutionException, MqttError, UnsupportedVersion, - StreamingRequired, } ``` @@ -403,7 +400,6 @@ const { ExecutionError MqttError UnsupportedVersion - StreamingRequired } ``` @@ -459,7 +455,6 @@ class AkriMqttErrorKind(Enum): EXECUTION_EXCEPTION = 10 MQTT_ERROR = 11 UNSUPPORTED_VERSION = 12 - STREAMING_REQUIRED = 13 ``` The Akri.Mqtt error type is defined as follows: @@ -575,7 +570,6 @@ The following table lists the HTTP status codes, conditions on other fields in t | 400 | Bad Request | false | no | | invalid payload | | 408 | Request Timeout | false | yes | yes | timeout | | 415 | Unsupported Media Type | false | yes | yes | invalid header | -| 452 | Streaming Required | false | no | no | streaming required | | 500 | Internal Server Error | false | no | | unknown error | | 500 | Internal Server Error | false | yes | | internal logic error | | 500 | Internal Server Error | true | maybe | | execution error | From e6802639637cda1889e4d95b521801082aeeba35 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Fri, 25 Jul 2025 15:32:10 -0700 Subject: [PATCH 15/74] save impl for later --- .../AkriSystemProperties.cs | 4 +- .../RPC/BlockingConcurrentQueue.cs | 97 ----- .../RPC/CommandInvoker.cs | 393 ++++++++++-------- 3 files changed, 226 insertions(+), 268 deletions(-) delete mode 100644 dotnet/src/Azure.Iot.Operations.Protocol/RPC/BlockingConcurrentQueue.cs diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/AkriSystemProperties.cs 
b/dotnet/src/Azure.Iot.Operations.Protocol/AkriSystemProperties.cs index f1ae126f8e..386e0406f1 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/AkriSystemProperties.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/AkriSystemProperties.cs @@ -80,9 +80,9 @@ public static class AkriSystemProperties internal const string CommandInvokerId = ReservedPrefix + "invId"; /// - /// Inidicates that an RPC request expects the executor to + /// Inidicates that an RPC request expects the executor to stream one or many responses. /// - internal const string IsStreamingCommand = ReservedPrefix + "stream"; + internal const string IsStreamingCommand = ReservedPrefix + "streamResp"; internal static bool IsReservedUserProperty(string name) { diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/BlockingConcurrentQueue.cs b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/BlockingConcurrentQueue.cs deleted file mode 100644 index 3f929ffaf7..0000000000 --- a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/BlockingConcurrentQueue.cs +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -using Azure.Iot.Operations.Protocol; -using System; -using System.Collections.Concurrent; -using System.Threading; -using System.Threading.Tasks; - -namespace Azure.Iot.Operations.Protocol.RPC -{ - /// - /// A blocking queue that is thread safe. - /// - /// The type of all the elements in the blocking queue. - /// Note that this is a copy of the "BlockingConcurrentDelayableQueue" defined in the MQTT package, but without the "delayable" feature - internal class BlockingConcurrentQueue : IDisposable - { - private readonly ConcurrentQueue _queue; - private readonly SemaphoreSlim _gate; - - public BlockingConcurrentQueue() - { - _queue = new ConcurrentQueue(); - _gate = new(0, 1); - } - - /// - /// Delete all entries from this queue. 
- /// - public void Clear() - { - _queue.Clear(); - } - - public int Count => _queue.Count; - - /// - /// Enqueue the provided item. - /// - /// The item to enqueue. - public void Enqueue(T item) - { - _queue.Enqueue(item); - _gate.Release(); - } - - /// - /// Block until there is a first element in the queue and that element is ready to be dequeued then dequeue and - /// return that element. - /// - /// Cancellation token. - /// The first element in the queue. - public async Task DequeueAsync(CancellationToken cancellationToken = default) - { - while (true) - { - if (_queue.IsEmpty) - { - await _gate.WaitAsync(cancellationToken); - continue; - } - else - { - if (_queue.TryPeek(out T? item) - && _queue.TryDequeue(out T? dequeuedItem)) - { - return dequeuedItem; - } - else - { - await _gate.WaitAsync(cancellationToken); - cancellationToken.ThrowIfCancellationRequested(); - continue; - } - } - } - } - - /// - /// Wakeup any blocking calls not because a new element was added to the queue, but because - /// one or more elements in the queue is now ready. - /// - /// - /// Generally, this method should be called every time an item in this queue is marked as ready. - /// - public void Signal() - { - _gate.Release(); - } - - public void Dispose() - { - _gate.Dispose(); - } - } -} diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandInvoker.cs b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandInvoker.cs index 4ce94b5ff1..f89d6871cb 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandInvoker.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandInvoker.cs @@ -1,15 +1,17 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
+using Azure.Iot.Operations.Protocol.Events; +using Azure.Iot.Operations.Protocol.Models; using System; using System.Collections.Generic; using System.Diagnostics; +using System.Globalization; +using System.Linq; using System.Text; using System.Threading; using System.Threading.Tasks; using System.Xml; -using Azure.Iot.Operations.Protocol.Events; -using Azure.Iot.Operations.Protocol.Models; namespace Azure.Iot.Operations.Protocol.RPC { @@ -216,7 +218,7 @@ internal async Task SubscribeAsNeededAsync(string responseTopicFilter, Cancellat Trace.TraceInformation($"Subscribed to topic filter '{responseTopicFilter}' for command invoker '{_commandName}'"); } - private Task MessageReceivedCallbackAsync(MqttApplicationMessageReceivedEventArgs args) + private async Task MessageReceivedCallbackAsync(MqttApplicationMessageReceivedEventArgs args) { if (args.ApplicationMessage.CorrelationData != null && GuidExtensions.TryParseBytes(args.ApplicationMessage.CorrelationData, out Guid? requestGuid)) { @@ -226,18 +228,154 @@ private Task MessageReceivedCallbackAsync(MqttApplicationMessageReceivedEventArg { if (!_requestIdMap.TryGetValue(requestGuidString, out responsePromise)) { - return Task.CompletedTask; + return; } } args.AutoAcknowledge = true; if (MqttTopicProcessor.DoesTopicMatchFilter(args.ApplicationMessage.Topic, responsePromise.ResponseTopic)) { - responsePromise.Responses.Enqueue(args.ApplicationMessage); + // Assume a protocol version of 1.0 if no protocol version was specified + string? responseProtocolVersion = args.ApplicationMessage.UserProperties?.FirstOrDefault(p => p.Name == AkriSystemProperties.ProtocolVersion)?.Value; + if (!ProtocolVersion.TryParseProtocolVersion(responseProtocolVersion, out ProtocolVersion? 
protocolVersion)) + { + AkriMqttException akriException = new($"Received a response with an unparsable protocol version number: {responseProtocolVersion}") + { + Kind = AkriMqttErrorKind.UnsupportedVersion, + IsShallow = false, + IsRemote = false, + CommandName = _commandName, + CorrelationId = requestGuid, + SupportedMajorProtocolVersions = _supportedMajorProtocolVersions, + ProtocolVersion = responseProtocolVersion, + }; + + SetExceptionSafe(responsePromise.CompletionSource, akriException); + return; + } + + if (!_supportedMajorProtocolVersions.Contains(protocolVersion!.MajorVersion)) + { + AkriMqttException akriException = new($"Received a response with an unsupported protocol version number: {responseProtocolVersion}") + { + Kind = AkriMqttErrorKind.UnsupportedVersion, + IsShallow = false, + IsRemote = false, + CommandName = _commandName, + CorrelationId = requestGuid, + SupportedMajorProtocolVersions = _supportedMajorProtocolVersions, + ProtocolVersion = responseProtocolVersion, + }; + + SetExceptionSafe(responsePromise.CompletionSource, akriException); + return; + } + + MqttUserProperty? statusProperty = args.ApplicationMessage.UserProperties?.FirstOrDefault(p => p.Name == AkriSystemProperties.Status); + + if (!TryValidateResponseHeaders(statusProperty, requestGuidString, out AkriMqttErrorKind errorKind, out string message, out string? headerName, out string? headerValue)) + { + AkriMqttException akriException = new(message) + { + Kind = errorKind, + IsShallow = false, + IsRemote = false, + HeaderName = headerName, + HeaderValue = headerValue, + CommandName = _commandName, + CorrelationId = requestGuid, + }; + + SetExceptionSafe(responsePromise.CompletionSource, akriException); + return; + } + + int statusCode = int.Parse(statusProperty!.Value, CultureInfo.InvariantCulture); + + if (statusCode is not ((int)CommandStatusCode.OK) and not ((int)CommandStatusCode.NoContent)) + { + MqttUserProperty? 
invalidNameProperty = args.ApplicationMessage.UserProperties?.FirstOrDefault(p => p.Name == AkriSystemProperties.InvalidPropertyName); + MqttUserProperty? invalidValueProperty = args.ApplicationMessage.UserProperties?.FirstOrDefault(p => p.Name == AkriSystemProperties.InvalidPropertyValue); + bool isApplicationError = (args.ApplicationMessage.UserProperties?.TryGetProperty(AkriSystemProperties.IsApplicationError, out string? isAppError) ?? false) && isAppError?.ToLower(CultureInfo.InvariantCulture) != "false"; + string? statusMessage = args.ApplicationMessage.UserProperties?.FirstOrDefault(p => p.Name == AkriSystemProperties.StatusMessage)?.Value; + + errorKind = StatusCodeToErrorKind((CommandStatusCode)statusCode, isApplicationError, invalidNameProperty != null, invalidValueProperty != null); + AkriMqttException akriException = new(statusMessage ?? "Error condition identified by remote service") + { + Kind = errorKind, + IsShallow = false, + IsRemote = true, + HeaderName = UseHeaderFields(errorKind) ? invalidNameProperty?.Value : null, + HeaderValue = UseHeaderFields(errorKind) ? invalidValueProperty?.Value : null, + PropertyName = UsePropertyFields(errorKind) ? invalidNameProperty?.Value : null, + PropertyValue = UsePropertyFields(errorKind) ? invalidValueProperty?.Value : null, + TimeoutName = UseTimeoutFields(errorKind) ? invalidNameProperty?.Value : null, + TimeoutValue = UseTimeoutFields(errorKind) ? GetAsTimeSpan(invalidValueProperty?.Value) : null, + CommandName = _commandName, + CorrelationId = requestGuid, + }; + + if (errorKind == AkriMqttErrorKind.UnsupportedVersion) + { + MqttUserProperty? supportedMajorVersions = args.ApplicationMessage.UserProperties?.FirstOrDefault(p => p.Name == AkriSystemProperties.SupportedMajorProtocolVersions); + MqttUserProperty? 
requestProtocolVersion = args.ApplicationMessage.UserProperties?.FirstOrDefault(p => p.Name == AkriSystemProperties.RequestedProtocolVersion); + + if (requestProtocolVersion != null) + { + akriException.ProtocolVersion = requestProtocolVersion.Value; + } + else + { + Trace.TraceWarning("Command executor failed to provide the request's protocol version"); + } + + if (supportedMajorVersions != null + && ProtocolVersion.TryParseFromString(supportedMajorVersions!.Value, out int[]? versions)) + { + akriException.SupportedMajorProtocolVersions = versions; + } + else + { + Trace.TraceWarning("Command executor failed to provide the supported major protocol versions"); + } + } + + SetExceptionSafe(responsePromise.CompletionSource, akriException); + return; + } + + TResp response; + CommandResponseMetadata responseMetadata; + try + { + response = _serializer.FromBytes(args.ApplicationMessage.Payload, args.ApplicationMessage.ContentType, args.ApplicationMessage.PayloadFormatIndicator); + responseMetadata = new CommandResponseMetadata(args.ApplicationMessage); + } + catch (Exception ex) + { + SetExceptionSafe(responsePromise.CompletionSource, ex); + return; + } + + if (responseMetadata.Timestamp != null) + { + await _applicationContext.ApplicationHlc.UpdateWithOtherAsync(responseMetadata.Timestamp); + } + else + { + Trace.TraceInformation($"No timestamp present in command response metadata."); + } + + ExtendedResponse extendedResponse = new() { Response = response, ResponseMetadata = responseMetadata }; + + if (!responsePromise.CompletionSource.TrySetResult(extendedResponse)) + { + Trace.TraceWarning("Failed to complete the command response promise. This may be because the operation was cancelled or finished with exception."); + } } } - return Task.CompletedTask; + return; } private static bool TryValidateResponseHeaders( @@ -335,20 +473,6 @@ private static bool UsePropertyFields(AkriMqttErrorKind errorKind) /// Cancellation token. 
/// The command response including the command response metadata public async Task> InvokeCommandAsync(TReq request, CommandRequestMetadata? metadata = null, Dictionary? additionalTopicTokenMap = null, TimeSpan? commandTimeout = default, CancellationToken cancellationToken = default) - { - IAsyncEnumerable> response = InvokeCommandAsync(false, request, metadata, additionalTopicTokenMap, commandTimeout, cancellationToken); - var enumerator = response.GetAsyncEnumerator(cancellationToken); - await enumerator.MoveNextAsync(); - return enumerator.Current; - } - - public IAsyncEnumerable> InvokeStreamingCommandAsync(TReq request, CommandRequestMetadata? metadata = null, Dictionary? additionalTopicTokenMap = null, TimeSpan? commandTimeout = default, CancellationToken cancellationToken = default) - { - // user shouldn't have to do the stitching. We do it. Ordering concerns, though? - return InvokeCommandAsync(true, request, metadata, additionalTopicTokenMap, commandTimeout, cancellationToken); - } - - private async IAsyncEnumerable> InvokeCommandAsync(bool isStreaming, TReq request, CommandRequestMetadata? metadata = null, Dictionary? additionalTopicTokenMap = null, TimeSpan? 
commandTimeout = default, CancellationToken cancellationToken = default) { cancellationToken.ThrowIfCancellationRequested(); ObjectDisposedException.ThrowIf(_isDisposed, this); @@ -433,11 +557,6 @@ private async IAsyncEnumerable> InvokeCommandAs requestMessage.AddUserProperty("$partition", clientId); requestMessage.AddUserProperty(AkriSystemProperties.SourceId, clientId); - if (isStreaming) - { - requestMessage.AddUserProperty(AkriSystemProperties.IsStreamingCommand, "true"); - } - // TODO remove this once akri service is code gen'd to expect srcId instead of invId requestMessage.AddUserProperty(AkriSystemProperties.CommandInvokerId, clientId); @@ -496,158 +615,65 @@ private async IAsyncEnumerable> InvokeCommandAs }; } - //TODO operationCancelled and timeout exceptions were deleted to accomodate IAsyncEnumerable. Catch them elsewhere? - // https://github.com/dotnet/roslyn/issues/39583#issuecomment-728097630 workaround? - StreamingExtendedResponse extendedResponse; - - // "do while" since every command should have at least one intended response, but streaming commands may have more - do + ExtendedResponse extendedResponse; + try { - MqttApplicationMessage mqttMessage = await WallClock.WaitAsync(responsePromise.Responses.DequeueAsync(cancellationToken), reifiedCommandTimeout, cancellationToken).ConfigureAwait(false); - - //TODO mqtt message to command response - - // Assume a protocol version of 1.0 if no protocol version was specified - string? responseProtocolVersion = mqttMessage.UserProperties?.FirstOrDefault(p => p.Name == AkriSystemProperties.ProtocolVersion)?.Value; - if (!ProtocolVersion.TryParseProtocolVersion(responseProtocolVersion, out ProtocolVersion? 
protocolVersion)) + extendedResponse = await WallClock.WaitAsync(responsePromise.CompletionSource.Task, reifiedCommandTimeout, cancellationToken).ConfigureAwait(false); + if (responsePromise.CompletionSource.Task.IsFaulted) { - AkriMqttException akriException = new($"Received a response with an unparsable protocol version number: {responseProtocolVersion}") - { - Kind = AkriMqttErrorKind.UnsupportedVersion, - IsShallow = false, - IsRemote = false, - CommandName = _commandName, - CorrelationId = requestGuid, - SupportedMajorProtocolVersions = _supportedMajorProtocolVersions, - ProtocolVersion = responseProtocolVersion, - }; - - SetExceptionSafe(responsePromise.CompletionSource, akriException); - return; - } - - if (!_supportedMajorProtocolVersions.Contains(protocolVersion!.MajorVersion)) - { - AkriMqttException akriException = new($"Received a response with an unsupported protocol version number: {responseProtocolVersion}") - { - Kind = AkriMqttErrorKind.UnsupportedVersion, - IsShallow = false, - IsRemote = false, - CommandName = _commandName, - CorrelationId = requestGuid, - SupportedMajorProtocolVersions = _supportedMajorProtocolVersions, - ProtocolVersion = responseProtocolVersion, - }; - - SetExceptionSafe(responsePromise.CompletionSource, akriException); - return; - } - - MqttUserProperty? statusProperty = mqttMessage.UserProperties?.FirstOrDefault(p => p.Name == AkriSystemProperties.Status); - - if (!TryValidateResponseHeaders(statusProperty, requestGuidString, out AkriMqttErrorKind errorKind, out string message, out string? headerName, out string? 
headerValue)) - { - AkriMqttException akriException = new(message) - { - Kind = errorKind, - IsShallow = false, - IsRemote = false, - HeaderName = headerName, - HeaderValue = headerValue, - CommandName = _commandName, - CorrelationId = requestGuid, - }; - - SetExceptionSafe(responsePromise.CompletionSource, akriException); - return; - } - - int statusCode = int.Parse(statusProperty!.Value, CultureInfo.InvariantCulture); - - if (statusCode is not ((int)CommandStatusCode.OK) and not ((int)CommandStatusCode.NoContent)) - { - MqttUserProperty? invalidNameProperty = mqttMessage.UserProperties?.FirstOrDefault(p => p.Name == AkriSystemProperties.InvalidPropertyName); - MqttUserProperty? invalidValueProperty = mqttMessage.UserProperties?.FirstOrDefault(p => p.Name == AkriSystemProperties.InvalidPropertyValue); - bool isApplicationError = (mqttMessage.UserProperties?.TryGetProperty(AkriSystemProperties.IsApplicationError, out string? isAppError) ?? false) && isAppError?.ToLower(CultureInfo.InvariantCulture) != "false"; - string? statusMessage = mqttMessage.UserProperties?.FirstOrDefault(p => p.Name == AkriSystemProperties.StatusMessage)?.Value; - - errorKind = StatusCodeToErrorKind((CommandStatusCode)statusCode, isApplicationError, invalidNameProperty != null, invalidValueProperty != null); - AkriMqttException akriException = new(statusMessage ?? "Error condition identified by remote service") - { - Kind = errorKind, - IsShallow = false, - IsRemote = true, - HeaderName = UseHeaderFields(errorKind) ? invalidNameProperty?.Value : null, - HeaderValue = UseHeaderFields(errorKind) ? invalidValueProperty?.Value : null, - PropertyName = UsePropertyFields(errorKind) ? invalidNameProperty?.Value : null, - PropertyValue = UsePropertyFields(errorKind) ? invalidValueProperty?.Value : null, - TimeoutName = UseTimeoutFields(errorKind) ? invalidNameProperty?.Value : null, - TimeoutValue = UseTimeoutFields(errorKind) ? 
GetAsTimeSpan(invalidValueProperty?.Value) : null, - CommandName = _commandName, - CorrelationId = requestGuid, - }; - - if (errorKind == AkriMqttErrorKind.UnsupportedVersion) - { - MqttUserProperty? supportedMajorVersions = mqttMessage.UserProperties?.FirstOrDefault(p => p.Name == AkriSystemProperties.SupportedMajorProtocolVersions); - MqttUserProperty? requestProtocolVersion = mqttMessage.UserProperties?.FirstOrDefault(p => p.Name == AkriSystemProperties.RequestedProtocolVersion); - - if (requestProtocolVersion != null) - { - akriException.ProtocolVersion = requestProtocolVersion.Value; - } - else - { - Trace.TraceWarning("Command executor failed to provide the request's protocol version"); - } - - if (supportedMajorVersions != null - && ProtocolVersion.TryParseFromString(supportedMajorVersions!.Value, out int[]? versions)) + throw responsePromise.CompletionSource.Task.Exception?.InnerException + ?? new AkriMqttException($"Command '{_commandName}' failed with unknown exception") { - akriException.SupportedMajorProtocolVersions = versions; - } - else - { - Trace.TraceWarning("Command executor failed to provide the supported major protocol versions"); - } - } - - SetExceptionSafe(responsePromise.CompletionSource, akriException); - return; - } - - TResp response; - CommandResponseMetadata responseMetadata; - try - { - response = _serializer.FromBytes(mqttMessage.Payload, mqttMessage.ContentType, .PayloadFormatIndicator); - responseMetadata = new CommandResponseMetadata(mqttMessage); - } - catch (Exception ex) - { - SetExceptionSafe(responsePromise.CompletionSource, ex); - return; + Kind = AkriMqttErrorKind.UnknownError, + IsShallow = false, + IsRemote = false, + CommandName = _commandName, + CorrelationId = requestGuid, + }; } + } + catch (TimeoutException e) + { + SetCanceledSafe(responsePromise.CompletionSource); - if (responseMetadata.Timestamp != null) + throw new AkriMqttException($"Command '{_commandName}' timed out while waiting for a response", e) { - await 
_applicationContext.ApplicationHlc.UpdateWithOtherAsync(responseMetadata.Timestamp, cancellationToken: cancellationToken); - } - else - { - Trace.TraceInformation($"No timestamp present in command response metadata."); - } - - extendedResponse = new() { Response = response, ResponseMetadata = responseMetadata }; + Kind = AkriMqttErrorKind.Timeout, + IsShallow = false, + IsRemote = false, + TimeoutName = nameof(commandTimeout), + TimeoutValue = reifiedCommandTimeout, + CommandName = _commandName, + CorrelationId = requestGuid, + }; + } + catch (OperationCanceledException e) + { + SetCanceledSafe(responsePromise.CompletionSource); - if (!responsePromise.CompletionSource.TrySetResult(extendedResponse)) + throw new AkriMqttException($"Command '{_commandName}' was cancelled while waiting for a response", e) { - Trace.TraceWarning("Failed to complete the command response promise. This may be because the operation was cancelled or finished with exception."); - } - - yield return extendedResponse; - } while (extendedResponse.StreamingResponseId != null && !extendedResponse.IsLastResponse); + Kind = AkriMqttErrorKind.Cancellation, + IsShallow = false, + IsRemote = false, + CommandName = _commandName, + CorrelationId = requestGuid, + }; + } + return extendedResponse; + } + catch (ArgumentException ex) + { + throw new AkriMqttException(ex.Message) + { + Kind = AkriMqttErrorKind.ConfigurationInvalid, + IsShallow = true, + IsRemote = false, + PropertyName = ex.ParamName, + CommandName = _commandName, + CorrelationId = requestGuid, + }; } finally { @@ -660,6 +686,23 @@ private async IAsyncEnumerable> InvokeCommandAs } } + /// + /// Invoke a command and receive a stream of responses. + /// + /// The payload of command request. + /// The metadata of the command request. + /// + /// The topic token replacement map to use in addition to . If this map + /// contains any keys that also has, then values specified in this map will take precedence. 
+ /// + /// How long to wait for a command response. Note that each command executor also has a configurable timeout value that may be shorter than this value. + /// Cancellation token. + /// The asynchronous stream of responses and their respective metadata. + public IAsyncEnumerable> InvokeStreamingCommandAsync(TReq request, CommandRequestMetadata? metadata = null, Dictionary? additionalTopicTokenMap = null, TimeSpan? commandTimeout = default, CancellationToken cancellationToken = default) + { + throw new NotImplementedException(); + } + /// /// Dispose this object and the underlying mqtt client. /// @@ -694,6 +737,18 @@ protected virtual async ValueTask DisposeAsyncCore(bool disposing) _mqttClient.ApplicationMessageReceivedAsync -= MessageReceivedCallbackAsync; + lock (_requestIdMapLock) + { + foreach (KeyValuePair responsePromise in _requestIdMap) + { + if (responsePromise.Value != null && responsePromise.Value.CompletionSource != null) + { + SetCanceledSafe(responsePromise.Value.CompletionSource); + } + } + _requestIdMap.Clear(); + } + try { if (_subscribedTopics.Count > 0) @@ -753,7 +808,7 @@ private sealed class ResponsePromise(string responseTopic) { public string ResponseTopic { get; } = responseTopic; - public BlockingConcurrentQueue Responses { get; } = new(); + public TaskCompletionSource> CompletionSource { get; } = new TaskCompletionSource>(); } } } From 629dc2f984e663b62faf2875aa52d585b7fc2801 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Fri, 25 Jul 2025 15:41:33 -0700 Subject: [PATCH 16/74] ordering q --- doc/dev/adr/0025-rpc-streaming.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index f4348ec1e2..85326b4f52 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -161,8 +161,6 @@ TODO: Start defining a doc in our repo that defines what features are present in ## Open Questions -- Do we need to include response index user
property on each streamed response? - - MQTT message ordering suggests this information can just be inferred by the command invoker - Command timeout/cancellation tokens in single vs streaming? - When to ack the streaming request? - In normal RPC, request is Ack'd only after the method finishes invocation, but this would likely clog up Acks since streaming requests can take a while. From f2a2c60cdad36872495165d5ef2cba9ca24f7ec6 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Fri, 25 Jul 2025 15:43:23 -0700 Subject: [PATCH 17/74] wording --- doc/dev/adr/0025-rpc-streaming.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 85326b4f52..d32b302dcf 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -163,5 +163,5 @@ TODO: Start defining a doc in our repo that defines what features are present in - Command timeout/cancellation tokens in single vs streaming? - When to ack the streaming request? - - In normal RPC, request is Ack'd only after the method finishes invocation, but this would likely clog up Acks since streaming requests can take a while. + - In normal RPC, request is Ack'd only after the method finishes invocation. Waiting until a streamed RPC finishes could clog up Acks since streaming requests can take a while. - Ack after first response is generated? 
\ No newline at end of file From 710a425d03fedfd4c1a60c5d15114acc97a8fdf3 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Fri, 25 Jul 2025 15:49:59 -0700 Subject: [PATCH 18/74] links --- doc/dev/adr/0025-rpc-streaming.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index d32b302dcf..1b26c89b92 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -23,10 +23,10 @@ Users have expressed a desire to allow more than one response per RPC invocation ## State of the art gRPC supports these patterns for RPC: -- Unary RPC (1 request message, 1 response message) -- Server streaming RPC (1 request message, many response messages) -- Client streaming RPC (many request messages, one response message) -- Bi-directional streaming RPC (many request messages, many response messages) +- (Unary RPC)[https://grpc.io/docs/what-is-grpc/core-concepts/#unary-rpc] (1 request message, 1 response message) +- (Server streaming RPC)[https://grpc.io/docs/what-is-grpc/core-concepts/#server-streaming-rpc] (1 request message, many response messages) +- (Client streaming RPC)[https://grpc.io/docs/what-is-grpc/core-concepts/#server-streaming-rpc] (many request messages, one response message) +- (Bi-directional streaming RPC)[https://grpc.io/docs/what-is-grpc/core-concepts/#bidirectional-streaming-rpc] (many request messages, many response messages) gRPC relies on the HTTP streaming protocol to delineate each message in the stream and to indicate the end of the stream. @@ -161,7 +161,6 @@ TODO: Start defining a doc in our repo that defines what features are present in ## Open Questions -- Command timeout/cancellation tokens in single vs streaming? - When to ack the streaming request? - In normal RPC, request is Ack'd only after the method finishes invocation. Waiting until a streamed RPC finishes could clog up Acks since streaming requests can take a while. 
- Ack after first response is generated? \ No newline at end of file From 0006c024690a60ef88258b8944c654293ec1af03 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Fri, 25 Jul 2025 15:51:31 -0700 Subject: [PATCH 19/74] backwards --- doc/dev/adr/0025-rpc-streaming.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 1b26c89b92..ee4f5a7af8 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -23,10 +23,10 @@ Users have expressed a desire to allow more than one response per RPC invocation ## State of the art gRPC supports these patterns for RPC: -- (Unary RPC)[https://grpc.io/docs/what-is-grpc/core-concepts/#unary-rpc] (1 request message, 1 response message) -- (Server streaming RPC)[https://grpc.io/docs/what-is-grpc/core-concepts/#server-streaming-rpc] (1 request message, many response messages) -- (Client streaming RPC)[https://grpc.io/docs/what-is-grpc/core-concepts/#server-streaming-rpc] (many request messages, one response message) -- (Bi-directional streaming RPC)[https://grpc.io/docs/what-is-grpc/core-concepts/#bidirectional-streaming-rpc] (many request messages, many response messages) +- [Unary RPC](https://grpc.io/docs/what-is-grpc/core-concepts/#unary-rpc) (1 request message, 1 response message) +- [Server streaming RPC](https://grpc.io/docs/what-is-grpc/core-concepts/#server-streaming-rpc) (1 request message, many response messages) +- [Client streaming RPC](https://grpc.io/docs/what-is-grpc/core-concepts/#server-streaming-rpc) (many request messages, one response message) +- [Bi-directional streaming RPC](https://grpc.io/docs/what-is-grpc/core-concepts/#bidirectional-streaming-rpc) (many request messages, many response messages) gRPC relies on the HTTP streaming protocol to delineate each message in the stream and to indicate the end of the stream. 
From b457888ac21cbfb0d07df059353cb61f2d274fcc Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Fri, 25 Jul 2025 16:30:30 -0700 Subject: [PATCH 20/74] first thoughts on cancellation, re-order doc a bit --- doc/dev/adr/0025-rpc-streaming.md | 25 ++++++++++++++----- doc/reference/error-model.md | 6 +++++ .../AkriSystemProperties.cs | 5 ++++ .../RPC/CommandStatusCode.cs | 3 +++ 4 files changed, 33 insertions(+), 6 deletions(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index ee4f5a7af8..f70e6dd83f 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -13,12 +13,13 @@ Users have expressed a desire to allow more than one response per RPC invocation - When exposed to the user, each response includes an index of where it was in the stream - Allow for multiple separate commands to be streamed simultaneously - Even the same command can be executed in parallel to itself? + - Allow for invoker to cancel streamed responses mid-stream ## Non-requirements - Different payload shapes per command response - "Client Streaming" RPC (multiples requests -> One command response) - Bi-directional streaming RPC (multiples requests -> multiple responses) - - Allow for invoker to cancel streamed responses mid-stream + - Allow for executor to cancel streamed responses mid-stream ## State of the art @@ -30,8 +31,12 @@ gRPC supports these patterns for RPC: gRPC relies on the HTTP streaming protocol to delineate each message in the stream and to indicate the end of the stream. +[gRPC also allows for either the client or server to cancel an RPC at any time](https://grpc.io/docs/what-is-grpc/core-concepts/#cancelling-an-rpc) + ## Decision +### API design, .NET + Our command invoker base class will now include a new method ```InvokeCommandWithStreaming``` to go with the existing ```InvokeCommand``` method. 
This new method will take the same parameters as ```InvokeCommand``` but will return an asynchronously iterable list (or callback depending on language?) of command response objects. @@ -104,11 +109,7 @@ public abstract class CommandExecutor : IAsyncDisposable With this design, commands that use streaming are defined at codegen time. Codegen layer changes will be defined in a separate ADR, though. -## Example with code gen - -TODO which existing client works well for long-running commands? Mem mon ("Report usage for 10 seconds at 1 second intervals")? - -### MQTT layer implementation +### MQTT layer protocol #### Command invoker side @@ -116,6 +117,12 @@ TODO which existing client works well for long-running commands? Mem mon ("Repor - Executor needs to know if it can stream the response, and this is the flag that tells it that - The command invoker will listen for command responses with the correlation data that matches the invoked method's correlation data until it receives a response with the "__isLastResp" flag set to "true" - The command invoker will acknowledge all messages it receives that match the correlation data of the command request +- The command invoker may cancel a normal or streaming RPC call at an arbitrary time by sending an MQTT message with: + - The same MQTT topic as the invoked method + - The same correlation data as the invoked method + - The user property "__cancelRpc" set to "true". + - No payload + - TODO what would API look like? gRPC uses cancellation token #### Command executor side @@ -131,12 +138,18 @@ TODO which existing client works well for long-running commands? 
Mem mon ("Repor - The command executor receives a command **without** "__streamResp" flag set to "true" - The command must be responded to without streaming +regardless of if an RPC is streaming or not, upon receiving an MQTT message with the "__cancelRpc" flag set to "true", the command executor should notify the application layer that that RPC has been canceled if it is still running. The executor should then send an MQTT message to the appropriate response topic with error code "canceled" to notify the invoker that the RPC has stopped and no further responses will be sent. + ### Protocol version update This feature is not backwards compatible (new invoker can't initiate what it believes is a streaming RPC call on an old executor), so it requires a bump in our RPC protocol version from "1.0" to "2.0". TODO: Start defining a doc in our repo that defines what features are present in what protocol version. +## Example with code gen + +TODO which existing client works well for long-running commands? Mem mon ("Report usage for 10 seconds at 1 second intervals")? 
+ ## Alternative designs considered - Allow the command executor to decide at run time of each command if it will stream responses independent of the command invoker's request diff --git a/doc/reference/error-model.md b/doc/reference/error-model.md index 4172cfce55..6463d6efe1 100644 --- a/doc/reference/error-model.md +++ b/doc/reference/error-model.md @@ -151,6 +151,7 @@ public enum AkriMqttErrorKind ExecutionException, MqttError, UnsupportedVersion, + Canceled, } ``` @@ -263,6 +264,7 @@ public enum AkriMqttErrorKind { EXECUTION_EXCEPTION, MQTT_ERROR, UNSUPPORTED_VERSION, + CANCELED, } ``` @@ -327,6 +329,7 @@ pub enum AIOProtocolErrorKind { ExecutionException, MqttError, UnsupportedVersion, + Canceled, } ``` @@ -400,6 +403,7 @@ const { ExecutionError MqttError UnsupportedVersion + Canceled } ``` @@ -455,6 +459,7 @@ class AkriMqttErrorKind(Enum): EXECUTION_EXCEPTION = 10 MQTT_ERROR = 11 UNSUPPORTED_VERSION = 12 + CANCELED = 13 ``` The Akri.Mqtt error type is defined as follows: @@ -570,6 +575,7 @@ The following table lists the HTTP status codes, conditions on other fields in t | 400 | Bad Request | false | no | | invalid payload | | 408 | Request Timeout | false | yes | yes | timeout | | 415 | Unsupported Media Type | false | yes | yes | invalid header | +| 452 | Request Cancelled | false | no | no | canceled | | 500 | Internal Server Error | false | no | | unknown error | | 500 | Internal Server Error | false | yes | | internal logic error | | 500 | Internal Server Error | true | maybe | | execution error | diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/AkriSystemProperties.cs b/dotnet/src/Azure.Iot.Operations.Protocol/AkriSystemProperties.cs index 386e0406f1..9dda831473 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/AkriSystemProperties.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/AkriSystemProperties.cs @@ -84,6 +84,11 @@ public static class AkriSystemProperties /// internal const string IsStreamingCommand = ReservedPrefix + "streamResp"; + 
/// + /// Inidicates that an RPC request should be cancelled if it is still executing + /// + internal const string CancelCommand = ReservedPrefix + "cancelRpc"; + internal static bool IsReservedUserProperty(string name) { return name.Equals(Timestamp, StringComparison.Ordinal) diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandStatusCode.cs b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandStatusCode.cs index bcfa68182c..447ec74e50 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandStatusCode.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandStatusCode.cs @@ -23,6 +23,9 @@ public enum CommandStatusCode : int /// Unsupported Media Type. The content type specified in the request is not supported by this implementation. UnsupportedMediaType = 415, + /// The RPC was canceled prior to it finishing. + Canceled = 452, + /// Internal Server. Unknown error, internal logic error, or command processor error other than . InternalServerError = 500, From f16ad433c29cd8e6481b54bca20013148ecee4af Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Fri, 25 Jul 2025 16:38:44 -0700 Subject: [PATCH 21/74] more notes, more re-ordering --- doc/dev/adr/0025-rpc-streaming.md | 32 ++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index f70e6dd83f..e185935357 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -117,12 +117,6 @@ With this design, commands that use streaming are defined at codegen time. 
Codeg - Executor needs to know if it can stream the response, and this is the flag that tells it that - The command invoker will listen for command responses with the correlation data that matches the invoked method's correlation data until it receives a response with the "__isLastResp" flag set to "true" - The command invoker will acknowledge all messages it receives that match the correlation data of the command request -- The command invoker may cancel a normal or streaming RPC call at an arbitrary time by sending an MQTT message with: - - The same MQTT topic as the invoked method - - The same correlation data as the invoked method - - The user property "__cancelRpc" set to "true". - - No payload - - TODO what would API look like? gRPC uses cancellation token #### Command executor side @@ -138,11 +132,31 @@ With this design, commands that use streaming are defined at codegen time. Codeg - The command executor receives a command **without** "__streamResp" flag set to "true" - The command must be responded to without streaming -regardless of if an RPC is streaming or not, upon receiving an MQTT message with the "__cancelRpc" flag set to "true", the command executor should notify the application layer that that RPC has been canceled if it is still running. The executor should then send an MQTT message to the appropriate response topic with error code "canceled" to notify the invoker that the RPC has stopped and no further responses will be sent. +### Cancellation support + +To avoid scenarios where long-running streaming responses are no longer wanted, we will want to support cancelling RPC calls. This feature is moreso applicable for RPC streaming, but the design allows for it to work for non-streaming RPC as well. 
+ +#### Invoker side + +- The command invoker may cancel a normal or streaming RPC call at an arbitrary time by sending an MQTT message with: + - The same MQTT topic as the invoked method + - The same correlation data as the invoked method + - The user property "__cancelRpc" set to "true". + - No payload + - TODO what would API look like? gRPC uses cancellation token +- The command invoker should still listen on the response topic for a response from the executor which may still contain a successful response (if cancellation was received after the command completed successfully) + +#### Executor side + +Regardless of if an RPC is streaming or not, upon receiving an MQTT message with the "__cancelRpc" flag set to "true", the command executor should: + - Notify the application layer that that RPC has been canceled if it is still running + - Send an MQTT message to the appropriate response topic with error code "canceled" to notify the invoker that the RPC has stopped and no further responses will be sent. + +If the executor receives a cancellation request for a command that has already completed, then the cancellation request should be ignored. ### Protocol version update -This feature is not backwards compatible (new invoker can't initiate what it believes is a streaming RPC call on an old executor), so it requires a bump in our RPC protocol version from "1.0" to "2.0". +This RPC streaming feature is not backwards compatible (new invoker can't initiate what it believes is a streaming RPC call on an old executor), so it requires a bump in our RPC protocol version from "1.0" to "2.0". TODO: Start defining a doc in our repo that defines what features are present in what protocol version. @@ -153,7 +167,7 @@ TODO which existing client works well for long-running commands? 
Mem mon ("Repor ## Alternative designs considered - Allow the command executor to decide at run time of each command if it will stream responses independent of the command invoker's request - - This would force users to call the ```InvokeCommandWithStreaming``` API on the command invoker side and that returned object isn't as easy to use for single responses + - This would force users to always call the ```InvokeCommandWithStreaming``` API on the command invoker side and that returned object isn't as easy to use for single responses - Treat streaming RPC as a separate protocol from RPC, give it its own client like ```CommandInvoker``` and ```TelemetrySender``` - There is a lot of code re-use between RPC and streaming RPC so this would make implementation very inconvenient - This would introduce another protocol to version. Future RPC changes would likely be relevant to RPC streaming anyways, so this feels redundant. From e7c98d55a0853b78c34322ed1348a1e8eceaa0b0 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Mon, 28 Jul 2025 10:38:24 -0700 Subject: [PATCH 22/74] cleanup --- doc/dev/adr/0025-rpc-streaming.md | 32 ++++++++----------- .../AkriSystemProperties.cs | 2 +- 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index e185935357..d2f560698e 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -5,17 +5,18 @@ Users have expressed a desire to allow more than one response per RPC invocation. 
This would enable scenarios like: - Execute long-running commands while still being responsive -- +- Allow users to report status over time for a long-running command ## Requirements + - Allow for an arbitrary number of command responses for a single command invocation - The total number of responses does not need to be known before the first response is sent - - When exposed to the user, each response includes an index of where it was in the stream + - When exposed to the user, each response includes an index of where it was in the stream and an optional response Id - Allow for multiple separate commands to be streamed simultaneously - - Even the same command can be executed in parallel to itself? - Allow for invoker to cancel streamed responses mid-stream ## Non-requirements + - Different payload shapes per command response - "Client Streaming" RPC (multiples requests -> One command response) - Bi-directional streaming RPC (multiples requests -> multiple responses) @@ -29,14 +30,14 @@ gRPC supports these patterns for RPC: - [Client streaming RPC](https://grpc.io/docs/what-is-grpc/core-concepts/#server-streaming-rpc) (many request messages, one response message) - [Bi-directional streaming RPC](https://grpc.io/docs/what-is-grpc/core-concepts/#bidirectional-streaming-rpc) (many request messages, many response messages) -gRPC relies on the HTTP streaming protocol to delineate each message in the stream and to indicate the end of the stream. - [gRPC also allows for either the client or server to cancel an RPC at any time](https://grpc.io/docs/what-is-grpc/core-concepts/#cancelling-an-rpc) ## Decision ### API design, .NET +#### Invoker side + Our command invoker base class will now include a new method ```InvokeCommandWithStreaming``` to go with the existing ```InvokeCommand``` method. This new method will take the same parameters as ```InvokeCommand``` but will return an asynchronously iterable list (or callback depending on language?) of command response objects. 
@@ -80,6 +81,8 @@ public class StreamingExtendedResponse : ExtendedResponse } ``` +#### Executor side + On the executor side, we will define a separate callback that executes whenever a streaming command is invoked. Instead of returning the single response, this callback will return the asynchronously iterable list of responses. Importantly, this iterable may still be added to by the user after this callback has finished. ```csharp @@ -114,14 +117,13 @@ With this design, commands that use streaming are defined at codegen time. Codeg #### Command invoker side - The command invoker's request message will include an MQTT user property with name "__streamResp" and value "true". - - Executor needs to know if it can stream the response, and this is the flag that tells it that + - Executor needs to know if it can stream the response, and this is the flag that affirms it - The command invoker will listen for command responses with the correlation data that matches the invoked method's correlation data until it receives a response with the "__isLastResp" flag set to "true" - The command invoker will acknowledge all messages it receives that match the correlation data of the command request #### Command executor side - The command executor receives a command with "__streamResp" flag set to "true" - - The command is given to the application layer in a way that allows the application to return at least one response - All command responses will use the same MQTT message correlation data as the request provided so that the invoker can map responses to the appropriate command invocation. - Each streamed response must contain an MQTT user property with name "__streamIndex" and value equal to the index of this response relative to the other responses (0 for the first response, 1 for the second response, etc.) - Each streamed response may contain an MQTT user property with name "__streamRespId" and value equal to that response's streaming response Id. 
This is an optional and user-provided value. @@ -141,14 +143,13 @@ To avoid scenarios where long-running streaming responses are no longer wanted, - The command invoker may cancel a normal or streaming RPC call at an arbitrary time by sending an MQTT message with: - The same MQTT topic as the invoked method - The same correlation data as the invoked method - - The user property "__cancelRpc" set to "true". + - The user property "__stopRpc" set to "true". - No payload - - TODO what would API look like? gRPC uses cancellation token -- The command invoker should still listen on the response topic for a response from the executor which may still contain a successful response (if cancellation was received after the command completed successfully) +- The command invoker should still listen on the response topic for a response from the executor which may still contain a successful response (if cancellation was received after the command completed successfully) or a response signalling that cancellation succeeded #### Executor side -Regardless of if an RPC is streaming or not, upon receiving an MQTT message with the "__cancelRpc" flag set to "true", the command executor should: +Regardless of if an RPC is streaming or not, upon receiving an MQTT message with the "__stopRpc" flag set to "true", the command executor should: - Notify the application layer that that RPC has been canceled if it is still running - Send an MQTT message to the appropriate response topic with error code "canceled" to notify the invoker that the RPC has stopped and no further responses will be sent. @@ -158,12 +159,6 @@ If the executor receives a cancellation request for a command that has already c This RPC streaming feature is not backwards compatible (new invoker can't initiate what it believes is a streaming RPC call on an old executor), so it requires a bump in our RPC protocol version from "1.0" to "2.0". 
-TODO: Start defining a doc in our repo that defines what features are present in what protocol version. - -## Example with code gen - -TODO which existing client works well for long-running commands? Mem mon ("Report usage for 10 seconds at 1 second intervals")? - ## Alternative designs considered - Allow the command executor to decide at run time of each command if it will stream responses independent of the command invoker's request @@ -177,14 +172,13 @@ TODO which existing client works well for long-running commands? Mem mon ("Repor - RPC executor dies before sending the final stream response. - Command invoker throws time out exception waiting on the next response - RPC executor receives command request with "__streamResp", but that executor doesn't understand streaming requests because it uses an older protocol version - - Command executor responds with "not supported protocol" error code + - Command executor responds with "not supported protocol" error code since the request carried protocol version 2.0 - RPC executor receives command request with "__streamResp", and the executor understands that it is a streaming request (protocol versions align) but that particular command doesn't support streaming - RPC executor treats it like a non-streaming command, but adds the "__isLastResp" flag to the one and only response - RPC invoker tries to invoke a non-streaming command that the executor requires streaming on - Atypical case since codegen will prevent this - But, for the sake of non-codegen users, executor returns "invalid header" error pointing to the "__streamResp" header - Invoker understands that, if the "invalid header" value is "__streamResp", it attempted a invoke a streaming method - - timeout per response vs overall? Both? 
## Open Questions diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/AkriSystemProperties.cs b/dotnet/src/Azure.Iot.Operations.Protocol/AkriSystemProperties.cs index 9dda831473..72f44a6c23 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/AkriSystemProperties.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/AkriSystemProperties.cs @@ -87,7 +87,7 @@ public static class AkriSystemProperties /// /// Inidicates that an RPC request should be cancelled if it is still executing /// - internal const string CancelCommand = ReservedPrefix + "cancelRpc"; + internal const string CancelCommand = ReservedPrefix + "stopRpc"; internal static bool IsReservedUserProperty(string name) { From 1c1964b95b56fb75266cee9b33983df985be0879 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Mon, 28 Jul 2025 10:43:50 -0700 Subject: [PATCH 23/74] Only allow cancelling streaming commands --- doc/dev/adr/0025-rpc-streaming.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index d2f560698e..95f66efcb0 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -136,11 +136,11 @@ With this design, commands that use streaming are defined at codegen time. Codeg ### Cancellation support -To avoid scenarios where long-running streaming responses are no longer wanted, we will want to support cancelling RPC calls. This feature is moreso applicable for RPC streaming, but the design allows for it to work for non-streaming RPC as well. +To avoid scenarios where long-running streaming responses are no longer wanted, we will want to support cancelling streaming RPC calls. 
#### Invoker side -- The command invoker may cancel a normal or streaming RPC call at an arbitrary time by sending an MQTT message with: +- The command invoker may cancel a streaming RPC call at an arbitrary time by sending an MQTT message with: - The same MQTT topic as the invoked method - The same correlation data as the invoked method - The user property "__stopRpc" set to "true". @@ -149,11 +149,11 @@ To avoid scenarios where long-running streaming responses are no longer wanted, #### Executor side -Regardless of if an RPC is streaming or not, upon receiving an MQTT message with the "__stopRpc" flag set to "true", the command executor should: +Upon receiving an MQTT message with the "__stopRpc" flag set to "true" that correlates to an actively executing streaming command, the command executor should: - Notify the application layer that that RPC has been canceled if it is still running - Send an MQTT message to the appropriate response topic with error code "canceled" to notify the invoker that the RPC has stopped and no further responses will be sent. -If the executor receives a cancellation request for a command that has already completed, then the cancellation request should be ignored. +If the executor receives a cancellation request for a streaming command that has already completed, then the cancellation request should be ignored. 
### Protocol version update From 61a5212bbfe0fc6a4283d61e63932f8277665ed2 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Mon, 28 Jul 2025 11:25:08 -0700 Subject: [PATCH 24/74] typo --- .../src/Azure.Iot.Operations.Protocol/AkriSystemProperties.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/AkriSystemProperties.cs b/dotnet/src/Azure.Iot.Operations.Protocol/AkriSystemProperties.cs index 72f44a6c23..3e3912399c 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/AkriSystemProperties.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/AkriSystemProperties.cs @@ -80,12 +80,12 @@ public static class AkriSystemProperties internal const string CommandInvokerId = ReservedPrefix + "invId"; /// - /// Inidicates that an RPC request expects the executor to stream one or many responses. + /// Indicates that an RPC request expects the executor to stream one or many responses. /// internal const string IsStreamingCommand = ReservedPrefix + "streamResp"; /// - /// Inidicates that an RPC request should be cancelled if it is still executing + /// Indicates that an RPC request should be cancelled if it is still executing /// internal const string CancelCommand = ReservedPrefix + "stopRpc"; From 79af3012fecdb7f9e3ad1d9f29ad8a2afdc70e64 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Mon, 28 Jul 2025 11:38:57 -0700 Subject: [PATCH 25/74] code changes in another branch --- .../AkriSystemProperties.cs | 35 +++++++------------ .../RPC/CommandExecutor.cs | 11 +----- .../RPC/CommandInvoker.cs | 17 --------- 3 files changed, 13 insertions(+), 50 deletions(-) diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/AkriSystemProperties.cs b/dotnet/src/Azure.Iot.Operations.Protocol/AkriSystemProperties.cs index 3e3912399c..95b53972e9 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/AkriSystemProperties.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/AkriSystemProperties.cs @@ -79,30 +79,19 @@ public static class 
AkriSystemProperties // TODO remove this once akri service is code gen'd to expect srcId instead of invId internal const string CommandInvokerId = ReservedPrefix + "invId"; - /// - /// Indicates that an RPC request expects the executor to stream one or many responses. - /// - internal const string IsStreamingCommand = ReservedPrefix + "streamResp"; - - /// - /// Indicates that an RPC request should be cancelled if it is still executing - /// - internal const string CancelCommand = ReservedPrefix + "stopRpc"; - internal static bool IsReservedUserProperty(string name) - { - return name.Equals(Timestamp, StringComparison.Ordinal) - || name.Equals(Status, StringComparison.Ordinal) - || name.Equals(StatusMessage, StringComparison.Ordinal) - || name.Equals(IsApplicationError, StringComparison.Ordinal) - || name.Equals(InvalidPropertyName, StringComparison.Ordinal) - || name.Equals(InvalidPropertyValue, StringComparison.Ordinal) - || name.Equals(ProtocolVersion, StringComparison.Ordinal) - || name.Equals(SupportedMajorProtocolVersions, StringComparison.Ordinal) - || name.Equals(RequestedProtocolVersion, StringComparison.Ordinal) - || name.Equals(SourceId, StringComparison.Ordinal) - || name.Equals(CommandInvokerId, StringComparison.Ordinal) - || name.Equals(IsStreamingCommand, StringComparison.Ordinal); + { + return name.Equals(Timestamp, StringComparison.Ordinal) + || name.Equals(Status, StringComparison.Ordinal) + || name.Equals(StatusMessage, StringComparison.Ordinal) + || name.Equals(IsApplicationError, StringComparison.Ordinal) + || name.Equals(InvalidPropertyName, StringComparison.Ordinal) + || name.Equals(InvalidPropertyValue, StringComparison.Ordinal) + || name.Equals(ProtocolVersion, StringComparison.Ordinal) + || name.Equals(SupportedMajorProtocolVersions, StringComparison.Ordinal) + || name.Equals(RequestedProtocolVersion, StringComparison.Ordinal) + || name.Equals(SourceId, StringComparison.Ordinal) + || name.Equals(CommandInvokerId, StringComparison.Ordinal); 
} } } diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandExecutor.cs b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandExecutor.cs index 5c48b32826..991537a2b3 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandExecutor.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandExecutor.cs @@ -55,16 +55,7 @@ public abstract class CommandExecutor : IAsyncDisposable /// /// This callback may be null if this command executor only supports commands that stream responses. /// - public Func, CancellationToken, Task>>? OnCommandReceived { get; set; } - - /// - /// The callback to execute each time a command request that expects streamed responses is received. - /// - /// - /// The callback provides the request itself and requires the user to return one to many responses. This callback may be null - /// if this command executors doesn't have any streaming commands. - /// - public Func, CancellationToken, Task>>>? OnStreamingCommandReceived { get; set; } + public required Func, CancellationToken, Task>> OnCommandReceived { get; set; } public string? ExecutorId { get; init; } diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandInvoker.cs b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandInvoker.cs index f89d6871cb..c40db767e4 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandInvoker.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandInvoker.cs @@ -686,23 +686,6 @@ public async Task> InvokeCommandAsync(TReq request, Comm } } - /// - /// Invoke a command and receive a stream of responses. - /// - /// The payload of command request. - /// The metadata of the command request. - /// - /// The topic token replacement map to use in addition to . If this map - /// contains any keys that also has, then values specified in this map will take precedence. - /// - /// How long to wait for a command response. Note that each command executor also has a configurable timeout value that may be shorter than this value. 
- /// Cancellation token. - /// The asynchronously stream of responses and their respective metadata. - public IAsyncEnumerable> InvokeStreamingCommandAsync(TReq request, CommandRequestMetadata? metadata = null, Dictionary? additionalTopicTokenMap = null, TimeSpan? commandTimeout = default, CancellationToken cancellationToken = default) - { - throw new NotImplementedException(); - } - /// /// Dispose this object and the underlying mqtt client. /// From c7fb69e068f13672349dd87dbecd180292e644c6 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Mon, 28 Jul 2025 11:39:50 -0700 Subject: [PATCH 26/74] more --- .../RPC/CommandStatusCode.cs | 3 -- .../RPC/ExtendedResponse.cs | 13 +++----- .../RPC/StreamingExtendedResponse.cs | 31 ------------------- 3 files changed, 4 insertions(+), 43 deletions(-) delete mode 100644 dotnet/src/Azure.Iot.Operations.Protocol/RPC/StreamingExtendedResponse.cs diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandStatusCode.cs b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandStatusCode.cs index 447ec74e50..bcfa68182c 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandStatusCode.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandStatusCode.cs @@ -23,9 +23,6 @@ public enum CommandStatusCode : int /// Unsupported Media Type. The content type specified in the request is not supported by this implementation. UnsupportedMediaType = 415, - /// The RPC was canceled prior to it finishing. - Canceled = 452, - /// Internal Server. Unknown error, internal logic error, or command processor error other than . 
InternalServerError = 500, diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/ExtendedResponse.cs b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/ExtendedResponse.cs index d6ce35c8cc..ec9390ad97 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/ExtendedResponse.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/ExtendedResponse.cs @@ -7,7 +7,7 @@ namespace Azure.Iot.Operations.Protocol.RPC { - public class ExtendedResponse + public struct ExtendedResponse where TResp : class { // These two user properties are used to communicate application level errors in an RPC response message. Code is mandatory, but data is optional. @@ -18,16 +18,11 @@ public class ExtendedResponse public CommandResponseMetadata? ResponseMetadata { get; set; } -#pragma warning disable CA1000 // Do not declare static members on generic types - public static ExtendedResponse CreateFromResponse(TResp response) + public ExtendedResponse(TResp response, CommandResponseMetadata? responseMetadata) { - return new() - { - Response = response, - ResponseMetadata = null, - }; + Response = response; + ResponseMetadata = responseMetadata; } -#pragma warning restore CA1000 // Do not declare static members on generic types public ExtendedResponse WithApplicationError(string errorCode) { diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/StreamingExtendedResponse.cs b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/StreamingExtendedResponse.cs deleted file mode 100644 index 84b36bf385..0000000000 --- a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/StreamingExtendedResponse.cs +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. 
- -using System; -using System.Text.Json; -using System.Text.Json.Nodes; - -namespace Azure.Iot.Operations.Protocol.RPC -{ - public class StreamingExtendedResponse : ExtendedResponse - where TResp : class - { - /// - /// An optional Id for this response (relative to the other responses in this response stream) - /// - /// - /// Users are allowed to provide Ids for each response, only for specific responses, or for none of the responses. - /// - public string? StreamingResponseId { get; set; } - - /// - /// The index of this response relative to the other responses in this response stream. Starts at 0. - /// - public int StreamingResponseIndex { get; set; } - - /// - /// If true, this response is the final response in this response stream. - /// - public bool IsLastResponse { get; set; } - } -} From aa2dc8138e1daf2da93c3d2800a95eeaef3b4afe Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Mon, 28 Jul 2025 11:41:25 -0700 Subject: [PATCH 27/74] more --- .../RPC/CommandExecutor.cs | 6 ------ .../RPC/ExtendedResponse.cs | 13 +++++++++---- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandExecutor.cs b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandExecutor.cs index 991537a2b3..b722293c82 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandExecutor.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/CommandExecutor.cs @@ -49,12 +49,6 @@ public abstract class CommandExecutor : IAsyncDisposable /// public TimeSpan ExecutionTimeout { get; set; } - /// - /// The callback to execute each time a non-streaming command request is received. - /// - /// - /// This callback may be null if this command executor only supports commands that stream responses. - /// public required Func, CancellationToken, Task>> OnCommandReceived { get; set; } public string? 
ExecutorId { get; init; } diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/ExtendedResponse.cs b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/ExtendedResponse.cs index ec9390ad97..a9e049e2ac 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/ExtendedResponse.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/ExtendedResponse.cs @@ -18,11 +18,16 @@ public struct ExtendedResponse public CommandResponseMetadata? ResponseMetadata { get; set; } - public ExtendedResponse(TResp response, CommandResponseMetadata? responseMetadata) +#pragma warning disable CA1000 // Do not declare static members on generic types + public static ExtendedResponse CreateFromResponse(TResp response) { - Response = response; - ResponseMetadata = responseMetadata; + return new() + { + Response = response, + ResponseMetadata = null, + }; } +#pragma warning restore CA1000 // Do not declare static members on generic types public ExtendedResponse WithApplicationError(string errorCode) { @@ -32,7 +37,7 @@ public ExtendedResponse WithApplicationError(string errorCode) } public ExtendedResponse WithApplicationError(string errorCode, string? errorPayload) - { + { ResponseMetadata ??= new(); SetApplicationError(errorCode, errorPayload); return this; From 0784a10f0f97b049ddb574fbcf2f3838ed804e26 Mon Sep 17 00:00:00 2001 From: Tim Taylor Date: Mon, 28 Jul 2025 11:42:14 -0700 Subject: [PATCH 28/74] Update ExtendedResponse.cs --- .../src/Azure.Iot.Operations.Protocol/RPC/ExtendedResponse.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/ExtendedResponse.cs b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/ExtendedResponse.cs index a9e049e2ac..33a5babdef 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/RPC/ExtendedResponse.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/RPC/ExtendedResponse.cs @@ -1,4 +1,4 @@ -// Copyright (c) Microsoft Corporation. +// Copyright (c) Microsoft Corporation. 
// Licensed under the MIT License. using System; @@ -37,7 +37,7 @@ public ExtendedResponse WithApplicationError(string errorCode) } public ExtendedResponse WithApplicationError(string errorCode, string? errorPayload) - { + { ResponseMetadata ??= new(); SetApplicationError(errorCode, errorPayload); return this; From ff9efc9052346e03942c682a97c9221dfa9ca6b1 Mon Sep 17 00:00:00 2001 From: Tim Taylor Date: Mon, 28 Jul 2025 11:42:37 -0700 Subject: [PATCH 29/74] Update AkriSystemProperties.cs --- .../AkriSystemProperties.cs | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/AkriSystemProperties.cs b/dotnet/src/Azure.Iot.Operations.Protocol/AkriSystemProperties.cs index 95b53972e9..e55cd7fc7d 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/AkriSystemProperties.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/AkriSystemProperties.cs @@ -1,4 +1,4 @@ -// Copyright (c) Microsoft Corporation. +// Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
using System; @@ -80,17 +80,17 @@ public static class AkriSystemProperties internal const string CommandInvokerId = ReservedPrefix + "invId"; internal static bool IsReservedUserProperty(string name) - { - return name.Equals(Timestamp, StringComparison.Ordinal) - || name.Equals(Status, StringComparison.Ordinal) - || name.Equals(StatusMessage, StringComparison.Ordinal) - || name.Equals(IsApplicationError, StringComparison.Ordinal) - || name.Equals(InvalidPropertyName, StringComparison.Ordinal) - || name.Equals(InvalidPropertyValue, StringComparison.Ordinal) - || name.Equals(ProtocolVersion, StringComparison.Ordinal) - || name.Equals(SupportedMajorProtocolVersions, StringComparison.Ordinal) - || name.Equals(RequestedProtocolVersion, StringComparison.Ordinal) - || name.Equals(SourceId, StringComparison.Ordinal) + { + return name.Equals(Timestamp, StringComparison.Ordinal) + || name.Equals(Status, StringComparison.Ordinal) + || name.Equals(StatusMessage, StringComparison.Ordinal) + || name.Equals(IsApplicationError, StringComparison.Ordinal) + || name.Equals(InvalidPropertyName, StringComparison.Ordinal) + || name.Equals(InvalidPropertyValue, StringComparison.Ordinal) + || name.Equals(ProtocolVersion, StringComparison.Ordinal) + || name.Equals(SupportedMajorProtocolVersions, StringComparison.Ordinal) + || name.Equals(RequestedProtocolVersion, StringComparison.Ordinal) + || name.Equals(SourceId, StringComparison.Ordinal) || name.Equals(CommandInvokerId, StringComparison.Ordinal); } } From 0ff4dbe1774bf107b3840adb8252108d70e0caa6 Mon Sep 17 00:00:00 2001 From: Tim Taylor Date: Wed, 30 Jul 2025 11:04:58 -0700 Subject: [PATCH 30/74] Update 0025-rpc-streaming.md --- doc/dev/adr/0025-rpc-streaming.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 95f66efcb0..d70105b430 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -13,7 +13,7 @@ Users 
have expressed a desire to allow more than one response per RPC invocation - The total number of responses does not need to be known before the first response is sent - When exposed to the user, each response includes an index of where it was in the stream and an optional response Id - Allow for multiple separate commands to be streamed simultaneously - - Allow for invoker to cancel streamed responses mid-stream + - Allow for invoker to cancel streamed responses mid-stream (from client side) ## Non-requirements @@ -184,4 +184,4 @@ This RPC streaming feature is not backwards compatible (new invoker can't initia - When to ack the streaming request? - In normal RPC, request is Ack'd only after the method finishes invocation. Waiting until a streamed RPC finishes could clog up Acks since streaming requests can take a while. - - Ack after first response is generated? \ No newline at end of file + - Ack after first response is generated? From 4aef0a4e2dd0fe2c4f269a08d26513a95afe6b55 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Fri, 1 Aug 2025 13:44:07 -0700 Subject: [PATCH 31/74] Remove responseId concept. 
User will do this with their own user properties if they want it --- doc/dev/adr/0025-rpc-streaming.md | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index d70105b430..6f7c9dff3b 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -11,7 +11,7 @@ Users have expressed a desire to allow more than one response per RPC invocation - Allow for an arbitrary number of command responses for a single command invocation - The total number of responses does not need to be known before the first response is sent - - When exposed to the user, each response includes an index of where it was in the stream and an optional response Id + - When exposed to the user, each response includes an index of where it was in the stream - Allow for multiple separate commands to be streamed simultaneously - Allow for invoker to cancel streamed responses mid-stream (from client side) @@ -61,14 +61,6 @@ Additionally, this new method will return an extended version of the ```Extended public class StreamingExtendedResponse : ExtendedResponse where TResp : class { - /// - /// An optional Id for this response (relative to the other responses in this response stream) - /// - /// - /// Users are allowed to provide Ids for each response, only for specific responses, or for none of the responses. - /// - public string? StreamingResponseId { get; set; } - /// /// The index of this response relative to the other responses in this response stream. Starts at 0. /// @@ -126,7 +118,6 @@ With this design, commands that use streaming are defined at codegen time. Codeg - The command executor receives a command with "__streamResp" flag set to "true" - All command responses will use the same MQTT message correlation data as the request provided so that the invoker can map responses to the appropriate command invocation. 
- Each streamed response must contain an MQTT user property with name "__streamIndex" and value equal to the index of this response relative to the other responses (0 for the first response, 1 for the second response, etc.) - - Each streamed response may contain an MQTT user property with name "__streamRespId" and value equal to that response's streaming response Id. This is an optional and user-provided value. - The final command response will include an MQTT user property "__isLastResp" with value "true" to signal that it is the final response in the stream. - A streaming command is allowed to have a single response. It must include the "__isLastResp" flag in that first/final response - Cache is only updated once the stream has completed and it is updated to include all of the responses (in order) for the command so they can be re-played if the streaming command is invoked again by the same client From 9dbb1746c139740a36148589418e5366e4b4057c Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Fri, 1 Aug 2025 16:21:39 -0700 Subject: [PATCH 32/74] Incorporate a lot of feedback --- doc/dev/adr/0025-rpc-streaming.md | 243 +++++++++++++++++++++--------- 1 file changed, 169 insertions(+), 74 deletions(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 6f7c9dff3b..02294da378 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -2,25 +2,27 @@ ## Context -Users have expressed a desire to allow more than one response per RPC invocation. This would enable scenarios like: +Users have expressed a desire to allow more than one request and/or more than one response per RPC invocation. 
This would enable scenarios like: - Execute long-running commands while still being responsive - Allow users to report status over time for a long-running command +- Invoking a command where the executor wants a series of data points that would be impractical to put in one message ## Requirements - - Allow for an arbitrary number of command responses for a single command invocation - - The total number of responses does not need to be known before the first response is sent - - When exposed to the user, each response includes an index of where it was in the stream + - Allow for an arbitrary number of command requests and responses for a single command invocation + - The total number of requests and responses does not need to be known before the first request/response is sent + - The total number of entries in a stream is allowed to be 1 + - When exposed to the user, each request and response includes an index of where it was in the stream - Allow for multiple separate commands to be streamed simultaneously - - Allow for invoker to cancel streamed responses mid-stream (from client side) + - Allow for invoker and/or executor to cancel a streamed request and/or streamed response at any time ## Non-requirements - Different payload shapes per command response - - "Client Streaming" RPC (multiples requests -> One command response) - - Bi-directional streaming RPC (multiples requests -> multiple responses) - - Allow for executor to cancel streamed responses mid-stream + - The API of the receiving side of a stream will provide the user the streamed requests/responses in their **intended** order rather than their **received** order + - If the stream's Nth message is lost due to message expiry (or other circumstances), our API should still notify the user when the N+1th stream message is received + - This may be added as a feature later if requested by customers ## State of the art @@ -36,28 +38,41 @@ gRPC supports these patterns for RPC: ### API design, .NET +While RPC 
streaming shares a lot of similarities to normal RPC, we will define a new communication pattern to handle this scenario with two corresponding base classes: ```StreamingCommandInvoker``` and ```StreamingCommandExecutor```. + #### Invoker side -Our command invoker base class will now include a new method ```InvokeCommandWithStreaming``` to go with the existing ```InvokeCommand``` method. +The new ```StreamingCommandInvoker``` will largely look like the existing ```CommandInvoker```, but will have an API for ```InvokeCommandWithStreaming```. -This new method will take the same parameters as ```InvokeCommand``` but will return an asynchronously iterable list (or callback depending on language?) of command response objects. +This new method will take the same parameters as ```InvokeCommand``` but will accept a stream of requests and return a stream of command responses. ```csharp -public abstract class CommandInvoker +public abstract class StreamingCommandInvoker where TReq : class where TResp : class { - // Single response - public Task> InvokeCommandAsync(TReq request, ...) {...} - - // Many responses, responses may be staggered - public IAsyncEnumerable> InvokeStreamingCommandAsync(TReq request, ...) {...} + // Many requests, many responses. + public IAsyncEnumerable> InvokeStreamingCommandAsync(IAsyncEnumerable> requests, ...) {...} } ``` -Additionally, this new method will return an extended version of the ```ExtendedResponse``` wrapper that will include the streaming-specific information about each response: +Additionally, these new methods will use extended versions of the ```ExtendedRequest``` and ```ExtendedResponse``` classes that will include the streaming-specific information about each request and response: ```csharp +public class StreamingExtendedRequest : ExtendedRequest + where TResp : class +{ + /// + /// The index of this request relative to the other requests in this request stream. Starts at 0. 
+ /// + public int StreamingRequestIndex { get; set; } + + /// + /// If true, this request is the final request in this request stream. + /// + public bool IsLastRequest { get; set; } +} + public class StreamingExtendedResponse : ExtendedResponse where TResp : class { @@ -75,29 +90,20 @@ public class StreamingExtendedResponse : ExtendedResponse #### Executor side -On the executor side, we will define a separate callback that executes whenever a streaming command is invoked. Instead of returning the single response, this callback will return the asynchronously iterable list of responses. Importantly, this iterable may still be added to by the user after this callback has finished. +The new ```StreamingCommandExecutor``` will largely look like the existing ```CommandExecutor```, but the callback to notify users that a command was received will include a stream of requests and return a stream of responses. ```csharp -public abstract class CommandExecutor : IAsyncDisposable +public abstract class StreamingCommandExecutor : IAsyncDisposable where TReq : class where TResp : class { /// - /// The callback to execute each time a non-streaming command request is received. - /// - /// - /// This callback may be null if this command executor only supports commands that stream responses. - /// - public Func, CancellationToken, Task>>? OnCommandReceived { get; set; } - - /// - /// The callback to execute each time a command request that expects streamed responses is received. + /// A streaming command was invoked /// /// - /// The callback provides the request itself and requires the user to return one to many responses. This callback may be null - /// if this command executors doesn't have any streaming commands. + /// The callback provides the stream of requests and requires the user to return one to many responses. /// - public Func, CancellationToken, Task>>>? 
OnStreamingCommandReceived { get; set; } + public required Func>, CancellationToken, Task>>> OnStreamingCommandReceived { get; set; } } ``` @@ -106,73 +112,162 @@ With this design, commands that use streaming are defined at codegen time. Codeg ### MQTT layer protocol -#### Command invoker side +#### Streaming user property + +To convey streaming context in a request/response stream, we will put this information in the "__stream" MQTT user property with a value that looks like: + +```____``` TODO examples + +with data types + +```___``` + +examples: + +```0_false_false_false```: The first (and not last) message in a stream + +```3_true_false_false```: The third and final message in a stream + +```0_true_false_false```: The first and final message in a stream + +```0_true_true_false```: This stream should be canceled. Note that the values for ```index```, ```isLast``` and ```requestCanceledSuccessfully``` are irrelevant here. + +```0_false_false_true```: This stream was successfully canceled. Note that the values for ```index```, ```isLast```, and ```cancelRequest``` are irrelevant here. + +[see cancellation support for more details on cancellation scenarios](#cancellation-support) + +#### Invoker side + +The streaming command invoker will first subscribe to the appropriate response topic prior to sending any requests + +Once the user invokes a streaming command, the streaming command invoker will send one to many MQTT messages with: + - The same response topic + - The same correlation data + - The appropriate streaming metadata [see above](#streaming-user-property) + - The serialized payload as provided by the user's request object + - Any user-definied metadata as specified in the ```ExtendedRequest``` + +Once the stream of requests has finished sending, the streaming command invoker should expect the stream of responses to arrive on the provided response topic with the provided correlation data and the streaming user property. 
+ +The command invoker will acknowledge all messages it receives that match the correlation data of a known streaming command + +#### Executor side + +A streaming command executor should start by subscribing to the expected command topic + - Even though the streaming command classes are separate from the existing RPC classes, they should also offer the same features around topic string pre/suffixing, custom topic token support, etc. + +Upon receiving a MQTT message that contains a streaming request, the streaming executor should notify the application layer that the first message in a request stream was received. Once the executor has notified the user that the final message in a request stream was received, the user should be able to provide a stream of responses. Upon receiving each response in that stream from the user, the executor will send an MQTT message for each streamed response with: + - The same correlation data as the original request + - The topic as specified by the original request's response topic field + - The appropriate streaming metadata [see above](#streaming-user-property) + - The serialized payload as provided by the user's response object + - Any user-definied metadata as specified in the ```ExtendedResponse``` + +Unlike normal RPC, the streaming command executor will not provide any cache support. This is because streams may grow indefinitely in length and size. + +Also unlike normal RPC, the stream command executor should acknowledge the MQTT message of a received stream request as soon as the user has been notified about it. We cannot defer acknowledging the stream request messages until after the full command has finished as streams may run indefinitely and we don't want to block other users of the MQTT client. 
+ +### Timeout support + +We need to provide timeout support for our streaming APIs to avoid scenarios such as: + +- The invoker side is stuck waiting for the final response in a stream because it was lost or the executor side crashed before sending it. +- The executor side is stuck waiting for the final request in a stream because it was lost or the invoker side crashed before sending it. +- The broker delivers a request/response stream message that is "no longer relevant" + +#### Decision -- The command invoker's request message will include an MQTT user property with name "__streamResp" and value "true". - - Executor needs to know if it can stream the response, and this is the flag that affirms it -- The command invoker will listen for command responses with the correlation data that matches the invoked method's correlation data until it receives a response with the "__isLastResp" flag set to "true" -- The command invoker will acknowledge all messages it receives that match the correlation data of the command request +Invoker side: + - delivery timeout (assigned per request message in stream (extended streaming request assigned?)) + - execution timeout (noted in header of each request (for redundancy in case first message is lost)) + - Client waits for ?????? Can't assume just execution timeout -#### Command executor side + Executor side: + - At constructor time, user assigns -- The command executor receives a command with "__streamResp" flag set to "true" - - All command responses will use the same MQTT message correlation data as the request provided so that the invoker can map responses to the appropriate command invocation. - - Each streamed response must contain an MQTT user property with name "__streamIndex" and value equal to the index of this response relative to the other responses (0 for the first response, 1 for the second response, etc.) 
- - The final command response will include an MQTT user property "__isLastResp" with value "true" to signal that it is the final response in the stream. - - A streaming command is allowed to have a single response. It must include the "__isLastResp" flag in that first/final response - - Cache is only updated once the stream has completed and it is updated to include all of the responses (in order) for the command so they can be re-played if the streaming command is invoked again by the same client -- The command executor receives a command **without** "__streamResp" flag set to "true" - - The command must be responded to without streaming +#### Alternative timeout designs considered + +- Include in the initial request user properties a total number of milliseconds that the command response can take to be delivered. + - This is the approach that gRPC takes, but... + - It doesn't account well for delays in message delivery from broker. + - It doesn't account for scenarios where the invoker dies unexpectedly since gRPC relies on a direct connection between invoker and executor +- Allow users to specify timeouts for delivery and a separate timeout for execution + - a bit complex on API surface. Also different from how our normal RPC does timeouts ### Cancellation support -To avoid scenarios where long-running streaming responses are no longer wanted, we will want to support cancelling streaming RPC calls. +To avoid scenarios where long-running streaming requests/responses are no longer wanted, we will want to support cancelling streaming RPC calls. + +Since sending a cancellation request may fail (message expiry on broker side), the SDK API design should allow for the user to repeatedly call "cancel" and should return successfully once the other party has responded appropriately. 
+ +The proposed design for that would look like: + +```csharp + +public abstract class StreamingCommandInvoker + where TReq : class + where TResp : class +{ + public async Task CancelStreamingCommandAsync(Guid correlationId) {...} +} + +public abstract class StreamingCommandExecutor : IAsyncDisposable + where TReq : class + where TResp : class +{ + public async Task CancelStreamingCommandAsync(Guid correlationId) {...} +} + +``` + +where the user gets the correlationId from the ```CommandRequestMetadata``` they provide to the command invoker or the ```CommandResponseMetadata``` that the executor gives them upon receiving a streaming command. #### Invoker side -- The command invoker may cancel a streaming RPC call at an arbitrary time by sending an MQTT message with: +- The command invoker may cancel a streaming command while streaming the request or receiving the stream of responses by sending an MQTT message with: - The same MQTT topic as the invoked method - The same correlation data as the invoked method - - The user property "__stopRpc" set to "true". + - Streaming metadata with the ["cancel" flag set](#streaming-user-property) - No payload -- The command invoker should still listen on the response topic for a response from the executor which may still contain a successful response (if cancellation was received after the command completed successfully) or a response signalling that cancellation succeeded +- The command invoker should still listen on the response topic for a response from the executor which may still contain a successful response (if cancellation was received after the command completed successfully) or a response signalling that cancellation succeeded ("Canceled" error code) + +As detailed below, the executor may also cancel the stream at any time. 
In response to receiving a cancellation request from the executor, the invoker should send an MQTT message with: + - The same topic as the command itself + - The same correlation data as the command itself + - Streaming metadata with the ["stream successfully canceled" flag set](#streaming-user-property) + +Any received MQTT messages pertaining to a command that was already canceled should still be acknowledged. They should not be given to the user, though. #### Executor side -Upon receiving an MQTT message with the "__stopRpc" flag set to "true" that correlates to an actively executing streaming command, the command executor should: +Upon receiving an MQTT message with the stream "cancel" flag set to "true" that correlates to an actively executing streaming command, the command executor should: - Notify the application layer that that RPC has been canceled if it is still running - Send an MQTT message to the appropriate response topic with error code "canceled" to notify the invoker that the RPC has stopped and no further responses will be sent. If the executor receives a cancellation request for a streaming command that has already completed, then the cancellation request should be ignored. -### Protocol version update +The executor may cancel receiving a stream of requests or cancel sending a stream of responses as well. It does so by sending an MQTT message to the invoker with: + - The same MQTT topic as command response + - The same correlation data as the invoked method + - Streaming metadata with the ["cancel" flag set](#streaming-user-property) + - No payload + +The command invoker should then send a message on the same command topic with the same correlation data with the "stream canceled successfully" flag set. + +Any received MQTT messages pertaining to a command that was already canceled should still be acknowledged. They should not be given to the user, though. 
+ +### Protocol versioning -This RPC streaming feature is not backwards compatible (new invoker can't initiate what it believes is a streaming RPC call on an old executor), so it requires a bump in our RPC protocol version from "1.0" to "2.0". +By maintaining RPC streaming as a separate communication pattern from normal RPC, we will need to introduce an independent protocol version for RPC streaming specifically. It will start at ```1.0``` and should follow the same protocol versioning rules as the protocol versions used by telemetry and normal RPC. ## Alternative designs considered - Allow the command executor to decide at run time of each command if it will stream responses independent of the command invoker's request - This would force users to always call the ```InvokeCommandWithStreaming``` API on the command invoker side and that returned object isn't as easy to use for single responses - - Treat streaming RPC as a separate protocol from RPC, give it its own client like ```CommandInvoker``` and ```TelemetrySender``` - - There is a lot of code re-use between RPC and streaming RPC so this would make implementation very inconvenient - - This would introduce another protocol to version. Future RPC changes would likely be relevant to RPC streaming anyways, so this feels redundant. - -## Error cases - - - RPC executor dies before sending the final stream response. 
- - Command invoker throws time out exception waiting on the next response - - RPC executor receives command request with "__streamResp", but that executor doesn't understand streaming requests because it uses an older protocol version - - Command executor responds with "not supported protocol" error code since the request carried protocol version 2.0 - - RPC executor receives command request with "__streamResp", and the executor understands that it is a streaming request (protocol versions align) but that particular command doesn't support streaming - - RPC executor treats it like a non-streaming command, but adds the "__isLastResp" flag to the one and only response - - RPC invoker tries to invoke a non-streaming command that the executor requires streaming on - - Atypical case since codegen will prevent this - - But, for the sake of non-codegen users, executor returns "invalid header" error pointing to the "__streamResp" header - - Invoker understands that, if the "invalid header" value is "__streamResp", it attempted a invoke a streaming method - - ## Open Questions - -- When to ack the streaming request? - - In normal RPC, request is Ack'd only after the method finishes invocation. Waiting until a streamed RPC finishes could clog up Acks since streaming requests can take a while. - - Ack after first response is generated? 
+ - Treat streaming RPC as the same protocol as RPC + - This introduces a handful of error cases such as: + - Invoker invokes a method that it thinks is non-streaming, but the executor tries streaming responses + - Executor receives a streaming command but the user did not set the streaming command handler callback (which must be optional since not every command executor has streaming commands) + - API design is messy because a command invoker/executor should not expose streaming command APIs if they have no streaming commands + - Caching behavior of normal RPC doesn't fit well with streamed RPCs which may grow indefinitely large From 49b57c7fada20808f59b291b4b8b3f00a4abe297 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Fri, 1 Aug 2025 16:23:31 -0700 Subject: [PATCH 33/74] cleanup --- doc/dev/adr/0025-rpc-streaming.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 02294da378..506c2802c2 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -116,7 +116,7 @@ With this design, commands that use streaming are defined at codegen time. Codeg To convey streaming context in a request/response stream, we will put this information in the "__stream" MQTT user property with a value that looks like: -```____``` TODO examples +```___``` with data types @@ -177,6 +177,8 @@ We need to provide timeout support for our streaming APIs to avoid scenarios suc #### Decision +TODO + Invoker side: - delivery timeout (assigned per request message in stream (extended streaming request assigned?)) - execution timeout (noted in header of each request (for redundancy in case first message is lost)) @@ -191,9 +193,9 @@ Invoker side: - Include in the initial request user properties a total number of milliseconds that the command response can take to be delivered. - This is the approach that gRPC takes, but... 
- It doesn't account well for delays in message delivery from broker. - - It doesn't account for scenarios where the invoker dies unexpectedly since gRPC relies on a direct connection between invoker and executor + - It doesn't account for scenarios where the invoker/executor dies unexpectedly (since gRPC relies on a direct connection between invoker and executor) - Allow users to specify timeouts for delivery and a separate timeout for execution - - a bit complex on API surface. Also different from how our normal RPC does timeouts + - a bit complex on API surface. Also subtly different enough from how our normal RPC does timeouts that it would cause confusion ### Cancellation support From ac799bc9acb4b50ae9f7df4df35a43fc917265d3 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Fri, 1 Aug 2025 16:33:19 -0700 Subject: [PATCH 34/74] canceled error code instead of header --- doc/dev/adr/0025-rpc-streaming.md | 54 +++++++++++++++---------------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 506c2802c2..ab9aeb2ccc 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -38,25 +38,9 @@ gRPC supports these patterns for RPC: ### API design, .NET -While RPC streaming shares a lot of similarities to normal RPC, we will define a new communication pattern to handle this scenario with two corresponding base classes: ```StreamingCommandInvoker``` and ```StreamingCommandExecutor```. +While RPC streaming shares a lot of similarities to normal RPC, we will define a new communication pattern to handle this scenario with two corresponding base classes: ```StreamingCommandInvoker``` and ```StreamingCommandExecutor```. -#### Invoker side - -The new ```StreamingCommandInvoker``` will largely look like the existing ```CommandInvoker```, but will have an API for ```InvokeCommandWithStreaming```. 
- -This new method will take the same parameters as ```InvokeCommand``` but will accept a stream of requests and return a stream of command responses. - -```csharp -public abstract class StreamingCommandInvoker - where TReq : class - where TResp : class -{ - // Many requests, many responses. - public IAsyncEnumerable> InvokeStreamingCommandAsync(IAsyncEnumerable> requests, ...) {...} -} -``` - -Additionally, these new methods will use extended versions of the ```ExtendedRequest``` and ```ExtendedResponse``` classes that will include the streaming-specific information about each request and response: +These new base classes will use extended versions of the ```ExtendedRequest``` and ```ExtendedResponse``` classes to include the streaming-specific information about each request and response: ```csharp public class StreamingExtendedRequest : ExtendedRequest @@ -88,6 +72,22 @@ public class StreamingExtendedResponse : ExtendedResponse } ``` +#### Invoker side + +The new ```StreamingCommandInvoker``` will largely look like the existing ```CommandInvoker```, but will instead have an API for ```InvokeCommandWithStreaming```. + +This new method will take the same parameters as ```InvokeCommand``` but will accept a stream of requests and return a stream of command responses. + +```csharp +public abstract class StreamingCommandInvoker + where TReq : class + where TResp : class +{ + // Many requests, many responses. + public IAsyncEnumerable> InvokeStreamingCommandAsync(IAsyncEnumerable> requests, ...) {...} +} +``` + #### Executor side The new ```StreamingCommandExecutor``` will largely look like the existing ```CommandExecutor```, but the callback to notify users that a command was received will include a stream of requests and return a stream of responses. @@ -116,23 +116,21 @@ With this design, commands that use streaming are defined at codegen time. 
Codeg To convey streaming context in a request/response stream, we will put this information in the "__stream" MQTT user property with a value that looks like: -```___``` +```__``` with data types -```___``` +```__``` examples: -```0_false_false_false```: The first (and not last) message in a stream - -```3_true_false_false```: The third and final message in a stream +```0_false_false```: The first (and not last) message in a stream -```0_true_false_false```: The first and final message in a stream +```3_true_false```: The third and final message in a stream -```0_true_true_false```: This stream should be canceled. Note that the values for ```index```, ```isLast``` and ```requestCanceledSuccessfully``` are irrelevant here. +```0_true_false```: The first and final message in a stream -```0_false_false_true```: This stream was successfully canceled. Note that the values for ```index```, ```isLast```, and ```cancelRequest``` are irrelevant here. +```0_true_true```: This stream should be canceled. Note that the values for ```index```, and ```isLast``` are ignored here. [see cancellation support for more details on cancellation scenarios](#cancellation-support) @@ -223,7 +221,7 @@ public abstract class StreamingCommandExecutor : IAsyncDisposable ``` -where the user gets the correlationId from the ```CommandRequestMetadata``` they provide to the command invoker or the ```CommandResponseMetadata``` that the executor gives them upon receiving a streaming command. +where the user gets the correlationId from the ```CommandRequestMetadata``` they provide to the command invoker when invoking a command or the ```CommandResponseMetadata``` that the executor gives them upon receiving a streaming command. #### Invoker side @@ -237,7 +235,7 @@ where the user gets the correlationId from the ```CommandRequestMetadata``` they As detailed below, the executor may also cancel the stream at any time. 
In response to receiving a cancellation request from the executor, the invoker should send an MQTT message with: - The same topic as the command itself - The same correlation data as the command itself - - Streaming metadata with the ["stream successfully canceled" flag set](#streaming-user-property) + - The "Canceled" error code Any received MQTT messages pertaining to a command that was already canceled should still be acknowledged. They should not be given to the user, though. From 04f1ce9682bcbcf718742cd40999c61411a55580 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Fri, 1 Aug 2025 16:35:49 -0700 Subject: [PATCH 35/74] timeout thoughts --- doc/dev/adr/0025-rpc-streaming.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index ab9aeb2ccc..cc8466349e 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -180,7 +180,7 @@ TODO Invoker side: - delivery timeout (assigned per request message in stream (extended streaming request assigned?)) - execution timeout (noted in header of each request (for redundancy in case first message is lost)) - - Client waits for ?????? 
Can't assume just execution timeout + - overall timeout (not sent over the wire, just the amount of time the invoker should wait from API call to final response before giving up) Executor side: - At constructor time, user assigns From 353a2fd1e385a0a496a6cae1d5f94c40c4b201a2 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Mon, 4 Aug 2025 10:32:21 -0700 Subject: [PATCH 36/74] asdf --- doc/dev/adr/0025-rpc-streaming.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index cc8466349e..7c9e68c2aa 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -188,7 +188,7 @@ Invoker side: #### Alternative timeout designs considered -- Include in the initial request user properties a total number of milliseconds that the command response can take to be delivered. +- Include in the initial request user properties the number of milliseconds allowed between receiving the final command request and delivering the final command response. - This is the approach that gRPC takes, but... - It doesn't account well for delays in message delivery from broker. 
- It doesn't account for scenarios where the invoker/executor dies unexpectedly (since gRPC relies on a direct connection between invoker and executor) From e462a3f8a5f1b5ed6a470c22bcaf0b9c9ddba8cb Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Mon, 4 Aug 2025 10:46:08 -0700 Subject: [PATCH 37/74] reword --- doc/dev/adr/0025-rpc-streaming.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 7c9e68c2aa..7629b266ba 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -188,7 +188,7 @@ Invoker side: #### Alternative timeout designs considered -- Include in the initial request user properties the number of milliseconds allowed between receiving the final command request and delivering the final command response. +- Specify the number of milliseconds allowed between the executor receiving the final command request and delivering the final command response. - This is the approach that gRPC takes, but... - It doesn't account well for delays in message delivery from broker. - It doesn't account for scenarios where the invoker/executor dies unexpectedly (since gRPC relies on a direct connection between invoker and executor) From 4c4cb0b39db63de3d60be8ca995231c5e5c6fc47 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Mon, 4 Aug 2025 11:23:11 -0700 Subject: [PATCH 38/74] API fix --- doc/dev/adr/0025-rpc-streaming.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 7629b266ba..557e9cf706 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -84,7 +84,7 @@ public abstract class StreamingCommandInvoker where TResp : class { // Many requests, many responses. - public IAsyncEnumerable> InvokeStreamingCommandAsync(IAsyncEnumerable> requests, ...) 
{...} + public IAsyncEnumerable> InvokeStreamingCommandAsync(IAsyncEnumerable requests, ...) {...} } ``` From 5050adaaef0a50231b55f22896ee1f7ae8dc0c6d Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Mon, 4 Aug 2025 11:25:56 -0700 Subject: [PATCH 39/74] not needed --- doc/dev/adr/0025-rpc-streaming.md | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 557e9cf706..c322ab870c 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -50,11 +50,6 @@ public class StreamingExtendedRequest : ExtendedRequest /// The index of this request relative to the other requests in this request stream. Starts at 0. /// public int StreamingRequestIndex { get; set; } - - /// - /// If true, this request is the final request in this request stream. - /// - public bool IsLastRequest { get; set; } } public class StreamingExtendedResponse : ExtendedResponse @@ -64,11 +59,6 @@ public class StreamingExtendedResponse : ExtendedResponse /// The index of this response relative to the other responses in this response stream. Starts at 0. /// public int StreamingResponseIndex { get; set; } - - /// - /// If true, this response is the final response in this response stream. 
- /// - public bool IsLastResponse { get; set; } } ``` From 9caa59dfe535709d734c9c38cc21566a89c03e58 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Mon, 4 Aug 2025 11:42:08 -0700 Subject: [PATCH 40/74] non-req --- doc/dev/adr/0025-rpc-streaming.md | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index c322ab870c..ec6c76f40f 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -23,6 +23,7 @@ Users have expressed a desire to allow more than one request and/or more than on - The API of the receiving side of a stream will provide the user the streamed requests/responses in their **intended** order rather than their **received** order - If the stream's Nth message is lost due to message expiry (or other circumstances), our API should still notify the user when the N+1th stream message is received - This may be added as a feature later if requested by customers + - Allow for users to send command responses before the request stream has finished ## State of the art From 58d8fc1c0981e83da65371f0cab149f3591a70c6 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Tue, 5 Aug 2025 15:30:53 -0700 Subject: [PATCH 41/74] fix type --- doc/dev/adr/0025-rpc-streaming.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index ec6c76f40f..1b5730d68d 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -94,7 +94,7 @@ public abstract class StreamingCommandExecutor : IAsyncDisposable /// /// The callback provides the stream of requests and requires the user to return one to many responses. 
/// - public required Func>, CancellationToken, Task>>> OnStreamingCommandReceived { get; set; } + public required Func>, CancellationToken, IAsyncEnumerable>> OnStreamingCommandReceived { get; set; } } ``` From fead2ade61475bd41144602bba72e8ca26c58da0 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Wed, 6 Aug 2025 12:01:58 -0700 Subject: [PATCH 42/74] timeout musings --- doc/dev/adr/0025-rpc-streaming.md | 56 +++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 17 deletions(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 1b5730d68d..d62715a830 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -107,24 +107,30 @@ With this design, commands that use streaming are defined at codegen time. Codeg To convey streaming context in a request/response stream, we will put this information in the "__stream" MQTT user property with a value that looks like: -```__``` +```___``` with data types -```__``` +```___``` + +where the field ```_``` is only present in request stream messages examples: -```0_false_false```: The first (and not last) message in a stream +```0_false_false_10000```: The first (and not last) message in a request stream where the RPC should timeout beyond 10 seconds + +```3_true_false```: The third and final message in a response stream -```3_true_false```: The third and final message in a stream +```0_true_false_1000```: The first and final message in a request stream where the RPC should timeout beyond 1 second -```0_true_false```: The first and final message in a stream +```0_true_true_0```: This request stream has been canceled. Note that the values for ```index```, ```isLast```, and `````` are ignored here. -```0_true_true```: This stream should be canceled. Note that the values for ```index```, and ```isLast``` are ignored here. +```0_true_true```: This response stream has been canceled. 
Note that the values for ```index``` and ```isLast``` are ignored here. [see cancellation support for more details on cancellation scenarios](#cancellation-support) +[see timeout support for more details on timeout scenarios](#timeout-support) + #### Invoker side The streaming command invoker will first subscribe to the appropriate response topic prior to sending any requests @@ -166,25 +172,41 @@ We need to provide timeout support for our streaming APIs to avoid scenarios suc #### Decision -TODO +We will offer two layers of timeout configurations. + - Delivery timeout per message in the stream + - Overall timeout for the RPC as a whole. + +##### Delivery timeout + +For the delivery timeout per message, the streaming command invoker and streaming command executor will assign the user-provided timeout as the message expiry interval in the associated MQTT PUBLISH packet. This allows the broker to discard the message if it wasn't delivered in time. Unlike normal RPC, though, the receiving end (invoker or executor) does not care about the message expiry interval. + +If the user specifies a delivery timeout of 0, the PUBLISH packet should not include a message expiry interval. + +##### RPC timeout + +For the overall RPC timeout, each message in the request stream will include a value in the `````` portion of the ```__stream``` user property. This header should be sent in all request stream messages in case the first N request messages are lost due to timeout or otherwise. + +The invoker side will start a countdown from this value after receiving the first PUBACK. If the final stream response has not been received when the countdown ends, the invoker will throw a timeout exception to the user and should not send any further messages. + +The executor side will start a countdown from this value after receiving the first PUBLISH in the request stream.
At the end of the countdown, if the executor has not sent the final response in the response stream, the executor should return the ```timeout``` error code back to the invoker. -Invoker side: - - delivery timeout (assigned per request message in stream (extended streaming request assigned?)) - - execution timeout (noted in header of each request (for redundancy in case first message is lost)) - - overall timeout (not sent over the wire, just the amount of time the invoker should wait from API call to final response before giving up) +Any request stream or response stream messages that are received by the executor/invoker after they have ended the timeout countdown should be acknowledged but otherwise ignored. This will require both parties to track correlationIds for timed out streams for a period of time beyond the expected end of the RPC so that any straggler messages are not treated as initiating a new stream. - Executor side: - - At constructor time, user assigns +An RPC timeout value of 0 will be treated as infinite timeout. +This design does make the invoker start the countdown sooner than the executor, but the time difference is negligible in most circumstances. #### Alternative timeout designs considered +- The above approach, but trying to calculate time spent on broker side (using message expiry interval) so that invoker and executor timeout at the same exact time + - This would require additional metadata in the ```__stream``` user property (intended vs received message expiry interval) and is only helpful + in the uncommon scenario where a message spends extended periods of time at the broker - Specify the number of milliseconds allowed between the executor receiving the final command request and delivering the final command response. - This is the approach that gRPC takes, but... - - It doesn't account well for delays in message delivery from broker. 
- It doesn't account for scenarios where the invoker/executor dies unexpectedly (since gRPC relies on a direct connection between invoker and executor) -- Allow users to specify timeouts for delivery and a separate timeout for execution - - a bit complex on API surface. Also subtly different enough from how our normal RPC does timeouts that it would cause confusion +- Use the message expiry interval of the first received message in a stream to indicate the RPC level timeout + - Misuses the message expiry interval's purpose and could lead to broker storing messages for extended periods of time unintentionally + - The first message sent may not be the first message received ### Cancellation support @@ -250,7 +272,7 @@ Any received MQTT messages pertaining to a command that was already canceled sho ### Protocol versioning -By maintaining RPC streaming as a separate communication pattern from normal RPC, we will need to introduce an independent protocol version for RPC streaming specifically. It will start at ```1.0``` and should follow the same protocol versioning rules as the protocol versions used by telemetry and normal RPC. +By maintaining RPC streaming as a separate communication pattern from normal RPC, we will need to introduce an independent protocol version for RPC streaming. It will start at ```1.0``` and should follow the same protocol versioning rules as the protocol versions used by telemetry and normal RPC. 
## Alternative designs considered From 7c83f0621b1df324954bfe330f03939f1167cb10 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Thu, 7 Aug 2025 10:01:20 -0700 Subject: [PATCH 43/74] wording --- doc/dev/adr/0025-rpc-streaming.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index d62715a830..f17e20601f 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -188,7 +188,7 @@ For the overall RPC timeout, each message in the request stream will include a v The invoker side will start a countdown from this value after receiving the first PUBACK that ends with throwing a timeout exception to the user if the final stream response has not been received yet. The invoker should not send any further messages -The executor side will start a countdown from this value after receiving the first PUBLISH in the request stream. At the end of the countdown, if the executor has not sent the final response in the response stream, the executor should return the ```timeout``` error code back to the invoker. +The executor side will start a countdown from this value after receiving the first PUBLISH in the request stream. At the end of the countdown, if the executor has not sent the final response in the response stream, the executor should return the ```timeout``` error code back to the invoker. The executor should also notify the user callback to stop. Any request stream or response stream messages that are received by the executor/invoker after they have ended the timeout countdown should be acknowledged but otherwise ignored. This will require both parties to track correlationIds for timed out streams for a period of time beyond the expected end of the RPC so that any straggler messages are not treated as initiating a new stream. 
From fb9de69bb9be19255925631b47bf4bba13846395 Mon Sep 17 00:00:00 2001 From: Tim Taylor Date: Thu, 7 Aug 2025 13:51:50 -0700 Subject: [PATCH 44/74] Update 0025-rpc-streaming.md --- doc/dev/adr/0025-rpc-streaming.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index f17e20601f..34d973d942 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -2,11 +2,7 @@ ## Context -Users have expressed a desire to allow more than one request and/or more than one response per RPC invocation. This would enable scenarios like: - -- Execute long-running commands while still being responsive -- Allow users to report status over time for a long-running command -- Invoking a command where the executor wants a series of data points that would be impractical to put in one message +Users have expressed a desire to allow more than one request and/or more than one response per RPC invocation. ## Requirements From 29e18115b25febec451fb2868d982b89437f52a7 Mon Sep 17 00:00:00 2001 From: Tim Taylor Date: Thu, 7 Aug 2025 15:24:59 -0700 Subject: [PATCH 45/74] Update 0025-rpc-streaming.md --- doc/dev/adr/0025-rpc-streaming.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 34d973d942..ed1f4472fc 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -246,7 +246,7 @@ As detailed below, the executor may also cancel the stream at any time. In respo - The same correlation data as the command itself - The "Canceled" error code -Any received MQTT messages pertaining to a command that was already canceled should still be acknowledged. They should not be given to the user, though. +After receiving an acknowledgement from the executor side that the stream has been canceled, any further received messages should be acknowledged but not given to the user. 
#### Executor side From c15790c9f9d9849a102510881b09197a55d510fe Mon Sep 17 00:00:00 2001 From: Tim Taylor Date: Thu, 7 Aug 2025 15:37:23 -0700 Subject: [PATCH 46/74] Update 0025-rpc-streaming.md semicolon separator --- doc/dev/adr/0025-rpc-streaming.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index ed1f4472fc..95b13e840d 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -103,25 +103,25 @@ With this design, commands that use streaming are defined at codegen time. Codeg To convey streaming context in a request/response stream, we will put this information in the "__stream" MQTT user property with a value that looks like: -```___``` +```:::``` with data types -```___``` +```:::``` -where the field ```_``` is only present in request stream messages +where the field ```:``` is only present in request stream messages examples: -```0_false_false_10000```: The first (and not last) message in a request stream where the RPC should timeout beyond 10 seconds +```0:false:false:10000```: The first (and not last) message in a request stream where the RPC should timeout beyond 10 seconds -```3_true_false```: The third and final message in a response stream +```3:true:false```: The third and final message in a response stream -```0_true_false_1000```: The first and final message in a request stream where the RPC should timeout beyond 1 second +```0:true:false:1000```: The first and final message in a request stream where the RPC should timeout beyond 1 second -```0_true_true_0```: This request stream has been canceled. Note that the values for ```index```, ```isLast```, and `````` are ignored here. +```0:true:true:0```: This request stream has been canceled. Note that the values for ```index```, ```isLast```, and `````` are ignored here. -```0_true_true```: This response stream has been canceled. 
Note that the values for ```index``` and ```isLast``` are ignored here. +```0:true:true```: This response stream has been canceled. Note that the values for ```index``` and ```isLast``` are ignored here. [see cancellation support for more details on cancellation scenarios](#cancellation-support) From 98f8763b9e8177455adac577205154a7fdd2e17f Mon Sep 17 00:00:00 2001 From: Tim Taylor Date: Thu, 7 Aug 2025 15:38:28 -0700 Subject: [PATCH 47/74] Update 0025-rpc-streaming.md gRPC note --- doc/dev/adr/0025-rpc-streaming.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 95b13e840d..b536c481c9 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -27,7 +27,7 @@ gRPC supports these patterns for RPC: - [Unary RPC](https://grpc.io/docs/what-is-grpc/core-concepts/#unary-rpc) (1 request message, 1 response message) - [Server streaming RPC](https://grpc.io/docs/what-is-grpc/core-concepts/#server-streaming-rpc) (1 request message, many response messages) - [Client streaming RPC](https://grpc.io/docs/what-is-grpc/core-concepts/#client-streaming-rpc) (many request messages, one response message) -- [Bi-directional streaming RPC](https://grpc.io/docs/what-is-grpc/core-concepts/#bidirectional-streaming-rpc) (many request messages, many response messages) +- [Bi-directional streaming RPC](https://grpc.io/docs/what-is-grpc/core-concepts/#bidirectional-streaming-rpc) (many request messages, many response messages.
Request and response stream may send concurrently and/or in any order) [gRPC also allows for either the client or server to cancel an RPC at any time](https://grpc.io/docs/what-is-grpc/core-concepts/#cancelling-an-rpc) From bf8c252e677a3b3b05e2a04d4ee51c383ab987fa Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Wed, 3 Sep 2025 14:27:45 -0700 Subject: [PATCH 48/74] Address feedback - Allow users to send responses before the end of the request stream - Add .NET API surface to support the review process - Re-vamp the extended request types to better handle automatic indexing - Split metadata into stream-specific vs message-specific - Remove per-message timeout in a stream --- doc/dev/adr/0025-rpc-streaming.md | 107 ++-- .../Streaming/ICancelableStreamContext.cs | 33 ++ .../Streaming/RequestStreamMetadata.cs | 45 ++ .../Streaming/ResponseStreamMetadata.cs | 23 + .../Streaming/StreamMessageMetadata.cs | 29 ++ .../Streaming/StreamingCommandExecutor.cs | 69 +++ .../Streaming/StreamingCommandInvoker.cs | 77 +++ .../Streaming/StreamingExtendedRequest.cs | 29 ++ .../Streaming/StreamingExtendedResponse.cs | 29 ++ .../StreamingIntegrationTests.cs | 456 ++++++++++++++++++ 10 files changed, 849 insertions(+), 48 deletions(-) create mode 100644 dotnet/src/Azure.Iot.Operations.Protocol/Streaming/ICancelableStreamContext.cs create mode 100644 dotnet/src/Azure.Iot.Operations.Protocol/Streaming/RequestStreamMetadata.cs create mode 100644 dotnet/src/Azure.Iot.Operations.Protocol/Streaming/ResponseStreamMetadata.cs create mode 100644 dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamMessageMetadata.cs create mode 100644 dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs create mode 100644 dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandInvoker.cs create mode 100644 dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingExtendedRequest.cs create mode 100644 
dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingExtendedResponse.cs create mode 100644 dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index b536c481c9..675d2cf5a5 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -12,6 +12,10 @@ Users have expressed a desire to allow more than one request and/or more than on - When exposed to the user, each request and response includes an index of where it was in the stream - Allow for multiple separate commands to be streamed simultaneously - Allow for invoker and/or executor to cancel a streamed request and/or streamed response at any time + - Allow for invoker + executor to send their requests/responses at arbitrary* times + - For instance, executor may send 1 response upon receiving 1 request, or it may wait for the request stream to finish before sending the first response + - Alternatively, this allows the invoker to send a request upon receiving a response + - *The only limitations are that the invoker must initiate the RPC with a request and the executor must end the RPC with a response ## Non-requirements @@ -19,7 +23,6 @@ Users have expressed a desire to allow more than one request and/or more than on - The API of the receiving side of a stream will provide the user the streamed requests/responses in their **intended** order rather than their **received** order - If the stream's Nth message is lost due to message expiry (or other circumstances), our API should still notify the user when the N+1th stream message is received - This may be added as a feature later if requested by customers - - Allow for users to send command responses before the request stream has finished ## State of the art @@ -37,25 +40,35 @@ gRPC supports these patterns for RPC: While RPC streaming shares a lot of similarities to normal RPC, we will define a new communication 
pattern to handle this scenario with two corresponding base classes: ```StreamingCommandInvoker``` and ```StreamingCommandExecutor```. -These new base classes will use extended versions of the ```ExtendedRequest``` and ```ExtendedResponse``` classes to include the streaming-specific information about each request and response: +These new base classes will use similar versions of the ```ExtendedRequest``` and ```ExtendedResponse``` RPC classes to include the streaming-specific information about each request and response: ```csharp -public class StreamingExtendedRequest : ExtendedRequest - where TResp : class +public class StreamingExtendedRequest + where TReq : class { /// - /// The index of this request relative to the other requests in this request stream. Starts at 0. + /// The request payload + /// + public TReq Request { get; set; } + + /// + /// The metadata specific to this message in the stream /// - public int StreamingRequestIndex { get; set; } + public StreamMessageMetadata Metadata { get; set; } } -public class StreamingExtendedResponse : ExtendedResponse - where TResp : class +public class StreamingExtendedResponse + where TResp : class { /// - /// The index of this response relative to the other responses in this response stream. Starts at 0. + /// The response payload + /// + public TResp Response { get; set; } + + /// + /// The metadata specific to this message in the stream /// - public int StreamingResponseIndex { get; set; } + public StreamMessageMetadata Metadata { get; set; } } ``` @@ -71,7 +84,7 @@ public abstract class StreamingCommandInvoker where TResp : class { // Many requests, many responses. - public IAsyncEnumerable> InvokeStreamingCommandAsync(IAsyncEnumerable requests, ...) {...} + public async Task> InvokeStreamingCommandAsync(IAsyncEnumerable> requests, StreamRequestMetadata? streamRequestMetadata = null, Dictionary? additionalTopicTokenMap = null, TimeSpan? 
commandTimeout = default, CancellationToken cancellationToken = default) {...} } ``` @@ -90,7 +103,8 @@ public abstract class StreamingCommandExecutor : IAsyncDisposable /// /// The callback provides the stream of requests and requires the user to return one to many responses. /// - public required Func>, CancellationToken, IAsyncEnumerable>> OnStreamingCommandReceived { get; set; } + public required Func, StreamRequestMetadata, CancellationToken, IAsyncEnumerable>> OnStreamingCommandReceived { get; set; } + } ``` @@ -109,9 +123,9 @@ with data types ```:::``` -where the field ```:``` is only present in request stream messages +where the field ```:``` is only present in request stream messages and may be omitted if the RPC has no timeout. -examples: +For example: ```0:false:false:10000```: The first (and not last) message in a request stream where the RPC should timeout beyond 10 seconds @@ -138,16 +152,16 @@ Once the user invokes a streaming command, the streaming command invoker will se - The serialized payload as provided by the user's request object - Any user-definied metadata as specified in the ```ExtendedRequest``` -Once the stream of requests has finished sending, the streaming command invoker should expect the stream of responses to arrive on the provided response topic with the provided correlation data and the streaming user property. +Once the stream of requests has started sending, the streaming command invoker should expect the stream of responses to arrive on the provided response topic with the provided correlation data and the streaming user property. -The command invoker will acknowledge all messages it receives that match the correlation data of a known streaming command +The command invoker will acknowledge all messages it receives that match the correlation data of a known streaming command. 
#### Executor side A streaming command executor should start by subscribing to the expected command topic - Even though the streaming command classes are separate from the existing RPC classes, they should also offer the same features around topic string pre/suffixing, custom topic token support, etc. -Upon receiving a MQTT message that contains a streaming request, the streaming executor should notify the application layer that the first message in a request stream was received. Once the executor has notified the user that the final message in a request stream was received, the user should be able to provide a stream of responses. Upon receiving each response in that stream from the user, the executor will send an MQTT message for each streamed response with: +Upon receiving a MQTT message that contains a streaming request, the streaming executor should notify the application layer that the first message in a request stream was received. Once the executor has notified the user that the first message in a request stream was received, the user should be able to provide a stream of responses. Upon receiving each response in that stream from the user, the executor will send an MQTT message for each streamed response with: - The same correlation data as the original request - The topic as specified by the original request's response topic field - The appropriate streaming metadata [see above](#streaming-user-property) @@ -164,31 +178,20 @@ We need to provide timeout support for our streaming APIs to avoid scenarios suc - The invoker side is stuck waiting for the final response in a stream because it was lost or the executor side crashed before sending it. - The executor side is stuck waiting for the final request in a stream because it was lost or the invoker side crashed before sending it. -- The broker delivers a request/response stream message that is "no longer relevant" #### Decision -We will offer two layers of timeout configurations. 
- - Delivery timeout per message in the stream - - Overall timeout for the RPC as a whole. - -##### Delivery timeout - -For the delivery timeout per message, the streaming command invoker and streaming command executor will assign the user-provided timeout as the message expiry interval in the associated MQTT PUBLISH packet. This allows the broker to discard the message if it wasn't delivered in time. Unlike normal RPC, though, the receiving end (invoker or executor) does not care about the message expiry interval. +We will allow configuration on the invoker's side of a timeout for the RPC as a whole. -If the user specifies a delivery timeout of 0, the PUBLISH packet should not include a message expiry interval. +To enable this, each message in the request stream will include a value in the `````` portion of the ```__stream``` user property. This header should be sent in all request stream messages in case the first N request messages are lost due to timeout or otherwise. -##### RPC timeout - -For the overall RPC timeout, each message in the request stream will include a value in the `````` portion of the ```__stream``` user property. This header should be sent in all request stream messages in case the first N request messages are lost due to timeout or otherwise. - -The invoker side will start a countdown from this value after receiving the first PUBACK that ends with throwing a timeout exception to the user if the final stream response has not been received yet. The invoker should not send any further messages +The invoker side will start a countdown from this value after receiving the first PUBACK that ends with throwing a timeout exception to the user if the final stream response has not been received yet. The invoker should not send any further messages beyond this timeout. The executor side will start a countdown from this value after receiving the first PUBLISH in the request stream. 
At the end of the countdown, if the executor has not sent the final response in the response stream, the executor should return the ```timeout``` error code back to the invoker. The executor should also notify the user callback to stop. -Any request stream or response stream messages that are received by the executor/invoker after they have ended the timeout countdown should be acknowledged but otherwise ignored. This will require both parties to track correlationIds for timed out streams for a period of time beyond the expected end of the RPC so that any straggler messages are not treated as initiating a new stream. +Any request stream or response stream messages that are received by the executor/invoker after they have ended the timeout countdown should be acknowledged but otherwise ignored. This will require both parties to track correlationIds for timed out streams for a period of time beyond the expected end of the RPC so that any post-timeout messages are not treated as initiating a new stream. -An RPC timeout value of 0 will be treated as infinite timeout. +If the request stream omits the timeout value in the ```__stream``` user property, the invoker and executor should treat the RPC as not having a timeout. This design does make the invoker start the countdown sooner than the executor, but the time difference is negligible in most circumstances. 
@@ -202,7 +205,6 @@ This design does make the invoker start the countdown sooner than the executor, - It doesn't account for scenarios where the invoker/executor dies unexpectedly (since gRPC relies on a direct connection between invoker and executor) - Use the message expiry interval of the first received message in a stream to indicate the RPC level timeout - Misuses the message expiry interval's purpose and could lead to broker storing messages for extended periods of time unintentionally - - The first message sent may not be the first message received ### Cancellation support @@ -210,27 +212,36 @@ To avoid scenarios where long-running streaming requests/responses are no longer Since sending a cancellation request may fail (message expiry on broker side), the SDK API design should allow for the user to repeatedly call "cancel" and should return successfully once the other party has responded appropriately. -The proposed design for that would look like: +#### .NET API design -```csharp +The proposed cancellation support would come from the return type on the invoker side and the provided type on the executor side: -public abstract class StreamingCommandInvoker - where TReq : class - where TResp : class +```csharp +public interface ICancelableStreamContext + where T : class { - public async Task CancelStreamingCommandAsync(Guid correlationId) {...} -} + /// + /// The asynchronously readable entries in the stream. + /// + IAsyncEnumerable Entries { get; set; } -public abstract class StreamingCommandExecutor : IAsyncDisposable - where TReq : class - where TResp : class -{ - public async Task CancelStreamingCommandAsync(Guid correlationId) {...} + /// + /// Cancel this received RPC streaming request. + /// + /// Cancellation token for this cancellation request + /// + /// This method may be called by the streaming executor at any time. 
For instance, if the request stream + /// stalls unexpectedly, the executor can call this method to notify the invoker to stop sending requests. + /// Additionally, the executor can call this method if its response stream has stalled unexpectedly. + /// + Task CancelAsync(CancellationToken cancellationToken = default); + } - ``` -where the user gets the correlationId from the ```CommandRequestMetadata``` they provide to the command invoker when invoking a command or the ```CommandResponseMetadata``` that the executor gives them upon receiving a streaming command. +With this design, we can cancel a stream from either side at any time. For detailed examples, see the integration tests written [here](../../../dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs). + +### Protocol layer details #### Invoker side diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/ICancelableStreamContext.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/ICancelableStreamContext.cs new file mode 100644 index 0000000000..85a6a26a4e --- /dev/null +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/ICancelableStreamContext.cs @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +using System.Collections.Generic; +using System.Threading; +using System.Threading.Tasks; + +namespace Azure.Iot.Operations.Protocol.Streaming +{ + /// + /// A stream of requests or responses that can be canceled (with confirmation) at any time. + /// + /// The type of the payload of the request stream + public interface ICancelableStreamContext + where T : class + { + /// + /// The asynchronously readable entries in the stream + /// + IAsyncEnumerable Entries { get; set; } + + /// + /// Cancel this received RPC streaming request. + /// + /// Cancellation token for this cancellation request + /// + /// This method may be called by the streaming executor at any time. 
For instance, if the request stream + /// stalls unexpectedly, the executor can call this method to notify the invoker to stop sending requests. + /// Additionally, the executor can call this method if its response stream has stalled unexpectedly. + /// + Task CancelAsync(CancellationToken cancellationToken = default); + } +} diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/RequestStreamMetadata.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/RequestStreamMetadata.cs new file mode 100644 index 0000000000..32703b7185 --- /dev/null +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/RequestStreamMetadata.cs @@ -0,0 +1,45 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using Azure.Iot.Operations.Protocol.Models; + +namespace Azure.Iot.Operations.Protocol.Streaming +{ + /// + /// Metadata for a request stream as a whole. + /// + public class RequestStreamMetadata + { + /// + /// The correlationId for tracking this streaming request + /// + public Guid CorrelationId { get; set; } + + /// + /// The Id of the client that invoked this streaming request + /// + public string? InvokerClientId { get; set; } + + /// + /// The MQTT topic tokens used in this streaming request. + /// + public Dictionary TopicTokens { get; } = new(); + + /// + /// The partition associated with this streaming request. + /// + public string? Partition { get; } + + /// + /// The content type of all messages sent in this request stream. + /// + public string? ContentType { get; set; } + + /// + /// The payload format indicator for all messages sent in this request stream. 
+ /// + public MqttPayloadFormatIndicator PayloadFormatIndicator { get; set; } + } +} diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/ResponseStreamMetadata.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/ResponseStreamMetadata.cs new file mode 100644 index 0000000000..a88630ec25 --- /dev/null +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/ResponseStreamMetadata.cs @@ -0,0 +1,23 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +using Azure.Iot.Operations.Protocol.Models; + +namespace Azure.Iot.Operations.Protocol.Streaming +{ + /// + /// Metadata for a response stream as a whole. + /// + public class ResponseStreamMetadata + { + /// + /// The content type of all messages in this response stream + /// + public string? ContentType { get; set; } + + /// + /// The payload format indicator for all messages in this response stream + /// + public MqttPayloadFormatIndicator PayloadFormatIndicator { get; set; } + } +} diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamMessageMetadata.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamMessageMetadata.cs new file mode 100644 index 0000000000..b60c0c9cd8 --- /dev/null +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamMessageMetadata.cs @@ -0,0 +1,29 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +using System.Collections.Generic; + +namespace Azure.Iot.Operations.Protocol.Streaming +{ + /// + /// Metadata for a specific message within a request stream + /// + public class StreamMessageMetadata + { + /// + /// The timestamp attached to this particular message + /// + public HybridLogicalClock? 
Timestamp { get; internal set; } + + /// + /// User properties associated with this particular message + /// + public Dictionary UserData { get; } = new(); + + /// + /// The index of this message within the stream as a whole + /// + /// This value is automatically assigned when sending messages in a request/response stream and cannot be overridden. + public int Index { get; internal set; } + } +} diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs new file mode 100644 index 0000000000..699aa6387f --- /dev/null +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs @@ -0,0 +1,69 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Threading; +using System.Threading.Tasks; +#pragma warning disable IDE0060 // Remove unused parameter +#pragma warning disable CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider adding the 'required' modifier or declaring as nullable. + +namespace Azure.Iot.Operations.Protocol.Streaming +{ + public abstract class StreamingCommandExecutor : IAsyncDisposable + where TReq : class + where TResp : class + { + /// + /// The timeout for all commands received by this executor. + /// + /// + /// Note that a command invoker may also send a per-invocation timeout. When this happens, a command will time out if it exceeds either + /// of these timeout values. + /// + public TimeSpan ExecutionTimeout { get; set; } + + /// + /// A streaming command was invoked + /// + /// + /// The callback provides the stream of requests and requires the user to return one to many responses. + /// + public required Func>, RequestStreamMetadata, CancellationToken, IAsyncEnumerable>> OnStreamingCommandReceived { get; set; } + + public string? 
ExecutorId { get; init; } + + public string ServiceGroupId { get; init; } + + public string RequestTopicPattern { get; init; } + + public string? TopicNamespace { get; set; } + + /// + /// The topic token replacement map that this executor will use by default. Generally, this will include the token values + /// for topic tokens such as "executorId" which should be the same for the duration of this command executor's lifetime. + /// + /// + /// Tokens replacement values can also be specified when starting the executor by specifying the additionalTopicToken map in . + /// + public Dictionary TopicTokenMap { get; protected set; } + + public Task StartAsync(int? preferredDispatchConcurrency = null, CancellationToken cancellationToken = default) + { + throw new NotImplementedException(); + } + + public Task StopAsync(CancellationToken cancellationToken = default) + { + throw new NotImplementedException(); + } + + public ValueTask DisposeAsync() + { + GC.SuppressFinalize(this); + return ValueTask.CompletedTask; + } + } +} +#pragma warning restore IDE0060 // Remove unused parameter +#pragma warning restore CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider adding the 'required' modifier or declaring as nullable. diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandInvoker.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandInvoker.cs new file mode 100644 index 0000000000..0e5c6a1e8f --- /dev/null +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandInvoker.cs @@ -0,0 +1,77 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Threading; +using System.Threading.Tasks; + +#pragma warning disable IDE0060 // Remove unused parameter +#pragma warning disable CS8618 // Non-nullable field must contain a non-null value when exiting constructor. 
Consider adding the 'required' modifier or declaring as nullable. + +namespace Azure.Iot.Operations.Protocol.Streaming +{ + //TODO if we allow simultaneous request + response streaming, does it have to end with a response message? Does the final request have to happen prior to the final response message? + public abstract class StreamingCommandInvoker : IAsyncDisposable + where TReq : class + where TResp : class + { + /// + /// The topic token replacement map that this command invoker will use by default. Generally, this will include the token values + /// for topic tokens such as "modelId" which should be the same for the duration of this command invoker's lifetime. + /// + /// + /// Tokens replacement values can also be specified per-method invocation by specifying the additionalTopicToken map in . + /// + public Dictionary TopicTokenMap { get; protected set; } + + public string RequestTopicPattern { get; init; } + + public string? TopicNamespace { get; set; } + + /// + /// The prefix to use in the command response topic. This value is ignored if is set. + /// + /// + /// If no prefix or suffix is specified, and no value is provided in , then this + /// value will default to "clients/{invokerClientId}" for security purposes. + /// + /// If a prefix and/or suffix are provided, then the response topic will use the format: + /// {prefix}/{command request topic}/{suffix}. + /// + public string? ResponseTopicPrefix { get; set; } + + /// + /// The suffix to use in the command response topic. This value is ignored if is set. + /// + /// + /// If no suffix is specified, then the command response topic won't include a suffix. + /// + /// If a prefix and/or suffix are provided, then the response topic will use the format: + /// {prefix}/{command request topic}/{suffix}. + /// + public string? ResponseTopicSuffix { get; set; } + + /// + /// If provided, this topic pattern will be used for command response topic. 
+ /// + /// + /// If not provided, and no value is provided for or , the default pattern used will be clients/{mqtt client id}/{request topic pattern}. + /// + public string? ResponseTopicPattern { get; set; } + + public Task>> InvokeStreamingCommandAsync(IAsyncEnumerable> requests, RequestStreamMetadata? streamRequestMetadata = null, Dictionary? additionalTopicTokenMap = null, TimeSpan? commandTimeout = default, CancellationToken cancellationToken = default) + { + throw new NotImplementedException(); + } + + public ValueTask DisposeAsync() + { + GC.SuppressFinalize(this); + return ValueTask.CompletedTask; + } +#pragma warning restore IDE0060 // Remove unused parameter +#pragma warning restore CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider adding the 'required' modifier or declaring as nullable. + + } +} diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingExtendedRequest.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingExtendedRequest.cs new file mode 100644 index 0000000000..b3079ad2bf --- /dev/null +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingExtendedRequest.cs @@ -0,0 +1,29 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +namespace Azure.Iot.Operations.Protocol.Streaming +{ + /// + /// The payload and metadata associated with a single request in a request stream. + /// + /// The type of the payload of the request + public class StreamingExtendedRequest + where TReq : class + { + /// + /// The request payload + /// + public TReq Request { get; set; } + + /// + /// The metadata specific to this message in the stream + /// + public StreamMessageMetadata Metadata { get; set; } + + public StreamingExtendedRequest(TReq request, StreamMessageMetadata? metadata = null) + { + Request = request; + Metadata = metadata ?? 
new(); + } + } +} diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingExtendedResponse.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingExtendedResponse.cs new file mode 100644 index 0000000000..108c95304b --- /dev/null +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingExtendedResponse.cs @@ -0,0 +1,29 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +namespace Azure.Iot.Operations.Protocol.Streaming +{ + /// + /// The payload and metadata associated with a single response in a response stream. + /// + /// The type of the payload of the response + public class StreamingExtendedResponse + where TResp : class + { + /// + /// The response payload + /// + public TResp Response { get; set; } + + /// + /// The metadata specific to this message in the stream + /// + public StreamMessageMetadata Metadata { get; set; } + + public StreamingExtendedResponse(TResp response, StreamMessageMetadata? metadata = null) + { + Response = response; + Metadata = metadata ?? new(); + } + } +} diff --git a/dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs b/dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs new file mode 100644 index 0000000000..f55014e9e1 --- /dev/null +++ b/dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs @@ -0,0 +1,456 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +using System.Collections.Concurrent; +using System.Runtime.CompilerServices; +using Azure.Iot.Operations.Mqtt.Session; +using Azure.Iot.Operations.Protocol.Streaming; + +namespace Azure.Iot.Operations.Protocol.IntegrationTests +{ + public class StreamingIntegrationTests + { + // shared across all tests in this file, but each test should use a unique GUID as the correlationId of the stream + private readonly ConcurrentDictionary>> _receivedRequests = new(); + private readonly ConcurrentDictionary>> _sentResponses = new(); + +#pragma warning disable CS9113 // Parameter is unread. +#pragma warning disable IDE0060 // Remove unused parameter + internal class StringStreamingCommandInvoker(ApplicationContext applicationContext, IMqttPubSubClient mqttClient) + : StreamingCommandInvoker() + { } + + internal class EchoStringStreamingCommandExecutor : StreamingCommandExecutor + { + internal EchoStringStreamingCommandExecutor(ApplicationContext applicationContext, IMqttPubSubClient mqttClient, string commandName = "echo") +#pragma warning restore IDE0060 // Remove unused parameter + : base() +#pragma warning restore CS9113 // Parameter is unread. + + { + + } + } + + [Theory] + [InlineData(false, false)] + [InlineData(true, false)] + [InlineData(false, true)] + [InlineData(true, true)] + public async Task StreamRequestsAndResponsesInSerial(bool multipleRequests, bool multipleResponses) + { + await using MqttSessionClient invokerMqttClient = await ClientFactory.CreateSessionClientFromEnvAsync(); + await using MqttSessionClient executorMqttClient = await ClientFactory.CreateSessionClientFromEnvAsync(); + + int requestCount = multipleRequests ? 3 : 1; + int responseCount = multipleResponses ? 3 : 1; + + await using EchoStringStreamingCommandExecutor executor = multipleResponses + ? 
new(new(), executorMqttClient) + { + OnStreamingCommandReceived = SerialHandlerMultipleResponses + } + : new(new(), executorMqttClient) + { + OnStreamingCommandReceived = SerialHandlerSingleResponse + }; + + await executor.StartAsync(); + + await using StringStreamingCommandInvoker invoker = new(new(), invokerMqttClient); + + RequestStreamMetadata requestMetadata = new(); + var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStream(requestCount), requestMetadata); + + List> receivedResponses = new(); + await foreach (StreamingExtendedResponse response in responseStreamContext.Entries) + { + receivedResponses.Add(response); + } + + List> expectedRequests = new(); + await foreach (var request in GetStringRequestStream(requestCount)) + { + expectedRequests.Add(request); + } + + if (!_receivedRequests.TryGetValue(requestMetadata.CorrelationId, out var receivedRequests)) + { + Assert.Fail("Executor did not receive any requests"); + } + + Assert.Equal(expectedRequests.Count, receivedRequests.Count); + for (int i = 0; i < expectedRequests.Count; i++) + { + Assert.Equal(expectedRequests[i].Request, receivedRequests[i].Request); + Assert.Equal(i, receivedRequests[i].Metadata!.Index); + } + + if (!_sentResponses.TryGetValue(requestMetadata.CorrelationId, out var sentResponses)) + { + Assert.Fail("Executor did not send any responses"); + } + + Assert.Equal(receivedResponses.Count, sentResponses.Count); + for (int i = 0; i < expectedRequests.Count; i++) + { + Assert.Equal(sentResponses[i].Response, receivedResponses[i].Response); + Assert.Equal(i, receivedResponses[i].Metadata!.Index); + } + } + + [Fact] + public async Task InvokerCanCancelWhileStreamingRequests() //TODO does cancellation token trigger on executor side? 
Add to other tests as well + { + await using MqttSessionClient invokerMqttClient = await ClientFactory.CreateSessionClientFromEnvAsync(); + await using MqttSessionClient executorMqttClient = await ClientFactory.CreateSessionClientFromEnvAsync(); + + await using EchoStringStreamingCommandExecutor executor = + new(new(), executorMqttClient) + { + OnStreamingCommandReceived = SerialHandlerSingleResponse + }; + + await executor.StartAsync(); + + await using StringStreamingCommandInvoker invoker = new(new(), invokerMqttClient); + + RequestStreamMetadata requestMetadata = new(); + var stream = await invoker.InvokeStreamingCommandAsync(GetStringRequestStreamWithDelay(), requestMetadata); + + await stream.CancelAsync(); + } + + [Fact] + public async Task InvokerCanCancelWhileStreamingResponses() + { + await using MqttSessionClient invokerMqttClient = await ClientFactory.CreateSessionClientFromEnvAsync(); + await using MqttSessionClient executorMqttClient = await ClientFactory.CreateSessionClientFromEnvAsync(); + + await using EchoStringStreamingCommandExecutor executor = new(new(), executorMqttClient) + { + OnStreamingCommandReceived = SerialHandlerMultipleResponsesWithDelay + }; + + await executor.StartAsync(); + + await using StringStreamingCommandInvoker invoker = new(new(), invokerMqttClient); + + RequestStreamMetadata requestMetadata = new(); + var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStream(1), requestMetadata); + + await foreach (var response in responseStreamContext.Entries) + { + await responseStreamContext.CancelAsync(); + break; + } + } + + [Fact] + public async Task ExecutorCanCancelWhileStreamingRequests() + { + await using MqttSessionClient invokerMqttClient = await ClientFactory.CreateSessionClientFromEnvAsync(); + await using MqttSessionClient executorMqttClient = await ClientFactory.CreateSessionClientFromEnvAsync(); + + await using EchoStringStreamingCommandExecutor executor = + new(new(), executorMqttClient) + 
{ + OnStreamingCommandReceived = SerialHandlerWithCancellationWhileStreamingRequests + }; + + await executor.StartAsync(); + + await using StringStreamingCommandInvoker invoker = new(new(), invokerMqttClient); + + RequestStreamMetadata requestMetadata = new(); + var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStreamWithDelay(), requestMetadata); + + bool receivedCancellation = false; + try + { + await foreach (var response in responseStreamContext.Entries) + { + // Executor should send cancellation request prior to sending any responses + } + } + catch (AkriMqttException ame) when (ame.Kind is AkriMqttErrorKind.Cancellation) + { + receivedCancellation = true; + } + + Assert.True(receivedCancellation); + } + + [Fact] + public async Task ExecutorCanCancelWhileStreamingResponses() + { + await using MqttSessionClient invokerMqttClient = await ClientFactory.CreateSessionClientFromEnvAsync(); + await using MqttSessionClient executorMqttClient = await ClientFactory.CreateSessionClientFromEnvAsync(); + + await using EchoStringStreamingCommandExecutor executor = + new(new(), executorMqttClient) + { + OnStreamingCommandReceived = SerialHandlerWithCancellationWhileStreamingResponses + }; + + await executor.StartAsync(); + + await using StringStreamingCommandInvoker invoker = new(new(), invokerMqttClient); + + RequestStreamMetadata requestMetadata = new(); + var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStream(1), requestMetadata); + + bool receivedCancellation = false; + try + { + await foreach (var response in responseStreamContext.Entries) + { + // Read responses until the executor sends a cancellation request + } + } + catch (AkriMqttException ame) when (ame.Kind is AkriMqttErrorKind.Cancellation) + { + receivedCancellation = true; + } + + Assert.True(receivedCancellation); + } + + // Can configure the executor to send a response for each request and the invoker to only send the nth request after 
receiving the n-1th response + [Fact] + public async Task CanStreamRequestsAndResponsesSimultaneously() + { + await using MqttSessionClient invokerMqttClient = await ClientFactory.CreateSessionClientFromEnvAsync(); + await using MqttSessionClient executorMqttClient = await ClientFactory.CreateSessionClientFromEnvAsync(); + + await using EchoStringStreamingCommandExecutor executor = new(new(), executorMqttClient) + { + OnStreamingCommandReceived = ParallelHandlerEchoResponses + }; + + await executor.StartAsync(); + + await using StringStreamingCommandInvoker invoker = new(new(), invokerMqttClient); + + RequestStreamMetadata requestMetadata = new(); + TaskCompletionSource tcs1 = new(); // the delay to impose before sending the first request in the request stream + TaskCompletionSource tcs2 = new(); // the delay to impose before sending the second request in the request stream + TaskCompletionSource tcs3 = new(); // the delay to impose before sending the third request in the request stream + + tcs1.TrySetResult(); // Don't need to delay the first message + + var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStreamWithDelay(tcs1, tcs2, tcs3), requestMetadata); + + List> receivedResponses = new(); + await foreach (StreamingExtendedResponse response in responseStreamContext.Entries) + { + receivedResponses.Add(response); + + //TODO metadata will never be null when received, but may be null when assigned + if (response.Metadata!.Index == 0) + { + // The first response has been received, so allow the second request to be sent + tcs2.TrySetResult(); + } + + if (response.Metadata!.Index == 1) + { + // The second response has been received, so allow the third request to be sent + tcs3.TrySetResult(); + } + } + + if (!_receivedRequests.TryGetValue(requestMetadata.CorrelationId, out var receivedRequests)) + { + Assert.Fail("Executor did not receive any requests"); + } + + // Executor should echo back each request as a response +
Assert.Equal(receivedResponses.Count, receivedRequests.Count); + for (int i = 0; i < receivedResponses.Count; i++) + { + Assert.Equal(receivedResponses[i].Response, receivedRequests[i].Request); + } + } + + [Fact] + public Task CanAddUserPropertiesToSpecificToMessagesInRequestAndstreamContexts() + { + throw new NotImplementedException(); + } + + [Fact] + public Task CanCancelFromInvokerSideWithCancellationToken() + { + throw new NotImplementedException(); + } + + private async IAsyncEnumerable> GetStringRequestStream(int requestCount) + { + for (int i = 0; i < requestCount; i++) + { + await Task.Delay(TimeSpan.FromMicroseconds(1)); // Simulate asynchronous work + yield return new($"Message {i}"); + } + } + + // send N requests after each provided TCS is triggered. This allows for testing scenarios like "only send a request once a response has been received" + private async IAsyncEnumerable> GetStringRequestStreamWithDelay(params TaskCompletionSource[] delays) + { + int index = 0; + foreach (TaskCompletionSource delay in delays) + { + await delay.Task; // Simulate asynchronous work + yield return new($"Message {index++}"); + } + } + + private async IAsyncEnumerable> GetStringStreamContext(int responseCount) + { + for (int i = 0; i < responseCount; i++) + { + await Task.Delay(TimeSpan.FromMicroseconds(1)); // Simulate asynchronous work + yield return new($"Message {i}"); + } + } + + private async IAsyncEnumerable> GetStringRequestStreamWithDelay() + { + for (int i = 0; i <= 10; i++) + { + yield return new($"Message {i}"); + + await Task.Delay(TimeSpan.FromHours(1)); // Simulate asynchronous work that is stuck after the first request is sent + } + } + + private async IAsyncEnumerable> SerialHandlerSingleResponse(ICancelableStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) + { + await SaveReceivedRequests(stream, streamMetadata, cancellationToken); + + await foreach (var response in 
GetStringStreamContext(3).WithCancellation(cancellationToken)) + { + yield return response; + } + } + + private async IAsyncEnumerable> SerialHandlerMultipleResponses(ICancelableStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) + { + await SaveReceivedRequests(stream, streamMetadata, cancellationToken); + + await foreach (var response in GetStringStreamContext(3).WithCancellation(cancellationToken)) + { + _sentResponses.TryAdd(streamMetadata.CorrelationId, new()); + if (_sentResponses.TryGetValue(streamMetadata.CorrelationId, out var sentResponses)) + { + sentResponses.Add(response); + } + + yield return response; + } + } + + private async IAsyncEnumerable> ParallelHandlerEchoResponses(ICancelableStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) + { + await foreach (StreamingExtendedRequest requestStreamEntry in stream.Entries.WithCancellation(cancellationToken)) + { + // doesn't overwrite if the correlationId already exists in the dictionary + _receivedRequests.TryAdd(streamMetadata.CorrelationId, new()); + + if (_receivedRequests.TryGetValue(streamMetadata.CorrelationId, out var requestsReceived)) + { + requestsReceived.Add(requestStreamEntry); + } + + yield return new(requestStreamEntry.Request); + } + } + + private async IAsyncEnumerable> SerialHandlerMultipleResponsesWithDelay(ICancelableStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) + { + await SaveReceivedRequests(stream, streamMetadata, cancellationToken); + + await foreach (var response in GetStringStreamContext(3).WithCancellation(cancellationToken)) + { + _sentResponses.TryAdd(streamMetadata.CorrelationId, new()); + if (_sentResponses.TryGetValue(streamMetadata.CorrelationId, out var sentResponses)) + { + sentResponses.Add(response); + } + + yield return response; + + await 
Task.Delay(TimeSpan.FromHours(1), cancellationToken); + } + } + +#pragma warning disable IDE0060 // Remove unused parameter + private static async IAsyncEnumerable> SerialHandlerWithCancellationWhileStreamingRequests(ICancelableStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) +#pragma warning restore IDE0060 // Remove unused parameter + { + CancellationTokenSource requestTimeoutCancellationTokenSource = new CancellationTokenSource(); + requestTimeoutCancellationTokenSource.CancelAfter(TimeSpan.FromSeconds(1)); + + var asyncEnumeratorWithCancellation = stream.Entries.WithCancellation(requestTimeoutCancellationTokenSource.Token).GetAsyncEnumerator(); + + bool readingRequestStream = true; + while (readingRequestStream) + { + StreamingExtendedRequest request; + try + { + readingRequestStream = await asyncEnumeratorWithCancellation.MoveNextAsync(); + request = asyncEnumeratorWithCancellation.Current; + } + catch (OperationCanceledException) + { + // simulates timing out while waiting on an entry in the stream and the executor deciding to cancel the stream as a result + await stream.CancelAsync(); + yield break; + } + + yield return new(request.Request); + } + } + + private async IAsyncEnumerable> SerialHandlerWithCancellationWhileStreamingResponses(ICancelableStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) + { + await SaveReceivedRequests(stream, streamMetadata, cancellationToken); + + CancellationTokenSource cts = new(); + cts.CancelAfter(TimeSpan.FromSeconds(1)); + for (int responseCount = 0; responseCount < 5; responseCount++) + { + try + { + if (responseCount == 3) + { + // simulate one entry in the response stream taking too long and the executor deciding to cancel the stream because of it + await Task.Delay(TimeSpan.FromHours(1), cts.Token); + } + } + catch (OperationCanceledException) + { + await
stream.CancelAsync(); + yield break; + } + + yield return new StreamingExtendedResponse("some response"); + } + } + + private async Task SaveReceivedRequests(ICancelableStreamContext> stream, RequestStreamMetadata streamMetadata, CancellationToken cancellationToken) + { + await foreach (StreamingExtendedRequest requestStreamEntry in stream.Entries.WithCancellation(cancellationToken)) + { + // doesn't overwrite if the correlationId already exists in the dictionary + _receivedRequests.TryAdd(streamMetadata.CorrelationId, new()); + + if (_receivedRequests.TryGetValue(streamMetadata.CorrelationId, out var requestsReceived)) + { + requestsReceived.Add(requestStreamEntry); + } + } + } + } +} From 8e6a6d8716d1dd83a786349cfc462029b508a224 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Wed, 3 Sep 2025 14:51:57 -0700 Subject: [PATCH 49/74] note --- doc/dev/adr/0025-rpc-streaming.md | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 675d2cf5a5..334eab50e5 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -160,6 +160,7 @@ The command invoker will acknowledge all messages it receives that match the cor A streaming command executor should start by subscribing to the expected command topic - Even though the streaming command classes are separate from the existing RPC classes, they should also offer the same features around topic string pre/suffixing, custom topic token support, etc. + - The executor will use a shared subscription topic (exactly like how non-streaming RPC executors do) Upon receiving a MQTT message that contains a streaming request, the streaming executor should notify the application layer that the first message in a request stream was received. Once the executor has notified the user that the first message in a request stream was received, the user should be able to provide a stream of responses. 
Upon receiving each response in that stream from the user, the executor will send an MQTT message for each streamed response with: - The same correlation data as the original request From 59afded8ec06a0328aefb9a9df44c35544e43675 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Thu, 4 Sep 2025 10:38:06 -0700 Subject: [PATCH 50/74] more --- doc/dev/adr/0025-rpc-streaming.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 334eab50e5..909bfb3d03 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -160,7 +160,7 @@ The command invoker will acknowledge all messages it receives that match the cor A streaming command executor should start by subscribing to the expected command topic - Even though the streaming command classes are separate from the existing RPC classes, they should also offer the same features around topic string pre/suffixing, custom topic token support, etc. - - The executor will use a shared subscription topic (exactly like how non-streaming RPC executors do) + - The executor will use a shared subscription topic (exactly like how non-streaming RPC executors do) so that each streaming request is received by only one executor Upon receiving a MQTT message that contains a streaming request, the streaming executor should notify the application layer that the first message in a request stream was received. Once the executor has notified the user that the first message in a request stream was received, the user should be able to provide a stream of responses. 
Upon receiving each response in that stream from the user, the executor will send an MQTT message for each streamed response with: - The same correlation data as the original request From 69d778116835ad6abaa579bbe42c0132024e5564 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Fri, 5 Sep 2025 15:35:29 -0700 Subject: [PATCH 51/74] More, complete streams at any time --- doc/dev/adr/0025-rpc-streaming.md | 28 +++-- ...ableStreamContext.cs => IStreamContext.cs} | 14 ++- .../Streaming/StreamingCommandExecutor.cs | 13 ++- .../Streaming/StreamingCommandInvoker.cs | 20 +++- .../StreamingIntegrationTests.cs | 110 +++++++++++++++--- 5 files changed, 152 insertions(+), 33 deletions(-) rename dotnet/src/Azure.Iot.Operations.Protocol/Streaming/{ICancelableStreamContext.cs => IStreamContext.cs} (55%) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 909bfb3d03..4d0570d7a2 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -15,7 +15,9 @@ Users have expressed a desire to allow more than one request and/or more than on - Allow for invoker + executor to send their requests/responses at arbitrary* times - For instance, executor may send 1 response upon receiving 1 request, or it may wait for the request stream to finish before sending the first response - Alternatively, this allows the invoker to send a request upon receiving a response - - *The only limitations are that the invoker must initiate the RPC with a request and the executor must end the RPC with a response + - *The only limitation is that the invoker must initiate the RPC streaming with a request + - Allow for invoker/executor to end their own request/response stream gracefully at any time + - For instance, if the executor doesn't know if a response will be the last one prior to sending it, the executor should still be capable of ending the response stream later without sending another fully-fledged payload ## Non-requirements @@ -84,7 
+86,7 @@ public abstract class StreamingCommandInvoker where TResp : class { // Many requests, many responses. - public async Task> InvokeStreamingCommandAsync(IAsyncEnumerable> requests, StreamRequestMetadata? streamRequestMetadata = null, Dictionary? additionalTopicTokenMap = null, TimeSpan? commandTimeout = default, CancellationToken cancellationToken = default) {...} + public async Task> InvokeStreamingCommandAsync(IAsyncEnumerable> requests, StreamRequestMetadata? streamRequestMetadata = null, Dictionary? additionalTopicTokenMap = null, TimeSpan? commandTimeout = default, CancellationToken cancellationToken = default) {...} } ``` @@ -103,7 +105,7 @@ public abstract class StreamingCommandExecutor : IAsyncDisposable /// /// The callback provides the stream of requests and requires the user to return one to many responses. /// - public required Func, StreamRequestMetadata, CancellationToken, IAsyncEnumerable>> OnStreamingCommandReceived { get; set; } + public required Func, StreamRequestMetadata, CancellationToken, IAsyncEnumerable>> OnStreamingCommandReceived { get; set; } } @@ -150,10 +152,16 @@ Once the user invokes a streaming command, the streaming command invoker will se - The same correlation data - The appropriate streaming metadata [see above](#streaming-user-property) - The serialized payload as provided by the user's request object - - Any user-definied metadata as specified in the ```ExtendedRequest``` + - Any user-definied metadata as specified in the ```ExtendedStreamingRequest``` Once the stream of requests has started sending, the streaming command invoker should expect the stream of responses to arrive on the provided response topic with the provided correlation data and the streaming user property. 
+Once the user-supplied stream of request messages has ended, the streaming command invoker should send one final message to the same topic/with the same correlation data with no payload and with the 'isLast' flag set in the '__stream' metadata bundle. + +Upon receiving an MQTT message in the response stream with the 'isLast' flag set in the '__stream' metadata, the streaming command invoker should notify the user that the stream of responses has ended. This particular message should not contain any payload or other user properties, so the message _should not_ be propagated to the user as if it were part of the response stream. + +If a streaming command invoker receives an MQTT message with the 'isLast' flag set but has not received any other messages in that response stream, the invoker should log an error, acknowledge the message, but otherwise ignore it. A stream of responses must have at least one entry. + The command invoker will acknowledge all messages it receives that match the correlation data of a known streaming command. #### Executor side @@ -167,11 +175,15 @@ Upon receiving a MQTT message that contains a streaming request, the streaming e - The topic as specified by the original request's response topic field - The appropriate streaming metadata [see above](#streaming-user-property) - The serialized payload as provided by the user's response object - - Any user-definied metadata as specified in the ```ExtendedResponse``` + - Any user-definied metadata as specified in the ```ExtendedStreamingResponse``` + +Upon receiving an MQTT message in the request stream with the 'isLast' flag set in the '__stream' metadata, the streaming executor should notify the user that the stream of requests has ended. This particular message should not contain any payload or other user properties, so the message _should not_ be propagated to the user as if it were part of the request stream. 
+ +If a streaming command executor receives an MQTT message with the 'isLast' flag set but has not received any other messages in that request stream, the executor should log an error, acknowledge the message, but otherwise ignore it. A stream of requests must have at least one entry. -Unlike normal RPC, the streaming command executor will not provide any cache support. This is because streams may grow indefinitely in length and size. +Unlike normal RPC, the stream command executor should acknowledge the MQTT message of a received stream request as soon as the user has been notified about it. We cannot defer acknowledging the stream request messages until after the full command has finished as streams may run indefinitely and we don't want to block other users of the MQTT client. -Also unlike normal RPC, the stream command executor should acknowledge the MQTT message of a received stream request as soon as the user has been notified about it. We cannot defer acknowledging the stream request messages until after the full command has finished as streams may run indefinitely and we don't want to block other users of the MQTT client. +Also unlike normal RPC, the streaming command executor will not provide any cache support. This is because streams may grow indefinitely in length and size. 
### Timeout support @@ -218,7 +230,7 @@ Since sending a cancellation request may fail (message expiry on broker side), t The proposed cancellation support would come from the return type on the invoker side and the provided type on the executor side: ```csharp -public interface ICancelableStreamContext +public interface IStreamContext where T : class { /// diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/ICancelableStreamContext.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/IStreamContext.cs similarity index 55% rename from dotnet/src/Azure.Iot.Operations.Protocol/Streaming/ICancelableStreamContext.cs rename to dotnet/src/Azure.Iot.Operations.Protocol/Streaming/IStreamContext.cs index 85a6a26a4e..2f47d5d084 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/ICancelableStreamContext.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/IStreamContext.cs @@ -8,10 +8,10 @@ namespace Azure.Iot.Operations.Protocol.Streaming { /// - /// A stream of requests or responses that can be canceled (with confirmation) at any time. + /// A stream of requests or responses that can be gracefully ended or canceled (with confirmation) at any time. /// /// The type of the payload of the request stream - public interface ICancelableStreamContext + public interface IStreamContext where T : class { /// @@ -20,13 +20,17 @@ public interface ICancelableStreamContext IAsyncEnumerable Entries { get; set; } /// - /// Cancel this received RPC streaming request. + /// Cancel this RPC streaming call. /// /// Cancellation token for this cancellation request /// - /// This method may be called by the streaming executor at any time. For instance, if the request stream + /// When called by the invoker, the executor will be notified about this cancellation and the executor will attempt + /// to stop any user-defined handling of the streaming request. When called by the executor, the invoker will be notified + /// and will cease sending requests. 
+ /// + /// This method may be called by the streaming invoker or executor at any time. For instance, if the request stream /// stalls unexpectedly, the executor can call this method to notify the invoker to stop sending requests. - /// Additionally, the executor can call this method if its response stream has stalled unexpectedly. + /// Additionally, the invoker can call this method if its response stream has stalled unexpectedly. /// Task CancelAsync(CancellationToken cancellationToken = default); } diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs index 699aa6387f..89c0872338 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs @@ -29,7 +29,7 @@ public abstract class StreamingCommandExecutor : IAsyncDisposable /// /// The callback provides the stream of requests and requires the user to return one to many responses. /// - public required Func>, RequestStreamMetadata, CancellationToken, IAsyncEnumerable>> OnStreamingCommandReceived { get; set; } + public required Func>, RequestStreamMetadata, CancellationToken, IAsyncEnumerable>> OnStreamingCommandReceived { get; set; } public string? ExecutorId { get; init; } @@ -50,18 +50,25 @@ public abstract class StreamingCommandExecutor : IAsyncDisposable public Task StartAsync(int? 
preferredDispatchConcurrency = null, CancellationToken cancellationToken = default) { + // TODO: derive the expected request topic (like command executor does) + + // TODO: subscribe to the shared subscription prefixed request topic + throw new NotImplementedException(); } public Task StopAsync(CancellationToken cancellationToken = default) { + // TODO: Unsubscribe from the request topic derived in StartAsync + throw new NotImplementedException(); } - public ValueTask DisposeAsync() + public async ValueTask DisposeAsync() { + await StopAsync(); + GC.SuppressFinalize(this); - return ValueTask.CompletedTask; } } } diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandInvoker.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandInvoker.cs index 0e5c6a1e8f..bd87b50bc3 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandInvoker.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandInvoker.cs @@ -8,10 +8,10 @@ #pragma warning disable IDE0060 // Remove unused parameter #pragma warning disable CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider adding the 'required' modifier or declaring as nullable. +#pragma warning disable CS0168 // Variable is declared but never used namespace Azure.Iot.Operations.Protocol.Streaming { - //TODO if we allow simultaneous request + response streaming, does it have to end with a response message? Does the final request have to happen prior to the final response message? public abstract class StreamingCommandInvoker : IAsyncDisposable where TReq : class where TResp : class @@ -60,8 +60,23 @@ public abstract class StreamingCommandInvoker : IAsyncDisposable /// public string? ResponseTopicPattern { get; set; } - public Task>> InvokeStreamingCommandAsync(IAsyncEnumerable> requests, RequestStreamMetadata? streamRequestMetadata = null, Dictionary? additionalTopicTokenMap = null, TimeSpan? 
commandTimeout = default, CancellationToken cancellationToken = default) + public async Task>> InvokeStreamingCommandAsync(IAsyncEnumerable> requests, RequestStreamMetadata? streamMetadata = null, Dictionary? additionalTopicTokenMap = null, TimeSpan? streamExchangeTimeout = default, CancellationToken cancellationToken = default) { + // TODO: Derive the request topic (like commandInvoker does) + + // TODO: Subscribe to the expected response topic + + // TODO: construct the IAsyncEnumerable of responses to capture the stream of responses prior to sending the first request. + IAsyncEnumerable> responses; + IStreamContext>> streamContext; + + await foreach (var streamMessage in requests) + { + // TODO: Construct and send an MQTT message to the executor. Attach properties from both streamMetadata and streamMessage.Metadata + } + + // TODO: Send the "end of stream" MQTT message now that all request messages have been sent + throw new NotImplementedException(); } @@ -72,6 +87,7 @@ public ValueTask DisposeAsync() } #pragma warning restore IDE0060 // Remove unused parameter #pragma warning restore CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider adding the 'required' modifier or declaring as nullable. 
+#pragma warning restore CS0168 // Variable is declared but never used } } diff --git a/dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs b/dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs index f55014e9e1..62ef98d722 100644 --- a/dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs +++ b/dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs @@ -115,8 +115,7 @@ public async Task InvokerCanCancelWhileStreamingRequests() //TODO does cancellat await using StringStreamingCommandInvoker invoker = new(new(), invokerMqttClient); - RequestStreamMetadata requestMetadata = new(); - var stream = await invoker.InvokeStreamingCommandAsync(GetStringRequestStreamWithDelay(), requestMetadata); + var stream = await invoker.InvokeStreamingCommandAsync(GetStringRequestStreamWithDelay()); await stream.CancelAsync(); } @@ -136,8 +135,7 @@ public async Task InvokerCanCancelWhileStreamingResponses() await using StringStreamingCommandInvoker invoker = new(new(), invokerMqttClient); - RequestStreamMetadata requestMetadata = new(); - var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStream(1), requestMetadata); + var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStream(1)); await foreach (var response in responseStreamContext.Entries) { @@ -162,8 +160,7 @@ public async Task ExecutorCanCancelWhileStreamingRequests() await using StringStreamingCommandInvoker invoker = new(new(), invokerMqttClient); - RequestStreamMetadata requestMetadata = new(); - var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStreamWithDelay(), requestMetadata); + var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStreamWithDelay()); bool receivedCancellation = false; try @@ -197,8 +194,7 @@ public async Task 
ExecutorCanCancelWhileStreamingResponses() await using StringStreamingCommandInvoker invoker = new(new(), invokerMqttClient); - RequestStreamMetadata requestMetadata = new(); - var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStream(1), requestMetadata); + var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStream(1)); bool receivedCancellation = false; try @@ -285,6 +281,57 @@ public Task CanCancelFromInvokerSideWithCancellationToken() throw new NotImplementedException(); } + // In cases where the IAsyncEnumerable isn't sure if a given entry will be the last, users can "escape" by using the keyword + // "yield break" to signal the IAsyncEnumerable has ended without providing a fully-fledged final entry + [Fact] + public async Task InvokerCanCompleteRequestStreamWithYieldBreak() + { + await using MqttSessionClient invokerMqttClient = await ClientFactory.CreateSessionClientFromEnvAsync(); + await using MqttSessionClient executorMqttClient = await ClientFactory.CreateSessionClientFromEnvAsync(); + + await using EchoStringStreamingCommandExecutor executor = + new(new(), executorMqttClient) + { + OnStreamingCommandReceived = SerialHandlerSingleResponse + }; + + await executor.StartAsync(); + + await using StringStreamingCommandInvoker invoker = new(new(), invokerMqttClient); + + var stream = await invoker.InvokeStreamingCommandAsync(GetStringRequestStreamWithYieldBreak()); + + await foreach (var response in stream.Entries) + { + // TODO verify expected responses + } + } + + // See 'InvokerCanCompleteRequestStreamWithYieldBreak' but on the executor side + [Fact] + public async Task ExecutorCanCompleteResponseStreamWithYieldBreak() + { + await using MqttSessionClient invokerMqttClient = await ClientFactory.CreateSessionClientFromEnvAsync(); + await using MqttSessionClient executorMqttClient = await ClientFactory.CreateSessionClientFromEnvAsync(); + + await using EchoStringStreamingCommandExecutor 
executor = + new(new(), executorMqttClient) + { + OnStreamingCommandReceived = SerialHandlerMultipleResponsesWithYieldBreakAfterFirstResponse + }; + + await executor.StartAsync(); + + await using StringStreamingCommandInvoker invoker = new(new(), invokerMqttClient); + + var stream = await invoker.InvokeStreamingCommandAsync(GetStringRequestStream(3)); + + await foreach (var response in stream.Entries) + { + // TODO verify expected responses + } + } + private async IAsyncEnumerable> GetStringRequestStream(int requestCount) { for (int i = 0; i < requestCount; i++) @@ -324,7 +371,23 @@ private async IAsyncEnumerable> GetStringReques } } - private async IAsyncEnumerable> SerialHandlerSingleResponse(ICancelableStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) + // Simulate a request stream that decides between entries to close gracefully + private static async IAsyncEnumerable> GetStringRequestStreamWithYieldBreak() + { + for (int i = 0; true; i++) + { + await Task.Delay(TimeSpan.FromMicroseconds(1)); // Simulate asynchronous work + + if (i > 5) + { + yield break; + } + + yield return new($"Message {i}"); + } + } + + private async IAsyncEnumerable> SerialHandlerSingleResponse(IStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) { await SaveReceivedRequests(stream, streamMetadata, cancellationToken); @@ -334,7 +397,23 @@ private async IAsyncEnumerable> SerialHandlerS } } - private async IAsyncEnumerable> SerialHandlerMultipleResponses(ICancelableStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) + private async IAsyncEnumerable> SerialHandlerMultipleResponses(IStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) + { + await SaveReceivedRequests(stream, streamMetadata, cancellationToken); + 
+ await foreach (var response in GetStringStreamContext(3).WithCancellation(cancellationToken)) + { + _sentResponses.TryAdd(streamMetadata.CorrelationId, new()); + if (_sentResponses.TryGetValue(streamMetadata.CorrelationId, out var sentResponses)) + { + sentResponses.Add(response); + } + + yield return response; + } + } + + private async IAsyncEnumerable> SerialHandlerMultipleResponsesWithYieldBreakAfterFirstResponse(IStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) { await SaveReceivedRequests(stream, streamMetadata, cancellationToken); @@ -347,10 +426,11 @@ private async IAsyncEnumerable> SerialHandlerM } yield return response; + yield break; // Break after sending the first response } } - private async IAsyncEnumerable> ParallelHandlerEchoResponses(ICancelableStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) + private async IAsyncEnumerable> ParallelHandlerEchoResponses(IStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) { await foreach (StreamingExtendedRequest requestStreamEntry in stream.Entries.WithCancellation(cancellationToken)) { @@ -366,7 +446,7 @@ private async IAsyncEnumerable> ParallelHandle } } - private async IAsyncEnumerable> SerialHandlerMultipleResponsesWithDelay(ICancelableStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) + private async IAsyncEnumerable> SerialHandlerMultipleResponsesWithDelay(IStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) { await SaveReceivedRequests(stream, streamMetadata, cancellationToken); @@ -385,7 +465,7 @@ private async IAsyncEnumerable> SerialHandlerM } #pragma warning disable IDE0060 // Remove unused parameter - private static async IAsyncEnumerable> 
SerialHandlerWithCancellationWhileStreamingRequests(ICancelableStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) + private static async IAsyncEnumerable> SerialHandlerWithCancellationWhileStreamingRequests(IStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) #pragma warning restore IDE0060 // Remove unused parameter { CancellationTokenSource requestTimeoutCancellationTokenSource = new CancellationTokenSource(); @@ -413,7 +493,7 @@ private static async IAsyncEnumerable> SerialH } } - private async IAsyncEnumerable> SerialHandlerWithCancellationWhileStreamingResponses(ICancelableStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) + private async IAsyncEnumerable> SerialHandlerWithCancellationWhileStreamingResponses(IStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) { await SaveReceivedRequests(stream, streamMetadata, cancellationToken); @@ -439,7 +519,7 @@ private async IAsyncEnumerable> SerialHandlerW } } - private async Task SaveReceivedRequests(ICancelableStreamContext> stream, RequestStreamMetadata streamMetadata, CancellationToken cancellationToken) + private async Task SaveReceivedRequests(IStreamContext> stream, RequestStreamMetadata streamMetadata, CancellationToken cancellationToken) { await foreach (StreamingExtendedRequest requestStreamEntry in stream.Entries.WithCancellation(cancellationToken)) { From 4a5b22ddcbb5a2035fb4d3fd141424722d05bdac Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Fri, 5 Sep 2025 16:30:42 -0700 Subject: [PATCH 52/74] ExecutorId is mandatory --- doc/dev/adr/0025-rpc-streaming.md | 7 +++++-- .../Streaming/StreamingCommandExecutor.cs | 2 +- .../Streaming/StreamingCommandInvoker.cs | 20 +++++++++++++++++-- 3 files changed, 24 insertions(+), 5 
deletions(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 4d0570d7a2..19c9b541e2 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -149,7 +149,9 @@ The streaming command invoker will first subscribe to the appropriate response t Once the user invokes a streaming command, the streaming command invoker will send one to many MQTT messages with: - The same response topic + - This response topic must be prefixed with 'clients/{mqtt client id of invoker}' like in vanilla RPC - The same correlation data + - A topic that includes an 'executorId' topic token (see vanilla RPC for details) - The appropriate streaming metadata [see above](#streaming-user-property) - The serialized payload as provided by the user's request object - Any user-definied metadata as specified in the ```ExtendedStreamingRequest``` @@ -168,7 +170,9 @@ The command invoker will acknowledge all messages it receives that match the cor A streaming command executor should start by subscribing to the expected command topic - Even though the streaming command classes are separate from the existing RPC classes, they should also offer the same features around topic string pre/suffixing, custom topic token support, etc. 
- - The executor will use a shared subscription topic (exactly like how non-streaming RPC executors do) so that each streaming request is received by only one executor + - The expected command topic _must_ include the 'executorId' topic token and its value must be set equal to the client Id of the executor's MQTT client + - By including the executorId in the expected request topic, we can guarantee that all messages in a request stream are delivered to the same executor + - Because streaming executors always have distinct expected request topics, there is no need to use/configure shared subscriptions Upon receiving a MQTT message that contains a streaming request, the streaming executor should notify the application layer that the first message in a request stream was received. Once the executor has notified the user that the first message in a request stream was received, the user should be able to provide a stream of responses. Upon receiving each response in that stream from the user, the executor will send an MQTT message for each streamed response with: - The same correlation data as the original request @@ -248,7 +252,6 @@ public interface IStreamContext /// Additionally, the executor can call this method if its response stream has stalled unexpectedly. 
/// Task CancelAsync(CancellationToken cancellationToken = default); -}oken cancellationToken = default); } ``` diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs index 89c0872338..e6c60dd11d 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs @@ -31,7 +31,7 @@ public abstract class StreamingCommandExecutor : IAsyncDisposable /// public required Func>, RequestStreamMetadata, CancellationToken, IAsyncEnumerable>> OnStreamingCommandReceived { get; set; } - public string? ExecutorId { get; init; } + public string ExecutorId { get; init; } // Must equal the client Id of the MQTT client running this executor. Unlike in vanilla RPC, this is not optional. public string ServiceGroupId { get; init; } diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandInvoker.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandInvoker.cs index bd87b50bc3..bb5a7974c7 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandInvoker.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandInvoker.cs @@ -17,7 +17,7 @@ public abstract class StreamingCommandInvoker : IAsyncDisposable where TResp : class { /// - /// The topic token replacement map that this command invoker will use by default. Generally, this will include the token values + /// The topic token replacement map that this streaming command invoker will use by default. Generally, this will include the token values /// for topic tokens such as "modelId" which should be the same for the duration of this command invoker's lifetime. /// /// @@ -60,7 +60,23 @@ public abstract class StreamingCommandInvoker : IAsyncDisposable /// public string? 
ResponseTopicPattern { get; set; } - public async Task>> InvokeStreamingCommandAsync(IAsyncEnumerable> requests, RequestStreamMetadata? streamMetadata = null, Dictionary? additionalTopicTokenMap = null, TimeSpan? streamExchangeTimeout = default, CancellationToken cancellationToken = default) + /// + /// Invoke a streaming command on a particular streaming command executor + /// + /// The stream of requests to send. This stream must contain at least one request. + /// The Id of the executor to send this request to. + /// The metadata for the request stream as a whole. + /// Topic tokens to substitute in the request topic. + /// The timeout between the beginning of the request stream and the end of both the request and response stream. + /// Cancellation token. Signalling this will also make a single attempt to notify the executor of the cancellation. + /// The stream of responses. + public async Task>> InvokeStreamingCommandAsync( + IAsyncEnumerable> requests, + string executorId, + RequestStreamMetadata? streamMetadata = null, + Dictionary? additionalTopicTokenMap = null, + TimeSpan? streamExchangeTimeout = default, + CancellationToken cancellationToken = default) { // TODO: Derive the request topic (like commandInvoker does) From 91f55f37ecc9aafcb9c461b5f7597d476695d2dd Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Fri, 5 Sep 2025 17:01:30 -0700 Subject: [PATCH 53/74] fix .NET APIs --- doc/dev/adr/0025-rpc-streaming.md | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 19c9b541e2..ebfc130a2f 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -76,9 +76,8 @@ public class StreamingExtendedResponse #### Invoker side -The new ```StreamingCommandInvoker``` will largely look like the existing ```CommandInvoker```, but will instead have an API for ```InvokeCommandWithStreaming```. 
- -This new method will take the same parameters as ```InvokeCommand``` but will accept a stream of requests and return a stream of command responses. +The new API will ask users to provide a stream of request payloads + metadata, the target streaming command executor, any stream-level metadata and timeout/cancellation tokens. It will return the +stream of responses as well as a cancellation function that allows the user to terminate the stream exchange at any time. ```csharp public abstract class StreamingCommandInvoker @@ -86,7 +85,13 @@ public abstract class StreamingCommandInvoker where TResp : class { // Many requests, many responses. - public async Task> InvokeStreamingCommandAsync(IAsyncEnumerable> requests, StreamRequestMetadata? streamRequestMetadata = null, Dictionary? additionalTopicTokenMap = null, TimeSpan? commandTimeout = default, CancellationToken cancellationToken = default) {...} + public async Task> InvokeStreamingCommandAsync( + IAsyncEnumerable> requests, + string executorId, + StreamRequestMetadata? streamRequestMetadata = null, + Dictionary? additionalTopicTokenMap = null, + TimeSpan? commandTimeout = default, + CancellationToken cancellationToken = default) {...} } ``` @@ -106,7 +111,6 @@ public abstract class StreamingCommandExecutor : IAsyncDisposable /// The callback provides the stream of requests and requires the user to return one to many responses. 
/// public required Func, StreamRequestMetadata, CancellationToken, IAsyncEnumerable>> OnStreamingCommandReceived { get; set; } - } ``` From 0c7fd4a9f97e332970db9e411018e1f94b854ae5 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Fri, 5 Sep 2025 17:15:53 -0700 Subject: [PATCH 54/74] disconnection considerations --- doc/dev/adr/0025-rpc-streaming.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index ebfc130a2f..feb1628d0b 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -297,6 +297,24 @@ The command invoker should then send a message on the same command topic with th Any received MQTT messages pertaining to a command that was already canceled should still be acknowledged. They should not be given to the user, though. +### Disconnection scenario considerations + +- Invoker side disconnects unexpectedly while sending requests + - Upon reconnection, the request messages queued in the session client should send as expected + - If no reconnection, the streaming RPC will timeout +- Invoker side disconnects unexpectedly while receiving responses + - The broker should hold all published responses for as long as the invoker's session lives and send them upon reconnection + - If the invoker's session is lost, then the RPC will timeout +- Executor side isn't connected when invoker sends first request + - Invoker will receive a "no matching subscribers" puback + - Seems like a scenario we would want to retry? 
+- Executor side disconnects unexpectedly while receiving requests + - Broker should hold all published requests for as long as the executor's session lives and send them upon reconnection + - If the executor's session is lost, the RPC will timeout +- Executor side disconnects unexpectedly while sending responses + - Upon reconnection, the response messages queued in the session client should send as expected + - If no reconnection, the streaming RPC will timeout + ### Protocol versioning By maintaining RPC streaming as a separate communication pattern from normal RPC, we will need to introduce an independent protocol version for RPC streaming. It will start at ```1.0``` and should follow the same protocol versioning rules as the protocol versions used by telemetry and normal RPC. From 66556a515004e15163abeced03323402f7b2b2e5 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Fri, 5 Sep 2025 17:18:40 -0700 Subject: [PATCH 55/74] De-duping? --- doc/dev/adr/0025-rpc-streaming.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index feb1628d0b..768cd99e68 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -314,7 +314,11 @@ Any received MQTT messages pertaining to a command that was already canceled sho - Executor side disconnects unexpectedly while sending responses - Upon reconnection, the response messages queued in the session client should send as expected - If no reconnection, the streaming RPC will timeout - +- Invoker disconnects causing one message in request stream to be sent twice (due to QoS 1) + - Executor side can de-dup the request by checking the stream index of this request vs the stream index of the last message it gave to the user +- Executor disconnects causing one message in response stream to be sent twice (due to QoS 1) + - Invoker side can de-dup the request by checking the stream index of this request vs the 
stream index of the last message it gave to the user + ### Protocol versioning By maintaining RPC streaming as a separate communication pattern from normal RPC, we will need to introduce an independent protocol version for RPC streaming. It will start at ```1.0``` and should follow the same protocol versioning rules as the protocol versions used by telemetry and normal RPC. From 6b15e197286af1db493ab9817d8c9d9892f91a5c Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Fri, 5 Sep 2025 17:23:40 -0700 Subject: [PATCH 56/74] Revert "De-duping?" This reverts commit 66556a515004e15163abeced03323402f7b2b2e5. --- doc/dev/adr/0025-rpc-streaming.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 768cd99e68..feb1628d0b 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -314,11 +314,7 @@ Any received MQTT messages pertaining to a command that was already canceled sho - Executor side disconnects unexpectedly while sending responses - Upon reconnection, the response messages queued in the session client should send as expected - If no reconnection, the streaming RPC will timeout -- Invoker disconnects causing one message in request stream to be sent twice (due to QoS 1) - - Executor side can de-dup the request by checking the stream index of this request vs the stream index of the last message it gave to the user -- Executor disconnects causing one message in response stream to be sent twice (due to QoS 1) - - Invoker side can de-dup the request by checking the stream index of this request vs the stream index of the last message it gave to the user - + ### Protocol versioning By maintaining RPC streaming as a separate communication pattern from normal RPC, we will need to introduce an independent protocol version for RPC streaming. 
It will start at ```1.0``` and should follow the same protocol versioning rules as the protocol versions used by telemetry and normal RPC. From bddf3f9a33763610af31274838e91d6f2ac56ea9 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Mon, 8 Sep 2025 10:53:15 -0700 Subject: [PATCH 57/74] executorId in tests --- .../StreamingIntegrationTests.cs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs b/dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs index 62ef98d722..c487f638d6 100644 --- a/dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs +++ b/dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs @@ -60,7 +60,7 @@ public async Task StreamRequestsAndResponsesInSerial(bool multipleRequests, bool await using StringStreamingCommandInvoker invoker = new(new(), invokerMqttClient); RequestStreamMetadata requestMetadata = new(); - var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStream(requestCount), requestMetadata); + var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStream(requestCount), executorMqttClient.ClientId!, requestMetadata); List> receivedResponses = new(); await foreach (StreamingExtendedResponse response in responseStreamContext.Entries) @@ -115,7 +115,7 @@ public async Task InvokerCanCancelWhileStreamingRequests() //TODO does cancellat await using StringStreamingCommandInvoker invoker = new(new(), invokerMqttClient); - var stream = await invoker.InvokeStreamingCommandAsync(GetStringRequestStreamWithDelay()); + var stream = await invoker.InvokeStreamingCommandAsync(GetStringRequestStreamWithDelay(), executorMqttClient.ClientId!); await stream.CancelAsync(); } @@ -135,7 +135,7 @@ public async Task InvokerCanCancelWhileStreamingResponses() await using 
StringStreamingCommandInvoker invoker = new(new(), invokerMqttClient); - var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStream(1)); + var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStream(1), executorMqttClient.ClientId!); await foreach (var response in responseStreamContext.Entries) { @@ -160,7 +160,7 @@ public async Task ExecutorCanCancelWhileStreamingRequests() await using StringStreamingCommandInvoker invoker = new(new(), invokerMqttClient); - var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStreamWithDelay()); + var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStreamWithDelay(), executorMqttClient.ClientId!); bool receivedCancellation = false; try @@ -194,7 +194,7 @@ public async Task ExecutorCanCancelWhileStreamingResponses() await using StringStreamingCommandInvoker invoker = new(new(), invokerMqttClient); - var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStream(1)); + var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStream(1), executorMqttClient.ClientId!); bool receivedCancellation = false; try @@ -235,7 +235,7 @@ public async Task CanStreamRequestsAndResponsesSimultaneously() tcs1.TrySetResult(); // Don't need to delay the first message - var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStreamWithDelay(tcs1, tcs2, tcs3), requestMetadata); + var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStreamWithDelay(tcs1, tcs2, tcs3), executorMqttClient.ClientId!, requestMetadata); List> receivedResponses = new(); await foreach (StreamingExtendedResponse response in responseStreamContext.Entries) @@ -299,7 +299,7 @@ public async Task InvokerCanCompleteRequestStreamWithYieldBreak() await using StringStreamingCommandInvoker invoker = new(new(), invokerMqttClient); - var 
stream = await invoker.InvokeStreamingCommandAsync(GetStringRequestStreamWithYieldBreak()); + var stream = await invoker.InvokeStreamingCommandAsync(GetStringRequestStreamWithYieldBreak(), executorMqttClient.ClientId!); await foreach (var response in stream.Entries) { @@ -324,7 +324,7 @@ public async Task ExecutorCanCompleteResponseStreamWithYieldBreak() await using StringStreamingCommandInvoker invoker = new(new(), invokerMqttClient); - var stream = await invoker.InvokeStreamingCommandAsync(GetStringRequestStream(3)); + var stream = await invoker.InvokeStreamingCommandAsync(GetStringRequestStream(3), executorMqttClient.ClientId!); await foreach (var response in stream.Entries) { From c39c41adc8e0ecc8a9dc704342832e5da1dde381 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Mon, 8 Sep 2025 11:02:18 -0700 Subject: [PATCH 58/74] fix --- doc/dev/adr/0025-rpc-streaming.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index feb1628d0b..a5e4747426 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -84,7 +84,16 @@ public abstract class StreamingCommandInvoker where TReq : class where TResp : class { - // Many requests, many responses. + /// + /// Invoke a streaming command on a particular streaming command executor + /// + /// The stream of requests to send. This stream must contain at least one request. + /// The Id of the executor to send this request to. + /// The metadata for the request stream as a whole. + /// Topic tokens to substitute in the request topic. + /// The timeout between the beginning of the request stream and the end of both the request and response stream. + /// Cancellation token. Signalling this will also make a single attempt to notify the executor of the cancellation. + /// The stream of responses. 
public async Task> InvokeStreamingCommandAsync( IAsyncEnumerable> requests, string executorId, From abe8ae89aac00a9fd705e8d442f6382ebe80e00c Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Wed, 10 Sep 2025 10:38:21 -0700 Subject: [PATCH 59/74] No more executor Id, just use $partition --- doc/dev/adr/0025-rpc-streaming.md | 9 +++------ .../Streaming/StreamingCommandExecutor.cs | 2 -- .../Streaming/StreamingCommandInvoker.cs | 2 -- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index a5e4747426..973fb179fb 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -88,7 +88,6 @@ public abstract class StreamingCommandInvoker /// Invoke a streaming command on a particular streaming command executor /// /// The stream of requests to send. This stream must contain at least one request. - /// The Id of the executor to send this request to. /// The metadata for the request stream as a whole. /// Topic tokens to substitute in the request topic. /// The timeout between the beginning of the request stream and the end of both the request and response stream. @@ -96,7 +95,6 @@ public abstract class StreamingCommandInvoker /// The stream of responses. public async Task> InvokeStreamingCommandAsync( IAsyncEnumerable> requests, - string executorId, StreamRequestMetadata? streamRequestMetadata = null, Dictionary? additionalTopicTokenMap = null, TimeSpan? 
commandTimeout = default, @@ -164,7 +162,8 @@ Once the user invokes a streaming command, the streaming command invoker will se - The same response topic - This response topic must be prefixed with 'clients/{mqtt client id of invoker}' like in vanilla RPC - The same correlation data - - A topic that includes an 'executorId' topic token (see vanilla RPC for details) + - The user property "$partition" set to a value of the client Id of the MQTT client sending this invocation + - This ensures that the broker always routes the messages in the stream to the same executor - The appropriate streaming metadata [see above](#streaming-user-property) - The serialized payload as provided by the user's request object - Any user-definied metadata as specified in the ```ExtendedStreamingRequest``` @@ -183,9 +182,7 @@ The command invoker will acknowledge all messages it receives that match the cor A streaming command executor should start by subscribing to the expected command topic - Even though the streaming command classes are separate from the existing RPC classes, they should also offer the same features around topic string pre/suffixing, custom topic token support, etc. - - The expected command topic _must_ include the 'executorId' topic token and its value must be set equal to the client Id of the executor's MQTT client - - By including the executorId in the expected request topic, we can guarantee that all messages in a request stream are delivered to the same executor - - Because streaming executors always have distinct expected request topics, there is no need to use/configure shared subscriptions + - The executor should use a shared subscription so that, if there are multiple executors, only one of them receives each stream Upon receiving a MQTT message that contains a streaming request, the streaming executor should notify the application layer that the first message in a request stream was received. 
Once the executor has notified the user that the first message in a request stream was received, the user should be able to provide a stream of responses. Upon receiving each response in that stream from the user, the executor will send an MQTT message for each streamed response with: - The same correlation data as the original request diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs index e6c60dd11d..9fe7980b90 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs @@ -31,8 +31,6 @@ public abstract class StreamingCommandExecutor : IAsyncDisposable /// public required Func>, RequestStreamMetadata, CancellationToken, IAsyncEnumerable>> OnStreamingCommandReceived { get; set; } - public string ExecutorId { get; init; } // Must equal the client Id of the MQTT client running this executor. Unlike in vanilla RPC, this is not optional. - public string ServiceGroupId { get; init; } public string RequestTopicPattern { get; init; } diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandInvoker.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandInvoker.cs index bb5a7974c7..b091ba8c53 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandInvoker.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandInvoker.cs @@ -64,7 +64,6 @@ public abstract class StreamingCommandInvoker : IAsyncDisposable /// Invoke a streaming command on a particular streaming command executor /// /// The stream of requests to send. This stream must contain at least one request. - /// The Id of the executor to send this request to. /// The metadata for the request stream as a whole. /// Topic tokens to substitute in the request topic. 
/// The timeout between the beginning of the request stream and the end of both the request and response stream. @@ -72,7 +71,6 @@ public abstract class StreamingCommandInvoker : IAsyncDisposable /// The stream of responses. public async Task>> InvokeStreamingCommandAsync( IAsyncEnumerable> requests, - string executorId, RequestStreamMetadata? streamMetadata = null, Dictionary? additionalTopicTokenMap = null, TimeSpan? streamExchangeTimeout = default, From e6cef757d3156fdb9d02c2eed2eb4a890bf8a230 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Wed, 10 Sep 2025 10:50:23 -0700 Subject: [PATCH 60/74] de-dup + qos 1 clarification --- doc/dev/adr/0025-rpc-streaming.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 973fb179fb..28f61a590f 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -167,6 +167,7 @@ Once the user invokes a streaming command, the streaming command invoker will se - The appropriate streaming metadata [see above](#streaming-user-property) - The serialized payload as provided by the user's request object - Any user-definied metadata as specified in the ```ExtendedStreamingRequest``` + - QoS 1 Once the stream of requests has started sending, the streaming command invoker should expect the stream of responses to arrive on the provided response topic with the provided correlation data and the streaming user property. @@ -176,7 +177,9 @@ Upon receiving an MQTT message in the response stream with the 'isLast' flag set If a streaming command invoker receives an MQTT message with the 'isLast' flag set but has not received any other messages in that response stream, the invoker should log an error, acknowledge the message, but otherwise ignore it. A stream of responses must have at least one entry. 
-The command invoker will acknowledge all messages it receives that match the correlation data of a known streaming command. +The streaming command invoker will acknowledge all messages it receives that match the correlation data of a known streaming command. + +The streaming command invoker will provide de-dupe caching of received responses to account for QoS 1 messages potentially being re-delivered. The streaming command invoker will de-dup using the combination of the correlationId of the stream and the index of the message within that stream. The de-dup cache entries for a stream should be cleared once the stream has finished (gracefully or otherwise). #### Executor side @@ -190,6 +193,7 @@ Upon receiving a MQTT message that contains a streaming request, the streaming e - The appropriate streaming metadata [see above](#streaming-user-property) - The serialized payload as provided by the user's response object - Any user-definied metadata as specified in the ```ExtendedStreamingResponse``` + - QoS 1 Upon receiving an MQTT message in the request stream with the 'isLast' flag set in the '__stream' metadata, the streaming executor should notify the user that the stream of requests has ended. This particular message should not contain any payload or other user properties, so the message _should not_ be propagated to the user as if it were part of the request stream. @@ -197,7 +201,9 @@ If a streaming command executor receives an MQTT message with the 'isLast' flag Unlike normal RPC, the stream command executor should acknowledge the MQTT message of a received stream request as soon as the user has been notified about it. We cannot defer acknowledging the stream request messages until after the full command has finished as streams may run indefinitely and we don't want to block other users of the MQTT client. -Also unlike normal RPC, the streaming command executor will not provide any cache support. This is because streams may grow indefinitely in length and size. 
+Also unlike normal RPC, the streaming command executor will not provide any re-play cache support. This is because streams may grow indefinitely in length and size so re-playing a response stream isn't feasible. + +The streaming command executor will provide de-dupe caching of received requests to account for QoS 1 messages potentially being re-delivered. The streaming command executor will de-dup using the combination of the correlationId of the stream and the index of the message within that stream. The de-dup cache entries for a stream should be cleared once the stream has finished (gracefully or otherwise). ### Timeout support From f4929a28ff3b2878232560b73a8231440d457ffc Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Wed, 10 Sep 2025 11:32:01 -0700 Subject: [PATCH 61/74] fixup --- .../StreamingIntegrationTests.cs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs b/dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs index c487f638d6..cad90a36e0 100644 --- a/dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs +++ b/dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs @@ -60,7 +60,7 @@ public async Task StreamRequestsAndResponsesInSerial(bool multipleRequests, bool await using StringStreamingCommandInvoker invoker = new(new(), invokerMqttClient); RequestStreamMetadata requestMetadata = new(); - var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStream(requestCount), executorMqttClient.ClientId!, requestMetadata); + var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStream(requestCount), requestMetadata); List> receivedResponses = new(); await foreach (StreamingExtendedResponse response in responseStreamContext.Entries) @@ -135,7 +135,7 @@ public async Task 
InvokerCanCancelWhileStreamingResponses() await using StringStreamingCommandInvoker invoker = new(new(), invokerMqttClient); - var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStream(1), executorMqttClient.ClientId!); + var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStream(1)); await foreach (var response in responseStreamContext.Entries) { @@ -160,7 +160,7 @@ public async Task ExecutorCanCancelWhileStreamingRequests() await using StringStreamingCommandInvoker invoker = new(new(), invokerMqttClient); - var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStreamWithDelay(), executorMqttClient.ClientId!); + var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStreamWithDelay()); bool receivedCancellation = false; try @@ -194,7 +194,7 @@ public async Task ExecutorCanCancelWhileStreamingResponses() await using StringStreamingCommandInvoker invoker = new(new(), invokerMqttClient); - var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStream(1), executorMqttClient.ClientId!); + var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStream(1)); bool receivedCancellation = false; try @@ -235,7 +235,7 @@ public async Task CanStreamRequestsAndResponsesSimultaneously() tcs1.TrySetResult(); // Don't need to delay the first message - var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStreamWithDelay(tcs1, tcs2, tcs3), executorMqttClient.ClientId!, requestMetadata); + var responseStreamContext = await invoker.InvokeStreamingCommandAsync(GetStringRequestStreamWithDelay(tcs1, tcs2, tcs3), requestMetadata); List> receivedResponses = new(); await foreach (StreamingExtendedResponse response in responseStreamContext.Entries) @@ -299,7 +299,7 @@ public async Task InvokerCanCompleteRequestStreamWithYieldBreak() await using 
StringStreamingCommandInvoker invoker = new(new(), invokerMqttClient); - var stream = await invoker.InvokeStreamingCommandAsync(GetStringRequestStreamWithYieldBreak(), executorMqttClient.ClientId!); + var stream = await invoker.InvokeStreamingCommandAsync(GetStringRequestStreamWithYieldBreak()); await foreach (var response in stream.Entries) { @@ -324,7 +324,7 @@ public async Task ExecutorCanCompleteResponseStreamWithYieldBreak() await using StringStreamingCommandInvoker invoker = new(new(), invokerMqttClient); - var stream = await invoker.InvokeStreamingCommandAsync(GetStringRequestStream(3), executorMqttClient.ClientId!); + var stream = await invoker.InvokeStreamingCommandAsync(GetStringRequestStream(3)); await foreach (var response in stream.Entries) { From 8322019289ea8534542625e7085eda0351b84262 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Wed, 10 Sep 2025 11:32:21 -0700 Subject: [PATCH 62/74] Optionally delay acknowledgements --- doc/dev/adr/0025-rpc-streaming.md | 4 +- .../ReceivedStreamingExtendedRequest.cs | 29 ++++++++ .../ReceivedStreamingExtendedResponse.cs | 28 +++++++ .../Streaming/StreamingCommandExecutor.cs | 16 +++- .../Streaming/StreamingCommandInvoker.cs | 16 +++- .../StreamingIntegrationTests.cs | 73 +++++++++++++++---- 6 files changed, 147 insertions(+), 19 deletions(-) create mode 100644 dotnet/src/Azure.Iot.Operations.Protocol/Streaming/ReceivedStreamingExtendedRequest.cs create mode 100644 dotnet/src/Azure.Iot.Operations.Protocol/Streaming/ReceivedStreamingExtendedResponse.cs diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 28f61a590f..4a9ffcbb5a 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -177,7 +177,7 @@ Upon receiving an MQTT message in the response stream with the 'isLast' flag set If a streaming command invoker receives an MQTT message with the 'isLast' flag set but has not received any other messages in that response stream, the invoker 
should log an error, acknowledge the message, but otherwise ignore it. A stream of responses must have at least one entry. -The streaming command invoker will acknowledge all messages it receives that match the correlation data of a known streaming command. +By default, the streaming command invoker will acknowledge all response messages it receives as soon as they are given to the user. Users may opt into manual acknowledgements, though. Opting into manual acknowledgements allows the user time to "process" each response as necessary before forgoing re-delivery from the broker if the invoker crashes unexpectedly. The streaming command invoker will provide de-dupe caching of received responses to account for QoS 1 messages potentially being re-delivered. The streaming command invoker will de-dup using a the combination of the correlationId of the stream and the index of the message within that stream. The de-dup cache entries for a stream should be cleared once the stream has finished (gracefully or otherwise). @@ -199,7 +199,7 @@ Upon receiving an MQTT message in the request stream with the 'isLast' flag set If a streaming command executor receives an MQTT message with the 'isLast' flag set but has not received any other messages in that request stream, the executor should log an error, acknowledge the message, but otherwise ignore it. A stream of requests must have at least one entry. -Unlike normal RPC, the stream command executor should acknowledge the MQTT message of a received stream request as soon as the user has been notified about it. We cannot defer acknowledging the stream request messages until after the full command has finished as streams may run indefinitely and we don't want to block other users of the MQTT client. +By default, the streaming command executor will acknowledge all request messages it receives as soon as they are given to the user. Users may opt into manual acknowledgements, though. 
Opting into manual acknowledgements allows the user time to "process" each request as necessary before forgoing re-delivery from the broker if the executor crashes unexpectedly. Also unlike normal RPC, the streaming command executor will not provide any re-play cache support. This is because streams may grow indefinitely in length and size so re-playing a response stream isn't feasible. diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/ReceivedStreamingExtendedRequest.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/ReceivedStreamingExtendedRequest.cs new file mode 100644 index 0000000000..02886fcd40 --- /dev/null +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/ReceivedStreamingExtendedRequest.cs @@ -0,0 +1,29 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +using System; +using System.Threading.Tasks; + +namespace Azure.Iot.Operations.Protocol.Streaming +{ + /// + /// The payload and metadata associated with a single request in a request stream. + /// + /// The type of the payload of the request + public class ReceivedStreamingExtendedRequest : StreamingExtendedRequest + where TReq : class + { + private readonly Task _acknowledgementFunc; + + internal ReceivedStreamingExtendedRequest(TReq request, StreamMessageMetadata metadata, Task acknowledgementFunc) + : base(request, metadata) + { + _acknowledgementFunc = acknowledgementFunc; + } + + public async Task AcknowledgeAsync() + { + await _acknowledgementFunc; + } + } +} diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/ReceivedStreamingExtendedResponse.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/ReceivedStreamingExtendedResponse.cs new file mode 100644 index 0000000000..8d7b4d834f --- /dev/null +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/ReceivedStreamingExtendedResponse.cs @@ -0,0 +1,28 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +using System.Threading.Tasks; + +namespace Azure.Iot.Operations.Protocol.Streaming +{ + /// + /// The payload and metadata associated with a single response in a response stream. + /// + /// The type of the payload of the response + public class ReceivedStreamingExtendedResponse : StreamingExtendedResponse + where TResp : class + { + private readonly Task _acknowledgementFunc; + + internal ReceivedStreamingExtendedResponse(TResp response, StreamMessageMetadata metadata, Task acknowledgementFunc) + : base(response, metadata) + { + _acknowledgementFunc = acknowledgementFunc; + } + + public async Task AcknowledgeAsync() + { + await _acknowledgementFunc; + } + } +} diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs index 9fe7980b90..631b731d2c 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs @@ -29,7 +29,7 @@ public abstract class StreamingCommandExecutor : IAsyncDisposable /// /// The callback provides the stream of requests and requires the user to return one to many responses. /// - public required Func>, RequestStreamMetadata, CancellationToken, IAsyncEnumerable>> OnStreamingCommandReceived { get; set; } + public required Func>, RequestStreamMetadata, CancellationToken, IAsyncEnumerable>> OnStreamingCommandReceived { get; set; } public string ServiceGroupId { get; init; } @@ -46,6 +46,20 @@ public abstract class StreamingCommandExecutor : IAsyncDisposable /// public Dictionary TopicTokenMap { get; protected set; } + /// + /// If true, this executor will acknowledge the MQTT message associated with each streaming request as soon as it arrives. + /// If false, the user must call once they are done processing + /// each request message. 
+ /// + /// + /// Generally, delaying acknowledgement allows for re-delivery by the broker in cases where the executor crashes or restarts unexpectedly. + /// However, MQTT acknowledgements must be delivered in order, so delaying these acknowledgements may affect the flow of acknowledgements + /// being sent by other processes using this same MQTT client. Additionally, the MQTT broker has a limit on the number of un-acknowledged messages + /// that are allowed to be in-flight at a single moment, so delaying too many acknowledgements may halt all further MQTT traffic on the underlying + /// MQTT client. + /// + public bool AutomaticallyAcknowledgeRequests { get; set; } = true; + public Task StartAsync(int? preferredDispatchConcurrency = null, CancellationToken cancellationToken = default) { // TODO: derive the expected request topic (like command executor does) diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandInvoker.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandInvoker.cs index b091ba8c53..15aaba2e6c 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandInvoker.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandInvoker.cs @@ -60,6 +60,20 @@ public abstract class StreamingCommandInvoker : IAsyncDisposable /// public string? ResponseTopicPattern { get; set; } + /// + /// If true, this invoker will acknowledge the MQTT message associated with each streaming response as soon as it arrives. + /// If false, the user must call once they are done processing + /// each response message. + /// + /// + /// Generally, delaying acknowledgement allows for re-delivery by the broker in cases where the invoker crashes or restarts unexpectedly. + /// However, MQTT acknowledgements must be delivered in order, so delaying these acknowledgements may affect the flow of acknowledgements + /// being sent by other processes using this same MQTT client. 
Additionally, the MQTT broker has a limit on the number of un-acknowledged messages + /// that are allowed to be in-flight at a single moment, so delaying too many acknowledgements may halt all further MQTT traffic on the underlying + /// MQTT client. + /// + public bool AutomaticallyAcknowledgeResponses { get; set; } = true; + /// /// Invoke a streaming command on a particular streaming command executor /// @@ -69,7 +83,7 @@ public abstract class StreamingCommandInvoker : IAsyncDisposable /// The timeout between the beginning of the request stream and the end of both the request and response stream. /// Cancellation token. Signalling this will also make a single attempt to notify the executor of the cancellation. /// The stream of responses. - public async Task>> InvokeStreamingCommandAsync( + public async Task>> InvokeStreamingCommandAsync( IAsyncEnumerable> requests, RequestStreamMetadata? streamMetadata = null, Dictionary? additionalTopicTokenMap = null, diff --git a/dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs b/dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs index cad90a36e0..d943ff10b5 100644 --- a/dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs +++ b/dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs @@ -115,7 +115,7 @@ public async Task InvokerCanCancelWhileStreamingRequests() //TODO does cancellat await using StringStreamingCommandInvoker invoker = new(new(), invokerMqttClient); - var stream = await invoker.InvokeStreamingCommandAsync(GetStringRequestStreamWithDelay(), executorMqttClient.ClientId!); + var stream = await invoker.InvokeStreamingCommandAsync(GetStringRequestStreamWithDelay()); await stream.CancelAsync(); } @@ -332,6 +332,34 @@ public async Task ExecutorCanCompleteResponseStreamWithYieldBreak() } } + [Fact] + public async Task InvokerAndExecutorCanDelayAcknowledgements() + { + await 
using MqttSessionClient invokerMqttClient = await ClientFactory.CreateSessionClientFromEnvAsync(); + await using MqttSessionClient executorMqttClient = await ClientFactory.CreateSessionClientFromEnvAsync(); + + await using EchoStringStreamingCommandExecutor executor = + new(new(), executorMqttClient) + { + OnStreamingCommandReceived = SerialHandlerSingleResponseManualAcks + }; + + executor.AutomaticallyAcknowledgeRequests = false; + + await executor.StartAsync(); + + await using StringStreamingCommandInvoker invoker = new(new(), invokerMqttClient); + + invoker.AutomaticallyAcknowledgeResponses = false; + + var stream = await invoker.InvokeStreamingCommandAsync(GetStringRequestStream(3)); + + await foreach (var response in stream.Entries) + { + await response.AcknowledgeAsync(); + } + } + private async IAsyncEnumerable> GetStringRequestStream(int requestCount) { for (int i = 0; i < requestCount; i++) @@ -387,9 +415,9 @@ private static async IAsyncEnumerable> GetStrin } } - private async IAsyncEnumerable> SerialHandlerSingleResponse(IStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) + private async IAsyncEnumerable> SerialHandlerSingleResponse(IStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) { - await SaveReceivedRequests(stream, streamMetadata, cancellationToken); + await SaveReceivedRequests(stream, streamMetadata, false, cancellationToken); await foreach (var response in GetStringStreamContext(3).WithCancellation(cancellationToken)) { @@ -397,9 +425,9 @@ private async IAsyncEnumerable> SerialHandlerS } } - private async IAsyncEnumerable> SerialHandlerMultipleResponses(IStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) + private async IAsyncEnumerable> SerialHandlerMultipleResponses(IStreamContext> stream, RequestStreamMetadata streamMetadata, 
[EnumeratorCancellation] CancellationToken cancellationToken) { - await SaveReceivedRequests(stream, streamMetadata, cancellationToken); + await SaveReceivedRequests(stream, streamMetadata, false, cancellationToken); await foreach (var response in GetStringStreamContext(3).WithCancellation(cancellationToken)) { @@ -413,9 +441,9 @@ private async IAsyncEnumerable> SerialHandlerM } } - private async IAsyncEnumerable> SerialHandlerMultipleResponsesWithYieldBreakAfterFirstResponse(IStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) + private async IAsyncEnumerable> SerialHandlerMultipleResponsesWithYieldBreakAfterFirstResponse(IStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) { - await SaveReceivedRequests(stream, streamMetadata, cancellationToken); + await SaveReceivedRequests(stream, streamMetadata, false, cancellationToken); await foreach (var response in GetStringStreamContext(3).WithCancellation(cancellationToken)) { @@ -430,7 +458,7 @@ private async IAsyncEnumerable> SerialHandlerM } } - private async IAsyncEnumerable> ParallelHandlerEchoResponses(IStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) + private async IAsyncEnumerable> ParallelHandlerEchoResponses(IStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) { await foreach (StreamingExtendedRequest requestStreamEntry in stream.Entries.WithCancellation(cancellationToken)) { @@ -446,9 +474,9 @@ private async IAsyncEnumerable> ParallelHandle } } - private async IAsyncEnumerable> SerialHandlerMultipleResponsesWithDelay(IStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) + private async IAsyncEnumerable> SerialHandlerMultipleResponsesWithDelay(IStreamContext> 
stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) { - await SaveReceivedRequests(stream, streamMetadata, cancellationToken); + await SaveReceivedRequests(stream, streamMetadata, false, cancellationToken); await foreach (var response in GetStringStreamContext(3).WithCancellation(cancellationToken)) { @@ -465,7 +493,7 @@ private async IAsyncEnumerable> SerialHandlerM } #pragma warning disable IDE0060 // Remove unused parameter - private static async IAsyncEnumerable> SerialHandlerWithCancellationWhileStreamingRequests(IStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) + private static async IAsyncEnumerable> SerialHandlerWithCancellationWhileStreamingRequests(IStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) #pragma warning restore IDE0060 // Remove unused parameter { CancellationTokenSource requestTimeoutCancellationTokenSource = new CancellationTokenSource(); @@ -493,9 +521,9 @@ private static async IAsyncEnumerable> SerialH } } - private async IAsyncEnumerable> SerialHandlerWithCancellationWhileStreamingResponses(IStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) + private async IAsyncEnumerable> SerialHandlerWithCancellationWhileStreamingResponses(IStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) { - await SaveReceivedRequests(stream, streamMetadata, cancellationToken); + await SaveReceivedRequests(stream, streamMetadata, false, cancellationToken); CancellationTokenSource cts = new(); cts.CancelAfter(TimeSpan.FromSeconds(1)); @@ -519,9 +547,19 @@ private async IAsyncEnumerable> SerialHandlerW } } - private async Task SaveReceivedRequests(IStreamContext> stream, RequestStreamMetadata streamMetadata, CancellationToken 
cancellationToken) + private async IAsyncEnumerable> SerialHandlerSingleResponseManualAcks(IStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) { - await foreach (StreamingExtendedRequest requestStreamEntry in stream.Entries.WithCancellation(cancellationToken)) + await SaveReceivedRequests(stream, streamMetadata, true, cancellationToken); + + await foreach (var response in GetStringStreamContext(3).WithCancellation(cancellationToken)) + { + yield return response; + } + } + + private async Task SaveReceivedRequests(IStreamContext> stream, RequestStreamMetadata streamMetadata, bool manualAcks, CancellationToken cancellationToken) + { + await foreach (ReceivedStreamingExtendedRequest requestStreamEntry in stream.Entries.WithCancellation(cancellationToken)) { // doesn't overwrite if the correlationId already exists in the dictionary _receivedRequests.TryAdd(streamMetadata.CorrelationId, new()); @@ -530,6 +568,11 @@ private async Task SaveReceivedRequests(IStreamContext Date: Wed, 10 Sep 2025 15:17:28 -0700 Subject: [PATCH 63/74] cancellation user properties so far --- .../Streaming/IStreamContext.cs | 7 ++++++- .../Streaming/StreamingCommandExecutor.cs | 2 +- .../StreamingIntegrationTests.cs | 17 +++++++++-------- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/IStreamContext.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/IStreamContext.cs index 2f47d5d084..c4386b9278 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/IStreamContext.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/IStreamContext.cs @@ -22,6 +22,7 @@ public interface IStreamContext /// /// Cancel this RPC streaming call. /// + /// The optional user properties to include in this cancellation request. 
/// Cancellation token for this cancellation request /// /// When called by the invoker, the executor will be notified about this cancellation and the executor will attempt @@ -32,6 +33,10 @@ public interface IStreamContext /// stalls unexpectedly, the executor can call this method to notify the invoker to stop sending requests. /// Additionally, the invoker can call this method if its response stream has stalled unexpectedly. /// - Task CancelAsync(CancellationToken cancellationToken = default); + Task CancelAsync(Dictionary? userData = null, CancellationToken cancellationToken = default); + + //TODO how to pass these user properties to the executor when invoker cancels? Triggering cancellation token isn't sufficient + + //TODO move cancellation token in here so that both invoker + executor can access it more seamlessly? Move func in here as well for same reason? } } diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs index 631b731d2c..566b488bd3 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs @@ -29,7 +29,7 @@ public abstract class StreamingCommandExecutor : IAsyncDisposable /// /// The callback provides the stream of requests and requires the user to return one to many responses. 
/// - public required Func>, RequestStreamMetadata, CancellationToken, IAsyncEnumerable>> OnStreamingCommandReceived { get; set; } + public required Func>, RequestStreamMetadata, Func>, CancellationToken, IAsyncEnumerable>> OnStreamingCommandReceived { get; set; } public string ServiceGroupId { get; init; } diff --git a/dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs b/dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs index d943ff10b5..97ecad2d05 100644 --- a/dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs +++ b/dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs @@ -99,6 +99,7 @@ public async Task StreamRequestsAndResponsesInSerial(bool multipleRequests, bool } } + //TODO add user properties to these tests [Fact] public async Task InvokerCanCancelWhileStreamingRequests() //TODO does cancellation token trigger on executor side? Add to other tests as well { @@ -415,7 +416,7 @@ private static async IAsyncEnumerable> GetStrin } } - private async IAsyncEnumerable> SerialHandlerSingleResponse(IStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) + private async IAsyncEnumerable> SerialHandlerSingleResponse(IStreamContext> stream, RequestStreamMetadata streamMetadata, Func> _, [EnumeratorCancellation] CancellationToken cancellationToken) { await SaveReceivedRequests(stream, streamMetadata, false, cancellationToken); @@ -425,7 +426,7 @@ private async IAsyncEnumerable> SerialHandlerS } } - private async IAsyncEnumerable> SerialHandlerMultipleResponses(IStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) + private async IAsyncEnumerable> SerialHandlerMultipleResponses(IStreamContext> stream, RequestStreamMetadata streamMetadata, Func> _, [EnumeratorCancellation] CancellationToken 
cancellationToken) { await SaveReceivedRequests(stream, streamMetadata, false, cancellationToken); @@ -441,7 +442,7 @@ private async IAsyncEnumerable> SerialHandlerM } } - private async IAsyncEnumerable> SerialHandlerMultipleResponsesWithYieldBreakAfterFirstResponse(IStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) + private async IAsyncEnumerable> SerialHandlerMultipleResponsesWithYieldBreakAfterFirstResponse(IStreamContext> stream, RequestStreamMetadata streamMetadata, Func> _, [EnumeratorCancellation] CancellationToken cancellationToken) { await SaveReceivedRequests(stream, streamMetadata, false, cancellationToken); @@ -458,7 +459,7 @@ private async IAsyncEnumerable> SerialHandlerM } } - private async IAsyncEnumerable> ParallelHandlerEchoResponses(IStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) + private async IAsyncEnumerable> ParallelHandlerEchoResponses(IStreamContext> stream, RequestStreamMetadata streamMetadata, Func> _, [EnumeratorCancellation] CancellationToken cancellationToken) { await foreach (StreamingExtendedRequest requestStreamEntry in stream.Entries.WithCancellation(cancellationToken)) { @@ -474,7 +475,7 @@ private async IAsyncEnumerable> ParallelHandle } } - private async IAsyncEnumerable> SerialHandlerMultipleResponsesWithDelay(IStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) + private async IAsyncEnumerable> SerialHandlerMultipleResponsesWithDelay(IStreamContext> stream, RequestStreamMetadata streamMetadata, Func> _, [EnumeratorCancellation] CancellationToken cancellationToken) { await SaveReceivedRequests(stream, streamMetadata, false, cancellationToken); @@ -493,7 +494,7 @@ private async IAsyncEnumerable> SerialHandlerM } #pragma warning disable IDE0060 // Remove unused parameter - private static async IAsyncEnumerable> 
SerialHandlerWithCancellationWhileStreamingRequests(IStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) + private static async IAsyncEnumerable> SerialHandlerWithCancellationWhileStreamingRequests(IStreamContext> stream, RequestStreamMetadata streamMetadata, Func> _, [EnumeratorCancellation] CancellationToken cancellationToken) #pragma warning restore IDE0060 // Remove unused parameter { CancellationTokenSource requestTimeoutCancellationTokenSource = new CancellationTokenSource(); @@ -521,7 +522,7 @@ private static async IAsyncEnumerable> SerialH } } - private async IAsyncEnumerable> SerialHandlerWithCancellationWhileStreamingResponses(IStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) + private async IAsyncEnumerable> SerialHandlerWithCancellationWhileStreamingResponses(IStreamContext> stream, RequestStreamMetadata streamMetadata, Func> _, [EnumeratorCancellation] CancellationToken cancellationToken) { await SaveReceivedRequests(stream, streamMetadata, false, cancellationToken); @@ -547,7 +548,7 @@ private async IAsyncEnumerable> SerialHandlerW } } - private async IAsyncEnumerable> SerialHandlerSingleResponseManualAcks(IStreamContext> stream, RequestStreamMetadata streamMetadata, [EnumeratorCancellation] CancellationToken cancellationToken) + private async IAsyncEnumerable> SerialHandlerSingleResponseManualAcks(IStreamContext> stream, RequestStreamMetadata streamMetadata, Func> _, [EnumeratorCancellation] CancellationToken cancellationToken) { await SaveReceivedRequests(stream, streamMetadata, true, cancellationToken); From e8d75e4e947eaf2dc0f55e6c8ee16ab594356d32 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Fri, 12 Sep 2025 16:38:18 -0700 Subject: [PATCH 64/74] more cancellation user properties support asdf --- .../Streaming/IStreamContext.cs | 35 ++++- .../Streaming/StreamMessageMetadata.cs | 2 +- 
.../Streaming/StreamingCommandExecutor.cs | 4 +- .../Streaming/StreamingExtendedRequest.cs | 4 +- .../Streaming/StreamingExtendedResponse.cs | 4 +- .../StreamingIntegrationTests.cs | 139 ++++++++++++------ 6 files changed, 127 insertions(+), 61 deletions(-) diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/IStreamContext.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/IStreamContext.cs index c4386b9278..f71cc3ac6a 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/IStreamContext.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/IStreamContext.cs @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. +using System; using System.Collections.Generic; using System.Threading; using System.Threading.Tasks; @@ -10,7 +11,7 @@ namespace Azure.Iot.Operations.Protocol.Streaming /// /// A stream of requests or responses that can be gracefully ended or canceled (with confirmation) at any time. /// - /// The type of the payload of the request stream + /// The type of the payload of the request/response stream public interface IStreamContext where T : class { @@ -20,14 +21,18 @@ public interface IStreamContext IAsyncEnumerable Entries { get; set; } /// - /// Cancel this RPC streaming call. + /// Cancel this RPC streaming exchange. /// - /// The optional user properties to include in this cancellation request. - /// Cancellation token for this cancellation request + /// + /// The optional user properties to include in this cancellation request. the receiving side of this cancellation request + /// will be given these properties alongside the notification that the streaming exchange has been canceled. + /// + /// Cancellation token to wait for confirmation from the receiving side that the cancellation succeeded. /// /// When called by the invoker, the executor will be notified about this cancellation and the executor will attempt /// to stop any user-defined handling of the streaming request. 
When called by the executor, the invoker will be notified - /// and will cease sending requests. + /// and will cease sending requests and will throw an with + /// of . /// /// This method may be called by the streaming invoker or executor at any time. For instance, if the request stream /// stalls unexpectedly, the executor can call this method to notify the invoker to stop sending requests. @@ -35,8 +40,24 @@ public interface IStreamContext /// Task CancelAsync(Dictionary? userData = null, CancellationToken cancellationToken = default); - //TODO how to pass these user properties to the executor when invoker cancels? Triggering cancellation token isn't sufficient + /// + /// The token that tracks if the streaming exchange has been cancelled or not. + /// + /// + /// For instance, if the invoker side cancels the streaming exchange, the executor side callback's + /// will be triggered. If the executor side cancels the streaming exchange, the invoker side's returned + /// will be triggered. + /// + CancellationToken CancellationToken { get; } - //TODO move cancellation token in here so that both invoker + executor can access it more seamlessly? Move func in here as well for same reason? + /// + /// Get the user properties associated with a cancellation request started with . + /// + /// The user properties associated with a cancellation request + /// + /// If the stream has not been cancelled, this will return null. If the stream has been cancelled, but no user properties were + /// provided in that cancellation request, this will return null. + /// + Dictionary? 
GetCancellationRequestUserProperties(); } } diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamMessageMetadata.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamMessageMetadata.cs index b60c0c9cd8..176424d8d9 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamMessageMetadata.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamMessageMetadata.cs @@ -18,7 +18,7 @@ public class StreamMessageMetadata /// /// User properties associated with this particular message /// - public Dictionary UserData { get; } = new(); + public Dictionary UserData { get; init; } = new(); /// /// The index of this message within the stream as a whole diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs index 566b488bd3..09242895f6 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs @@ -29,7 +29,7 @@ public abstract class StreamingCommandExecutor : IAsyncDisposable /// /// The callback provides the stream of requests and requires the user to return one to many responses. /// - public required Func>, RequestStreamMetadata, Func>, CancellationToken, IAsyncEnumerable>> OnStreamingCommandReceived { get; set; } + public required Func>, RequestStreamMetadata, IAsyncEnumerable>> OnStreamingCommandReceived { get; set; } public string ServiceGroupId { get; init; } @@ -60,7 +60,7 @@ public abstract class StreamingCommandExecutor : IAsyncDisposable /// public bool AutomaticallyAcknowledgeRequests { get; set; } = true; - public Task StartAsync(int? 
preferredDispatchConcurrency = null, CancellationToken cancellationToken = default) + public Task StartAsync(CancellationToken cancellationToken = default) { // TODO: derive the expected request topic (like command executor does) diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingExtendedRequest.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingExtendedRequest.cs index b3079ad2bf..60487ef8ea 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingExtendedRequest.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingExtendedRequest.cs @@ -13,7 +13,7 @@ public class StreamingExtendedRequest /// /// The request payload /// - public TReq Request { get; set; } + public TReq Payload { get; set; } /// /// The metadata specific to this message in the stream @@ -22,7 +22,7 @@ public class StreamingExtendedRequest public StreamingExtendedRequest(TReq request, StreamMessageMetadata? metadata = null) { - Request = request; + Payload = request; Metadata = metadata ?? new(); } } diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingExtendedResponse.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingExtendedResponse.cs index 108c95304b..e542dbd592 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingExtendedResponse.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingExtendedResponse.cs @@ -13,7 +13,7 @@ public class StreamingExtendedResponse /// /// The response payload /// - public TResp Response { get; set; } + public TResp Payload { get; set; } /// /// The metadata specific to this message in the stream @@ -22,7 +22,7 @@ public class StreamingExtendedResponse public StreamingExtendedResponse(TResp response, StreamMessageMetadata? metadata = null) { - Response = response; + Payload = response; Metadata = metadata ?? 
new(); } } diff --git a/dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs b/dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs index 97ecad2d05..1da01ba0ec 100644 --- a/dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs +++ b/dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs @@ -2,7 +2,6 @@ // Licensed under the MIT License. using System.Collections.Concurrent; -using System.Runtime.CompilerServices; using Azure.Iot.Operations.Mqtt.Session; using Azure.Iot.Operations.Protocol.Streaming; @@ -82,7 +81,7 @@ public async Task StreamRequestsAndResponsesInSerial(bool multipleRequests, bool Assert.Equal(expectedRequests.Count, receivedRequests.Count); for (int i = 0; i < expectedRequests.Count; i++) { - Assert.Equal(expectedRequests[i].Request, receivedRequests[i].Request); + Assert.Equal(expectedRequests[i].Payload, receivedRequests[i].Payload); Assert.Equal(i, receivedRequests[i].Metadata!.Index); } @@ -94,7 +93,7 @@ public async Task StreamRequestsAndResponsesInSerial(bool multipleRequests, bool Assert.Equal(receivedResponses.Count, sentResponses.Count); for (int i = 0; i < expectedRequests.Count; i++) { - Assert.Equal(sentResponses[i].Response, receivedResponses[i].Response); + Assert.Equal(sentResponses[i].Payload, receivedResponses[i].Payload); Assert.Equal(i, receivedResponses[i].Metadata!.Index); } } @@ -109,7 +108,7 @@ public async Task InvokerCanCancelWhileStreamingRequests() //TODO does cancellat await using EchoStringStreamingCommandExecutor executor = new(new(), executorMqttClient) { - OnStreamingCommandReceived = SerialHandlerSingleResponse + OnStreamingCommandReceived = SerialHandlerExpectsCancellationWhileStreamingRequests }; await executor.StartAsync(); @@ -118,7 +117,14 @@ public async Task InvokerCanCancelWhileStreamingRequests() //TODO does cancellat var stream = await 
invoker.InvokeStreamingCommandAsync(GetStringRequestStreamWithDelay()); - await stream.CancelAsync(); + Dictionary cancellationCustomUserProperties = new() + { + { "someUserPropertyKey", "someUserPropertyValue"} + }; + + await stream.CancelAsync(cancellationCustomUserProperties); + + //TODO assert the executor received cancellation + user properties } [Fact] @@ -140,7 +146,12 @@ public async Task InvokerCanCancelWhileStreamingResponses() await foreach (var response in responseStreamContext.Entries) { - await responseStreamContext.CancelAsync(); + Dictionary cancellationCustomUserProperties = new() + { + { "someUserPropertyKey", "someUserPropertyValue"} + }; + + await responseStreamContext.CancelAsync(cancellationCustomUserProperties); break; } } @@ -154,7 +165,7 @@ public async Task ExecutorCanCancelWhileStreamingRequests() await using EchoStringStreamingCommandExecutor executor = new(new(), executorMqttClient) { - OnStreamingCommandReceived = SerialHandlerWithCancellationWhileStreamingRequests + OnStreamingCommandReceived = SerialHandlerThatCancelsWhileStreamingRequests }; await executor.StartAsync(); @@ -174,6 +185,10 @@ public async Task ExecutorCanCancelWhileStreamingRequests() catch (AkriMqttException ame) when (ame.Kind is AkriMqttErrorKind.Cancellation) { receivedCancellation = true; + Assert.True(responseStreamContext.CancellationToken.IsCancellationRequested); // TODO timing on exception thrown vs cancellation token triggered? + Dictionary? 
cancellationRequestUserProperties = responseStreamContext.GetCancellationRequestUserProperties(); + Assert.NotNull(cancellationRequestUserProperties); + Assert.NotEmpty(cancellationRequestUserProperties.Keys); //TODO actually validate the values match } Assert.True(receivedCancellation); @@ -188,7 +203,7 @@ public async Task ExecutorCanCancelWhileStreamingResponses() await using EchoStringStreamingCommandExecutor executor = new(new(), executorMqttClient) { - OnStreamingCommandReceived = SerialHandlerWithCancellationWhileStreamingResponses + OnStreamingCommandReceived = SerialHandlerThatCancelsStreamingWhileStreamingResponses }; await executor.StartAsync(); @@ -208,6 +223,10 @@ public async Task ExecutorCanCancelWhileStreamingResponses() catch (AkriMqttException ame) when (ame.Kind is AkriMqttErrorKind.Cancellation) { receivedCancellation = true; + Assert.True(responseStreamContext.CancellationToken.IsCancellationRequested); // TODO timing on exception thrown vs cancellation token triggered? + Dictionary? 
cancellationRequestUserProperties = responseStreamContext.GetCancellationRequestUserProperties(); + Assert.NotNull(cancellationRequestUserProperties); + Assert.NotEmpty(cancellationRequestUserProperties.Keys); //TODO actually validate the values match } Assert.True(receivedCancellation); @@ -266,7 +285,7 @@ public async Task CanStreamRequestsAndResponsesSimultaneously() Assert.Equal(receivedResponses.Count, receivedRequests.Count); for (int i = 0; i < receivedResponses.Count; i++) { - Assert.Equal(receivedResponses[i].Response, receivedRequests[i].Request); + Assert.Equal(receivedResponses[i].Payload, receivedRequests[i].Payload); } } @@ -416,21 +435,40 @@ private static async IAsyncEnumerable> GetStrin } } - private async IAsyncEnumerable> SerialHandlerSingleResponse(IStreamContext> stream, RequestStreamMetadata streamMetadata, Func> _, [EnumeratorCancellation] CancellationToken cancellationToken) + private async IAsyncEnumerable> SerialHandlerSingleResponse(IStreamContext> stream, RequestStreamMetadata streamMetadata) { - await SaveReceivedRequests(stream, streamMetadata, false, cancellationToken); + await SaveReceivedRequests(stream, streamMetadata, false, stream.CancellationToken); - await foreach (var response in GetStringStreamContext(3).WithCancellation(cancellationToken)) + await foreach (var response in GetStringStreamContext(3).WithCancellation(stream.CancellationToken)) { yield return response; } } - private async IAsyncEnumerable> SerialHandlerMultipleResponses(IStreamContext> stream, RequestStreamMetadata streamMetadata, Func> _, [EnumeratorCancellation] CancellationToken cancellationToken) + private async IAsyncEnumerable> SerialHandlerExpectsCancellationWhileStreamingRequests(IStreamContext> stream, RequestStreamMetadata streamMetadata) { - await SaveReceivedRequests(stream, streamMetadata, false, cancellationToken); + try + { + await foreach (var request in stream.Entries.WithCancellation(stream.CancellationToken)) + { + } + } + catch 
(OperationCanceledException) + { + // The stream was cancelled by the invoker while it streamed requests + Dictionary? cancellationUserProperties = stream.GetCancellationRequestUserProperties(); + + //TODO assert received user properties in the cancellation request + } + + yield return new("should never be reached"); + } - await foreach (var response in GetStringStreamContext(3).WithCancellation(cancellationToken)) + private async IAsyncEnumerable> SerialHandlerMultipleResponses(IStreamContext> stream, RequestStreamMetadata streamMetadata) + { + await SaveReceivedRequests(stream, streamMetadata, false, stream.CancellationToken); + + await foreach (var response in GetStringStreamContext(3).WithCancellation(stream.CancellationToken)) { _sentResponses.TryAdd(streamMetadata.CorrelationId, new()); if (_sentResponses.TryGetValue(streamMetadata.CorrelationId, out var sentResponses)) @@ -442,11 +480,11 @@ private async IAsyncEnumerable> SerialHandlerM } } - private async IAsyncEnumerable> SerialHandlerMultipleResponsesWithYieldBreakAfterFirstResponse(IStreamContext> stream, RequestStreamMetadata streamMetadata, Func> _, [EnumeratorCancellation] CancellationToken cancellationToken) + private async IAsyncEnumerable> SerialHandlerMultipleResponsesWithYieldBreakAfterFirstResponse(IStreamContext> stream, RequestStreamMetadata streamMetadata) { - await SaveReceivedRequests(stream, streamMetadata, false, cancellationToken); + await SaveReceivedRequests(stream, streamMetadata, false, stream.CancellationToken); - await foreach (var response in GetStringStreamContext(3).WithCancellation(cancellationToken)) + await foreach (var response in GetStringStreamContext(3).WithCancellation(stream.CancellationToken)) { _sentResponses.TryAdd(streamMetadata.CorrelationId, new()); if (_sentResponses.TryGetValue(streamMetadata.CorrelationId, out var sentResponses)) @@ -459,9 +497,9 @@ private async IAsyncEnumerable> SerialHandlerM } } - private async IAsyncEnumerable> 
ParallelHandlerEchoResponses(IStreamContext> stream, RequestStreamMetadata streamMetadata, Func> _, [EnumeratorCancellation] CancellationToken cancellationToken) + private async IAsyncEnumerable> ParallelHandlerEchoResponses(IStreamContext> stream, RequestStreamMetadata streamMetadata) { - await foreach (StreamingExtendedRequest requestStreamEntry in stream.Entries.WithCancellation(cancellationToken)) + await foreach (StreamingExtendedRequest requestStreamEntry in stream.Entries.WithCancellation(stream.CancellationToken)) { // doesn't overwrite if the correlationId already exists in the dictionary _receivedRequests.TryAdd(streamMetadata.CorrelationId, new()); @@ -471,31 +509,41 @@ private async IAsyncEnumerable> ParallelHandle requestsReceived.Add(requestStreamEntry); } - yield return new(requestStreamEntry.Request); + yield return new(requestStreamEntry.Payload); } } - private async IAsyncEnumerable> SerialHandlerMultipleResponsesWithDelay(IStreamContext> stream, RequestStreamMetadata streamMetadata, Func> _, [EnumeratorCancellation] CancellationToken cancellationToken) + private async IAsyncEnumerable> SerialHandlerMultipleResponsesWithDelay(IStreamContext> stream, RequestStreamMetadata streamMetadata) { - await SaveReceivedRequests(stream, streamMetadata, false, cancellationToken); + await SaveReceivedRequests(stream, streamMetadata, false, stream.CancellationToken); + + var asyncEnumeratorWithCancellation = GetStringRequestStreamWithDelay().WithCancellation(stream.CancellationToken).GetAsyncEnumerator(); - await foreach (var response in GetStringStreamContext(3).WithCancellation(cancellationToken)) + bool readingRequestStream = true; + while (readingRequestStream) { - _sentResponses.TryAdd(streamMetadata.CorrelationId, new()); - if (_sentResponses.TryGetValue(streamMetadata.CorrelationId, out var sentResponses)) + StreamingExtendedRequest request; + try { - sentResponses.Add(response); + readingRequestStream = await 
asyncEnumeratorWithCancellation.MoveNextAsync(); + request = asyncEnumeratorWithCancellation.Current; } + catch (OperationCanceledException) + { + // The invoker side will cancel this stream of responses (via the provided cancellation token) since it takes too long - yield return response; + Dictionary? cancellationUserProperties = stream.GetCancellationRequestUserProperties(); + + //TODO assert these match the user properties sent by the invoker - await Task.Delay(TimeSpan.FromHours(1), cancellationToken); + yield break; + } + + yield return new(request.Payload); } } -#pragma warning disable IDE0060 // Remove unused parameter - private static async IAsyncEnumerable> SerialHandlerWithCancellationWhileStreamingRequests(IStreamContext> stream, RequestStreamMetadata streamMetadata, Func> _, [EnumeratorCancellation] CancellationToken cancellationToken) -#pragma warning restore IDE0060 // Remove unused parameter + private static async IAsyncEnumerable> SerialHandlerThatCancelsWhileStreamingRequests(IStreamContext> stream, RequestStreamMetadata streamMetadata) { CancellationTokenSource requestTimeoutCancellationTokenSource = new CancellationTokenSource(); requestTimeoutCancellationTokenSource.CancelAfter(TimeSpan.FromSeconds(1)); @@ -518,29 +566,26 @@ private static async IAsyncEnumerable> SerialH yield break; } - yield return new(request.Request); + yield return new(request.Payload); } } - private async IAsyncEnumerable> SerialHandlerWithCancellationWhileStreamingResponses(IStreamContext> stream, RequestStreamMetadata streamMetadata, Func> _, [EnumeratorCancellation] CancellationToken cancellationToken) + private async IAsyncEnumerable> SerialHandlerThatCancelsStreamingWhileStreamingResponses(IStreamContext> stream, RequestStreamMetadata streamMetadata) { - await SaveReceivedRequests(stream, streamMetadata, false, cancellationToken); + await SaveReceivedRequests(stream, streamMetadata, false, stream.CancellationToken); CancellationTokenSource cts = new(); 
cts.CancelAfter(TimeSpan.FromSeconds(1)); for (int responseCount = 0; responseCount < 5; responseCount++) { - try + if (responseCount == 3) { - if (responseCount == 3) + Dictionary cancellationCustomUserProperties = new() { - // simulate one entry in the response stream taking too long and the executor deciding to cancel the stream because of it - await Task.Delay(TimeSpan.FromHours(1), cancellationToken); - } - } - catch (OperationCanceledException) - { - await stream.CancelAsync(); + { "someUserPropertyKey", "someUserPropertyValue"} + }; + + await stream.CancelAsync(cancellationCustomUserProperties); yield break; } @@ -548,11 +593,11 @@ private async IAsyncEnumerable> SerialHandlerW } } - private async IAsyncEnumerable> SerialHandlerSingleResponseManualAcks(IStreamContext> stream, RequestStreamMetadata streamMetadata, Func> _, [EnumeratorCancellation] CancellationToken cancellationToken) + private async IAsyncEnumerable> SerialHandlerSingleResponseManualAcks(IStreamContext> stream, RequestStreamMetadata streamMetadata) { - await SaveReceivedRequests(stream, streamMetadata, true, cancellationToken); + await SaveReceivedRequests(stream, streamMetadata, true, stream.CancellationToken); - await foreach (var response in GetStringStreamContext(3).WithCancellation(cancellationToken)) + await foreach (var response in GetStringStreamContext(3).WithCancellation(stream.CancellationToken)) { yield return response; } From 645f30624441b3cca01b81ec340169f434b6c380 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Mon, 15 Sep 2025 11:55:09 -0700 Subject: [PATCH 65/74] message level timeout is back --- doc/dev/adr/0025-rpc-streaming.md | 9 ++++++++- .../Streaming/StreamingCommandInvoker.cs | 7 +++++-- .../Streaming/StreamingExtendedRequest.cs | 12 ++++++++++++ .../Streaming/StreamingExtendedResponse.cs | 12 ++++++++++++ 4 files changed, 37 insertions(+), 3 deletions(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 
4a9ffcbb5a..a344255774 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -214,7 +214,9 @@ We need to provide timeout support for our streaming APIs to avoid scenarios suc #### Decision -We will allow configuration on the invoker's side of a timeout for the RPC as a whole. +We will allow configuration on the invoker's side of a timeout for the RPC as a whole and a timeout of each message in the request and/or response stream. + +##### RPC level timeout To enable this, each message in the request stream will include a value in the `````` portion of the ```__stream``` user property. This header should be sent in all request stream messages in case the first N request messages are lost due to timeout or otherwise. @@ -228,6 +230,11 @@ If the request stream omits the timeout value in the ```__stream``` user propert This design does make the invoker start the countdown sooner than the executor, but the time difference is negligible in most circumstances. +##### Message level timeout + +We will allow users to set the message expiry interval of each message in a request/response stream. By default, though, we will set each message expiry interval equal to the RPC level timeout value. + + #### Alternative timeout designs considered - The above approach, but trying to calculate time spent on broker side (using message expiry interval) so that invoker and executor timeout at the same exact time diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandInvoker.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandInvoker.cs index 15aaba2e6c..07d329bafd 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandInvoker.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandInvoker.cs @@ -81,13 +81,16 @@ public abstract class StreamingCommandInvoker : IAsyncDisposable /// The metadata for the request stream as a whole. 
/// Topic tokens to substitute in the request topic. /// The timeout between the beginning of the request stream and the end of both the request and response stream. - /// Cancellation token. Signalling this will also make a single attempt to notify the executor of the cancellation. + /// + /// Cancellation token. Signalling this will also make a single attempt to notify the executor of the cancellation. To make multiple attempts to cancel and/or + /// check that this cancellation succeeded, use instead. + /// /// The stream of responses. public async Task>> InvokeStreamingCommandAsync( IAsyncEnumerable> requests, RequestStreamMetadata? streamMetadata = null, Dictionary? additionalTopicTokenMap = null, - TimeSpan? streamExchangeTimeout = default, + TimeSpan? streamExchangeTimeout = null, CancellationToken cancellationToken = default) { // TODO: Derive the request topic (like commandInvoker does) diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingExtendedRequest.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingExtendedRequest.cs index 60487ef8ea..be174de3e0 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingExtendedRequest.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingExtendedRequest.cs @@ -1,6 +1,8 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. +using System; + namespace Azure.Iot.Operations.Protocol.Streaming { /// @@ -20,6 +22,16 @@ public class StreamingExtendedRequest /// public StreamMessageMetadata Metadata { get; set; } + /// + /// How long the message will be persisted by the MQTT broker if the executor side is not connected to receive it. + /// + /// + /// By default, this value will be set equal to the stream-level timeout specified in . + /// Generally, this value should be strictly less than or equal to the stream-level timeout. 
+ /// Setting shorter timespans here allows for streamed messages to expire if they are no longer relevant beyond a certain point. + /// + public TimeSpan? MessageExpiry { get; set; } + public StreamingExtendedRequest(TReq request, StreamMessageMetadata? metadata = null) { Payload = request; diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingExtendedResponse.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingExtendedResponse.cs index e542dbd592..c33c1bfd71 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingExtendedResponse.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingExtendedResponse.cs @@ -1,6 +1,8 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. +using System; + namespace Azure.Iot.Operations.Protocol.Streaming { /// @@ -20,6 +22,16 @@ public class StreamingExtendedResponse /// public StreamMessageMetadata Metadata { get; set; } + /// + /// How long the message will be persisted by the MQTT broker if the invoker side is not connected to receive it. + /// + /// + /// By default, this value will be set equal to the stream-level timeout specified in . + /// Generally, this value should be strictly less than or equal to the stream-level timeout. + /// Setting shorter timespans here allows for streamed messages to expire if they are no longer relevant beyond a certain point. + /// + public TimeSpan? MessageExpiry { get; set; } + public StreamingExtendedResponse(TResp response, StreamMessageMetadata? 
metadata = null) { Payload = response; From 3e0f8637d1749383d290f3dcfc79e46d3654590e Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Mon, 15 Sep 2025 12:59:03 -0700 Subject: [PATCH 66/74] fixup --- doc/dev/adr/0025-rpc-streaming.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index a344255774..9631e4b10d 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -252,6 +252,8 @@ To avoid scenarios where long-running streaming requests/responses are no longer Since sending a cancellation request may fail (message expiry on broker side), the SDK API design should allow for the user to repeatedly call "cancel" and should return successfully once the other party has responded appropriately. +Additionally, cancellation requests may include user properties. This allows users to provide additional context on why the cancellation is happening. + #### .NET API design The proposed cancellation support would come from the return type on the invoker side and the provided type on the executor side: From 886906c0f120f69f987b277412964dfd66b1bd43 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Mon, 15 Sep 2025 13:52:27 -0700 Subject: [PATCH 67/74] fixup --- doc/dev/adr/0025-rpc-streaming.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 9631e4b10d..a427828ddf 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -270,13 +270,14 @@ public interface IStreamContext /// /// Cancel this received RPC streaming request. /// + /// The optional user properties to include /// Cancellation token for this cancellation request /// /// This method may be called by the streaming executor at any time. For instance, if the request stream /// stalls unexpectedly, the executor can call this method to notify the invoker to stop sending requests. 
/// Additionally, the executor can call this method if its response stream has stalled unexpectedly. /// - Task CancelAsync(CancellationToken cancellationToken = default); + Task CancelAsync(Dictionary? userProperties = null, CancellationToken cancellationToken = default); } ``` From 040940d0798d508a50bb255a0a895f311e9f5e59 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Mon, 15 Sep 2025 14:18:31 -0700 Subject: [PATCH 68/74] timeout vs cancellation --- .../Streaming/IStreamContext.cs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/IStreamContext.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/IStreamContext.cs index f71cc3ac6a..a948e33e52 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/IStreamContext.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/IStreamContext.cs @@ -41,12 +41,15 @@ public interface IStreamContext Task CancelAsync(Dictionary? userData = null, CancellationToken cancellationToken = default); /// - /// The token that tracks if the streaming exchange has been cancelled or not. + /// The token that tracks if the streaming exchange has been cancelled by the other party and/or timed out. /// /// /// For instance, if the invoker side cancels the streaming exchange, the executor side callback's /// will be triggered. If the executor side cancels the streaming exchange, the invoker side's returned /// will be triggered. + /// + /// To see if this was triggered because the stream exchange was cancelled, see . To see if it was triggered because + /// the stream exchange timed out, see . /// CancellationToken CancellationToken { get; } @@ -59,5 +62,15 @@ public interface IStreamContext /// provided in that cancellation request, this will return null. /// Dictionary? GetCancellationRequestUserProperties(); + + /// + /// True if this stream exchange has timed out. If a stream has timed out, will trigger as well. 
+ /// + bool HasTimedOut { get; internal set; } + + /// + /// True if this stream exchange has been canceled by the other party. If a stream has been cancelled, will trigger as well. + /// + bool IsCanceled { get; internal set; } } } From c7cba209e1745ab227a118bc14c27fd4b4e86fa6 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Mon, 15 Sep 2025 16:05:18 -0700 Subject: [PATCH 69/74] more --- doc/dev/adr/0025-rpc-streaming.md | 33 +++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index a427828ddf..eb9727270d 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -278,6 +278,39 @@ public interface IStreamContext /// Additionally, the executor can call this method if its response stream has stalled unexpectedly. /// Task CancelAsync(Dictionary? userProperties = null, CancellationToken cancellationToken = default); + + /// + /// The token that tracks if the streaming exchange has been cancelled by the other party and/or timed out. + /// + /// + /// For instance, if the invoker side cancels the streaming exchange, the executor side callback's + /// will be triggered. If the executor side cancels the streaming exchange, the invoker side's returned + /// will be triggered. + /// + /// To see if this was triggered because the stream exchange was cancelled, see . To see if it was triggered because + /// the stream exchange timed out, see . + /// + CancellationToken CancellationToken { get; } + + /// + /// Get the user properties associated with a cancellation request started with . + /// + /// The user properties associated with a cancellation request + /// + /// If the stream has not been cancelled, this will return null. If the stream has been cancelled, but no user properties were + /// provided in that cancellation request, this will return null. + /// + Dictionary? 
GetCancellationRequestUserProperties(); + + /// + /// True if this stream exchange has timed out. If a stream has timed out, will trigger as well. + /// + bool HasTimedOut { get; internal set; } + + /// + /// True if this stream exchange has been canceled by the other party. If a stream has been cancelled, will trigger as well. + /// + bool IsCanceled { get; internal set; } } ``` From 8f72c15138318a7febc8005c4af8ebd04a54a882 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Tue, 16 Sep 2025 10:06:11 -0700 Subject: [PATCH 70/74] isLast --- doc/dev/adr/0025-rpc-streaming.md | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index eb9727270d..530f4b7d47 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -173,9 +173,7 @@ Once the stream of requests has started sending, the streaming command invoker s Once the user-supplied stream of request messages has ended, the streaming command invoker should send one final message to the same topic/with the same correlation data with no payload and with the 'isLast' flag set in the '__stream' metadata bundle. -Upon receiving an MQTT message in the response stream with the 'isLast' flag set in the '__stream' metadata, the streaming command invoker should notify the user that the stream of responses has ended. This particular message should not contain any payload or other user properties, so the message _should not_ be propagated to the user as if it were part of the response stream. - -If a streaming command invoker receives an MQTT message with the 'isLast' flag set but has not received any other messages in that response stream, the invoker should log an error, acknowledge the message, but otherwise ignore it. A stream of responses must have at least one entry. 
+Upon receiving an MQTT message in the response stream with the 'isLast' flag set in the '__stream' metadata, the streaming command invoker should notify the user that the stream of responses has ended. This particular message should not contain any payload or other user properties, so the message _should not_ be propagated to the user as if it were part of the response stream. [See here for more details on why this ```isLast``` flag is an independent message](#islast-message-being-its-own-message). By default, the streaming command invoker will acknowledge all request messages it receives as soon as they are given to the user. Users may opt into manual acknowledgements, though. Opting into manual acknowledgements allows the user time to "process" each response as necessary before forgoing re-delivery from the broker if the invoker crashes unexpectedly. @@ -195,7 +193,7 @@ Upon receiving a MQTT message that contains a streaming request, the streaming e - Any user-definied metadata as specified in the ```ExtendedStreamingResponse``` - QoS 1 -Upon receiving an MQTT message in the request stream with the 'isLast' flag set in the '__stream' metadata, the streaming executor should notify the user that the stream of requests has ended. This particular message should not contain any payload or other user properties, so the message _should not_ be propagated to the user as if it were part of the request stream. +Upon receiving an MQTT message in the request stream with the 'isLast' flag set in the '__stream' metadata, the streaming executor should notify the user that the stream of requests has ended. This particular message should not contain any payload or other user properties, so the message _should not_ be propagated to the user as if it were part of the request stream. [See here for more details on why this ```isLast``` flag is an independent message](#islast-message-being-its-own-message). 
If a streaming command executor receives an MQTT message with the 'isLast' flag set but has not received any other messages in that request stream, the executor should log an error, acknowledge the message, but otherwise ignore it. A stream of requests must have at least one entry. @@ -314,7 +312,7 @@ public interface IStreamContext } ``` -With this design, we can cancel a stream from either side at any time. For detailed examples, see the integration tests written [here](../../../dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs). +With this design, we can cancel a stream from either side at any time and check for received user properties on any received cancellation requests. For detailed examples, see the integration tests written [here](../../../dotnet/test/Azure.Iot.Operations.Protocol.IntegrationTests/StreamingIntegrationTests.cs). ### Protocol layer details @@ -384,3 +382,17 @@ By maintaining RPC streaming as a separate communication pattern from normal RPC - Executor receives a streaming command but the user did not set the streaming command handler callback (which must be optional since not every command executor has streaming commands) - API design is messy because a command invoker/executor should not expose streaming command APIs if they have no streaming commands - Caching behavior of normal RPC doesn't fit well with streamed RPCs which may grow indefinitely large + + +## Appendix + +### IsLast message being its own message + +Three possible approaches to marking the final message in a stream were considered. Below are the two rejected approaches and the reason why each one doesn't work: + +- Require the ```isLast``` flag to be set on a message that carries a fully-fledged stream message (i.e.
has a user-provided payload and/or user properties) + - We must support ending streams at an arbitrary time even if a fully-fledged stream message can't be sent and this approach doesn't allow for that +- Allow the ```isLast``` flag to be set on either a fully-fledged stream message or as a standalone message with no user payload and no user properties + - This approach does not allow the receiving end to distinguish between "The stream is over" and "This is the final message in the stream" in cases where the user may provide no payload or user properties on streamed messages. + +Because the two above approaches either don't support our requirements or have ambiguities in corner cases, we should require the ```isLast``` flag be set on a standalone message with no user payload and no user properties. \ No newline at end of file From 5c5d4b37271f4e4cae86642ef16686268721746e Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Tue, 16 Sep 2025 10:55:21 -0700 Subject: [PATCH 71/74] unused --- .../Streaming/StreamingCommandExecutor.cs | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs index 09242895f6..532c24ea05 100644 --- a/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs +++ b/dotnet/src/Azure.Iot.Operations.Protocol/Streaming/StreamingCommandExecutor.cs @@ -14,15 +14,6 @@ public abstract class StreamingCommandExecutor : IAsyncDisposable where TReq : class where TResp : class { - /// - /// The timeout for all commands received by this executor. - /// - /// - /// Note that a command invoker may also send a per-invocation timeout. When this happens, a command will timeout if it exceeds either - /// of these timeout values.
- /// - public TimeSpan ExecutionTimeout { get; set; } - /// /// A streaming command was invoked /// @@ -31,12 +22,8 @@ public abstract class StreamingCommandExecutor : IAsyncDisposable /// public required Func>, RequestStreamMetadata, IAsyncEnumerable>> OnStreamingCommandReceived { get; set; } - public string ServiceGroupId { get; init; } - public string RequestTopicPattern { get; init; } - public string? TopicNamespace { get; set; } - /// /// The topic token replacement map that this executor will use by default. Generally, this will include the token values /// for topic tokens such as "executorId" which should be the same for the duration of this command executor's lifetime. From 1e59bd994d2e962a6d2c488ae16f08a37dd2e665 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Tue, 16 Sep 2025 11:05:55 -0700 Subject: [PATCH 72/74] expiry interval note --- doc/dev/adr/0025-rpc-streaming.md | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 530f4b7d47..906f98b0e4 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -232,6 +232,7 @@ This design does make the invoker start the countdown sooner than the executor, We will allow users to set the message expiry interval of each message in a request/response stream. By default, though, we will set each message expiry interval equal to the RPC level timeout value. +The message expiry interval will not be checked on the receiving side, though. It is for use at the MQTT broker only. 
#### Alternative timeout designs considered From 266a8091a383fc5fedc494f97101f0d56197b371 Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Wed, 17 Sep 2025 11:02:33 -0700 Subject: [PATCH 73/74] message expiry purpose --- doc/dev/adr/0025-rpc-streaming.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 906f98b0e4..2370258612 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -232,7 +232,7 @@ This design does make the invoker start the countdown sooner than the executor, We will allow users to set the message expiry interval of each message in a request/response stream. By default, though, we will set each message expiry interval equal to the RPC level timeout value. -The message expiry interval will not be checked on the receiving side, though. It is for use at the MQTT broker only. +Both the invoker and executor stream messages _must_ include a message expiry interval. The receiving end will use this value as the de-dup cache length for each cached message. Vanilla RPC has the same requirement as explained [here](../../reference/command-timeouts.md#input-values). 
#### Alternative timeout designs considered From 10f55dcab314135a35e7ef362c223d02d066b98c Mon Sep 17 00:00:00 2001 From: timtay-microsoft Date: Fri, 19 Sep 2025 15:30:56 -0700 Subject: [PATCH 74/74] broker behavior --- doc/dev/adr/0025-rpc-streaming.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/dev/adr/0025-rpc-streaming.md b/doc/dev/adr/0025-rpc-streaming.md index 2370258612..f8b3a6a9c9 100644 --- a/doc/dev/adr/0025-rpc-streaming.md +++ b/doc/dev/adr/0025-rpc-streaming.md @@ -360,8 +360,9 @@ Any received MQTT messages pertaining to a command that was already canceled sho - The broker should hold all published responses for as long as the invoker's session lives and send them upon reconnection - If the invoker's session is lost, then the RPC will timeout - Executor side isn't connected when invoker sends first request - - Invoker will receive a "no matching subscribers" puback + - Depending on broker behavior, invoker will receive a "no matching subscribers" puback - Seems like a scenario we would want to retry? + - If the broker returns a successful puback, then the invoker side will eventually time out - Executor side disconnects unexpectedly while receiving requests - Broker should hold all published requests for as long as the executor's session lives and send them upon reconnection - If the executor's session is lost, the RPC will timeout