diff --git a/CHANGELOG.md b/CHANGELOG.md index 68b500cc5..1c1c36311 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,9 @@ ## (Unreleased) -- Add automatic retry on gateway timeout in `GrpcDurableTaskClient.WaitForInstanceCompletionAsync` in [#412](https://github.com/microsoft/durabletask-dotnet/pull/412)) +- Add automatic retry on gateway timeout in `GrpcDurableTaskClient.WaitForInstanceCompletionAsync` in ([#412](https://github.com/microsoft/durabletask-dotnet/pull/412)) +- Add specific logging for NotFound error on worker connection by @halspang in ([#413](https://github.com/microsoft/durabletask-dotnet/pull/413)) + ## v1.10.0 diff --git a/src/Worker/Grpc/GrpcDurableTaskWorker.Processor.cs b/src/Worker/Grpc/GrpcDurableTaskWorker.Processor.cs index dbc18c81b..327bb0cc1 100644 --- a/src/Worker/Grpc/GrpcDurableTaskWorker.Processor.cs +++ b/src/Worker/Grpc/GrpcDurableTaskWorker.Processor.cs @@ -65,6 +65,16 @@ public async Task ExecuteAsync(CancellationToken cancellation) // Sidecar is down - keep retrying this.Logger.SidecarUnavailable(); } + catch (RpcException ex) when (ex.StatusCode == StatusCode.NotFound) + { + // We retry on a NotFound for several reasons: + // 1. It was the existing behavior through the UnexpectedError path. + // 2. A 404 can be returned for a missing task hub or authentication failure. Authentication takes + // time to propagate so we should retry instead of making the user restart the application. + // 3. In some cases, a task hub can be created separately from the scheduler. If a worker is deployed + // between the scheduler and task hub, it would need to be restarted to function. + this.Logger.TaskHubNotFound(); + } catch (OperationCanceledException) when (cancellation.IsCancellationRequested) { // Shutting down, lets exit gracefully. 
diff --git a/src/Worker/Grpc/Logs.cs b/src/Worker/Grpc/Logs.cs index 1230e26e8..240a40639 100644 --- a/src/Worker/Grpc/Logs.cs +++ b/src/Worker/Grpc/Logs.cs @@ -22,6 +22,9 @@ static partial class Logs [LoggerMessage(EventId = 4, Level = LogLevel.Information, Message = "Sidecar work-item streaming connection established.")] public static partial void EstablishedWorkItemConnection(this ILogger logger); + [LoggerMessage(EventId = 5, Level = LogLevel.Warning, Message = "Task hub NotFound. Will continue retrying.")] + public static partial void TaskHubNotFound(this ILogger logger); + [LoggerMessage(EventId = 10, Level = LogLevel.Debug, Message = "{instanceId}: Received request to run orchestrator '{name}' with {oldEventCount} replay and {newEventCount} new history events.")] public static partial void ReceivedOrchestratorRequest(this ILogger logger, string name, string instanceId, int oldEventCount, int newEventCount);