Skip to content

Adding restart reason #11191

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 18 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/WebJobs.Script.Grpc/Channel/GrpcWorkerChannel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ internal GrpcWorkerChannel(
_eventSubscriptions.Add(_eventManager.OfType<FileEvent>()
.Where(msg => _workerConfig.Description.Extensions.Contains(Path.GetExtension(msg.FileChangeArguments.FullPath)))
.Throttle(TimeSpan.FromMilliseconds(300)) // debounce
.Subscribe(msg => _eventManager.Publish(new HostRestartEvent())));
.Subscribe(msg => _eventManager.Publish(new HostRestartEvent($"Worker monitored file changed: '{msg.FileChangeArguments.Name}'."))));

_startLatencyMetric = metricsLogger?.LatencyEvent(string.Format(MetricEventNames.WorkerInitializeLatency, workerConfig.Description.Language, attemptCount));

Expand Down
4 changes: 2 additions & 2 deletions src/WebJobs.Script.WebHost/Controllers/HostController.cs
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ public async Task<IActionResult> Resume([FromServices] IScriptHostManager script
if (drainModeManager.IsDrainModeEnabled)
{
_logger.LogDebug("Starting a new host");
await scriptHostManager.RestartHostAsync();
await scriptHostManager.RestartHostAsync("Resuming from drain mode.", CancellationToken.None);
}

var status = new ResumeStatus { State = scriptHostManager.State };
Expand Down Expand Up @@ -451,7 +451,7 @@ public async Task<IActionResult> GetTriggers()
[Authorize(Policy = PolicyNames.AdminAuthLevel)]
public IActionResult Restart([FromServices] IScriptHostManager hostManager)
{
Task ignore = hostManager.RestartHostAsync();
Task ignore = hostManager.RestartHostAsync("Restart API called.");

var hostOptionsValue = _applicationHostOptions.Value;
var response = new HostRestartResponse()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,11 @@ public static class ScriptHostServiceLoggerExtension
new EventId(518, nameof(ActiveHostChanging)),
"Active host changing from '{oldHostInstanceId}' to '{newHostInstanceId}'.");

private static readonly Action<ILogger, Exception> _enteringRestart =
LoggerMessage.Define(
private static readonly Action<ILogger, string, Exception> _enteringRestart =
LoggerMessage.Define<string>(
LogLevel.Debug,
new EventId(519, nameof(EnteringRestart)),
"Restart requested.");
"Restart requested. Reason '{restartReason}'");

private static readonly Action<ILogger, Exception> _restartBeforeStart =
LoggerMessage.Define(
Expand Down Expand Up @@ -321,9 +321,9 @@ public static void ActiveHostChanging(this ILogger logger, string oldHostInstanc
_activeHostChanging(logger, oldHostInstanceId, newHostInstanceId, null);
}

public static void EnteringRestart(this ILogger logger)
public static void EnteringRestart(this ILogger logger, string reason)
{
_enteringRestart(logger, null);
_enteringRestart(logger, reason, null);
}

public static void RestartBeforeStart(this ILogger logger)
Expand Down
31 changes: 18 additions & 13 deletions src/WebJobs.Script.WebHost/FileMonitoringService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ public class FileMonitoringService : IFileMonitoringService, IDisposable
private readonly ILogger _logger;
private readonly ILogger<FileMonitoringService> _typedLogger;
private readonly IList<IDisposable> _eventSubscriptions = new List<IDisposable>();
private readonly Func<Task> _restart;
private readonly Func<string, Task> _restart;
private readonly Action _shutdown;
private readonly ImmutableArray<string> _rootDirectorySnapshot;
private AutoRecoveringFileSystemWatcher _debugModeFileWatcher;
Expand Down Expand Up @@ -112,23 +112,23 @@ private void ResumeRestartIfScheduled()
using (System.Threading.ExecutionContext.SuppressFlow())
{
_typedLogger.LogDebug("Resuming scheduled restart.");
Task.Run(async () => await ScheduleRestartAsync());
Task.Run(async () => await ScheduleRestartAsync("Running restart requested while in a restart suspension scope."));
}
}
}

private async Task ScheduleRestartAsync(bool shutdown)
private async Task ScheduleRestartAsync(string reason, bool shutdown)
{
_restartScheduled = true;
if (shutdown)
{
_shutdownScheduled = true;
}

await ScheduleRestartAsync();
await ScheduleRestartAsync(reason);
}

private async Task ScheduleRestartAsync()
private async Task ScheduleRestartAsync(string reason)
{
if (Interlocked.Read(ref _suspensionRequestsCount) > 0)
{
Expand All @@ -142,7 +142,7 @@ private async Task ScheduleRestartAsync()
}
else
{
await _restart();
await _restart(reason);
}
}
}
Expand All @@ -164,14 +164,18 @@ private void InitializeFileWatchers()
}

_eventSubscriptions.Add(_eventManager.OfType<HostRestartEvent>()
.Subscribe((msg) => ScheduleRestartAsync(false)
.ContinueWith(t => _logger.LogCritical(t.Exception.Message),
TaskContinuationOptions.OnlyOnFaulted | TaskContinuationOptions.ExecuteSynchronously)));
.Subscribe(HandleHostRestartEvent));

// Delay starting up for logging and debug file watchers to avoid long start up times
Utility.ExecuteAfterColdStartDelay(_environment, InitializeSecondaryFileWatchers);
}

private void HandleHostRestartEvent(HostRestartEvent restartEvent)
{
ScheduleRestartAsync($"Handling {nameof(HostRestartEvent)} ('{restartEvent.Name}':'{restartEvent.Source}'). Reason: {restartEvent.Reason}", false)
.ContinueWith(t => _logger.LogCritical(t.Exception?.Message), TaskContinuationOptions.OnlyOnFaulted | TaskContinuationOptions.ExecuteSynchronously);
}

/// <summary>
/// Initializes the file and directory monitoring that does not need to happen as part of a Host startup
/// These watchers can be started after a delay to avoid startup performance issue
Expand Down Expand Up @@ -317,8 +321,9 @@ private void OnFileChanged(FileSystemEventArgs e)
}

TraceFileChangeRestart(changeDescription, e.ChangeType.ToString(), e.FullPath, shutdown);
ScheduleRestartAsync(shutdown).ContinueWith(t => _logger.LogError(t.Exception, $"Error restarting host (full shutdown: {shutdown})"),
TaskContinuationOptions.ExecuteSynchronously | TaskContinuationOptions.OnlyOnFaulted);
ScheduleRestartAsync("Monitored file change detected.", shutdown)
.ContinueWith(t => _logger.LogError(t.Exception, $"Error restarting host (full shutdown: {shutdown})"),
TaskContinuationOptions.ExecuteSynchronously | TaskContinuationOptions.OnlyOnFaulted);
}
}

Expand Down Expand Up @@ -349,11 +354,11 @@ internal static string GetRelativeDirectory(string path, string scriptRoot)
return string.Empty;
}

private Task RestartAsync()
private Task RestartAsync(string reason)
{
if (!_shutdownScheduled && Interlocked.Exchange(ref _restartRequested, 1) == 0)
{
return _scriptHostManager.RestartHostAsync();
return _scriptHostManager.RestartHostAsync(reason);
}

return Task.CompletedTask;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ public async Task HostWarmupAsync(HttpRequest request)
{
if (request.Query.TryGetValue("restart", out StringValues value) && string.Compare("1", value) == 0)
{
await _hostManager.RestartHostAsync(CancellationToken.None);
await _hostManager.RestartHostAsync("Host warmup call requested a restart.", CancellationToken.None);

// This call is here for sanity, but we should be fully initialized.
await _hostManager.DelayUntilHostReady();
Expand Down
4 changes: 2 additions & 2 deletions src/WebJobs.Script.WebHost/Standby/StandbyManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ public Task SpecializeHostAsync()
if (t.IsFaulted)
{
// if we fail during specialization for whatever reason
// this is fatal, so we shutdown
// this is fatal, so we shut down.
_logger.LogError(t.Exception, "Specialization failed. Shutting down.");
_applicationLifetime.StopApplication();
}
Expand Down Expand Up @@ -119,7 +119,7 @@ public async Task SpecializeHostCoreAsync()

using (_metricsLogger.LatencyEvent(MetricEventNames.SpecializationRestartHost))
{
await _scriptHostManager.RestartHostAsync();
await _scriptHostManager.RestartHostAsync("Host specialization.");
}

using (_metricsLogger.LatencyEvent(MetricEventNames.SpecializationDelayUntilHostReady))
Expand Down
10 changes: 5 additions & 5 deletions src/WebJobs.Script.WebHost/WebJobsScriptHostService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -553,7 +553,7 @@ public async Task StopAsync(CancellationToken cancellationToken)
State = ScriptHostState.Stopped;
}

public async Task RestartHostAsync(CancellationToken cancellationToken)
public async Task RestartHostAsync(string reason, CancellationToken cancellationToken)
{
if (ShutdownRequested)
{
Expand All @@ -570,7 +570,7 @@ public async Task RestartHostAsync(CancellationToken cancellationToken)
await _hostStarted;
}

_logger.EnteringRestart();
_logger.EnteringRestart(reason);

// If anything is mid-startup, cancel it.
_startupLoopTokenSource?.Cancel();
Expand Down Expand Up @@ -798,10 +798,10 @@ private void OnHostHealthCheckTimer(object state)
{
// This periodic check allows us to break out of the host run
// loop. The health check performed in OnHostInitializing will then
// fail and we'll enter a restart loop (exponentially backing off)
// until the host is healthy again and we can resume host processing.
// fail, and we'll enter a restart loop (exponentially backing off)
// until the host is healthy again, and we can resume host processing.
_logger.UnhealthyRestart();
var tIgnore = RestartHostAsync(CancellationToken.None);
var tIgnore = RestartHostAsync("Host health check detected an unhealthy host.", CancellationToken.None);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,13 +176,13 @@ private async Task ReloadScriptAsync()

// If the compilation succeeded, AND:
// - We haven't cached a function (failed to compile on load), OR
// - We're referencing local function types (i.e. POCOs defined in the function) AND Our our function signature has changed
// - We're referencing local function types (i.e. POCOs defined in the function) AND our function signature has changed
// Restart our host.
if (compilationSucceeded &&
(_functionSignature == null ||
(_functionSignature.HasLocalTypeReference || !_functionSignature.Equals(signature))))
{
await Host.RestartAsync().ConfigureAwait(false);
await Host.RestartAsync(".NET compilation.").ConfigureAwait(false);
}
}

Expand Down
5 changes: 4 additions & 1 deletion src/WebJobs.Script/Eventing/Host/HostRestartEvent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@ namespace Microsoft.Azure.WebJobs.Script.Eventing
{
public class HostRestartEvent : ScriptEvent
{
public HostRestartEvent()
public HostRestartEvent(string reason)
: base(nameof(HostRestartEvent), EventSources.Worker)
{
Reason = reason;
}

public string Reason { get; }
}
}
4 changes: 2 additions & 2 deletions src/WebJobs.Script/Host/IScriptHostManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public interface IScriptHostManager
/// <summary>
/// Restarts the current Script Job Host.
/// </summary>
/// <returns>A <see cref="Task"/> that will completed when the host is restarted.</returns>
Task RestartHostAsync(CancellationToken cancellationToken = default);
/// <returns>A <see cref="Task"/> that will be completed when the host is restarted.</returns>
Task RestartHostAsync(string reason, CancellationToken cancellationToken = default);
}
}
6 changes: 2 additions & 4 deletions src/WebJobs.Script/Host/ScriptHost.cs
Original file line number Diff line number Diff line change
Expand Up @@ -407,11 +407,9 @@ internal static Collection<CustomAttributeBuilder> CreateTypeAttributes(ScriptJo
return customAttributes;
}

// TODO: DI (FACAVAL) Remove this method.
// all restart/shutdown requests should go through the
internal Task RestartAsync()
internal Task RestartAsync(string reason)
{
_scriptHostManager.RestartHostAsync();
_scriptHostManager.RestartHostAsync(reason);
return Task.CompletedTask;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ public async Task<FunctionMetadataResult> GetFunctionMetadataAsync(IEnumerable<R
// During the restart flow, GetFunctionMetadataAsync gets invoked
// again through a new script host initialization flow.
_logger.LogDebug("Host is running without any initialized channels, restarting the JobHost.");
await _scriptHostManager.RestartHostAsync();
await _scriptHostManager.RestartHostAsync("No initialized worker channels available.");
}

channels = _channelManager.GetChannels(_workerRuntime);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ public async Task<string> GetFunctionSecretAsync(string functionName)

public async Task RestartAsync(CancellationToken cancellationToken)
{
await _hostService.RestartHostAsync(cancellationToken);
await _hostService.RestartHostAsync("test", cancellationToken);
}

private void StillRunningCallback(object state)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ await TestHelpers.Await(() =>
});

// Once we've hit 2, we have a couple seconds to trigger a restart.
_ = Task.Run(() => scriptHostManager.RestartHostAsync());
_ = Task.Run(() => scriptHostManager.RestartHostAsync("test"));

DateTime start = DateTime.UtcNow;
await TestHelpers.Await(() =>
Expand Down
2 changes: 1 addition & 1 deletion test/WebJobs.Script.Tests.Shared/TestHelpers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,7 @@ object IServiceProvider.GetService(Type serviceType)
throw new NotImplementedException();
}

Task IScriptHostManager.RestartHostAsync(CancellationToken cancellationToken)
Task IScriptHostManager.RestartHostAsync(string reason, CancellationToken cancellationToken)
{
throw new NotImplementedException();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ public async Task ResumeHost_HostNotRunning_ReturnsExpected(ScriptHostState host

var result = (StatusCodeResult)await _hostController.Resume(scriptHostManagerMock.Object, default);
Assert.Equal(expectedCode, result.StatusCode);
scriptHostManagerMock.Verify(p => p.RestartHostAsync(It.IsAny<CancellationToken>()), Times.Never());
scriptHostManagerMock.Verify(p => p.RestartHostAsync("test", It.IsAny<CancellationToken>()), Times.Never());
}

[Fact]
Expand All @@ -361,15 +361,15 @@ public async Task ResumeHost_HostRunning_DrainModeEnabled_StartNewHostSuccessful

serviceProviderMock.Setup(x => x.GetService(typeof(IDrainModeManager))).Returns(drainModeManager.Object);
scriptHostManagerMock.SetupGet(p => p.State).Returns(ScriptHostState.Running);
scriptHostManagerMock.Setup(p => p.RestartHostAsync(It.IsAny<CancellationToken>())).Returns(Task.CompletedTask);
scriptHostManagerMock.Setup(p => p.RestartHostAsync("test", It.IsAny<CancellationToken>())).Returns(Task.CompletedTask);
drainModeManager.Setup(x => x.IsDrainModeEnabled).Returns(true);

var expectedBody = new ResumeStatus { State = ScriptHostState.Running };
var result = (OkObjectResult)await _hostController.Resume(scriptHostManagerMock.Object, default);

Assert.Equal(StatusCodes.Status200OK, result.StatusCode);
Assert.Equal(expectedBody.State, (result.Value as ResumeStatus).State);
scriptHostManagerMock.Verify(p => p.RestartHostAsync(It.IsAny<CancellationToken>()), Times.Once());
scriptHostManagerMock.Verify(p => p.RestartHostAsync("test", It.IsAny<CancellationToken>()), Times.Once());
}

[Fact]
Expand All @@ -388,7 +388,7 @@ public async Task ResumeHost_HostRunning_DrainModeNotEnabled_DoesNotStartNewHost

Assert.Equal(StatusCodes.Status200OK, result.StatusCode);
Assert.Equal(expectedBody.State, (result.Value as ResumeStatus).State);
scriptHostManagerMock.Verify(p => p.RestartHostAsync(It.IsAny<CancellationToken>()), Times.Never());
scriptHostManagerMock.Verify(p => p.RestartHostAsync("test", It.IsAny<CancellationToken>()), Times.Never());
}

[Fact]
Expand All @@ -398,7 +398,7 @@ public async Task ResumeHost_HostRunning_ClientDisconnect_DoesNotStartNewHost()
using var cts = new CancellationTokenSource();
cts.Cancel();
await Assert.ThrowsAnyAsync<OperationCanceledException>(() => _hostController.Resume(scriptHostManagerMock.Object, cts.Token));
scriptHostManagerMock.Verify(p => p.RestartHostAsync(It.IsAny<CancellationToken>()), Times.Never());
scriptHostManagerMock.Verify(p => p.RestartHostAsync("test", It.IsAny<CancellationToken>()), Times.Never());
}
}
}
Loading