Skip to content

Commit cde54a5

Browse files
committed
preventing 'old' startup exceptions from shutting down host
1 parent 112ceff commit cde54a5

File tree

4 files changed

+178
-38
lines changed

4 files changed

+178
-38
lines changed

src/WebJobs.Script.WebHost/WebJobsScriptHostService.cs

Lines changed: 56 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ public async Task StartAsync(CancellationToken cancellationToken)
115115
private async Task StartHostAsync(CancellationToken cancellationToken, int attemptCount = 0, JobHostStartupMode startupMode = JobHostStartupMode.Normal)
116116
{
117117
cancellationToken.ThrowIfCancellationRequested();
118+
IHost localHost = null;
118119

119120
try
120121
{
@@ -132,15 +133,16 @@ private async Task StartHostAsync(CancellationToken cancellationToken, int attem
132133
// If we're in a non-transient error state or offline, skip host initialization
133134
bool skipJobHostStartup = isOffline || hasNonTransientErrors;
134135

135-
_host = BuildHost(skipJobHostStartup, skipHostJsonConfiguration: startupMode == JobHostStartupMode.HandlingConfigurationParsingError);
136+
localHost = BuildHost(skipJobHostStartup, skipHostJsonConfiguration: startupMode == JobHostStartupMode.HandlingConfigurationParsingError);
137+
_host = localHost;
136138

137139
var scriptHost = (ScriptHost)_host.Services.GetService<ScriptHost>();
138140
if (scriptHost != null)
139141
{
140142
scriptHost.HostInitializing += OnHostInitializing;
141143
}
142144

143-
LogInitialization(isOffline, attemptCount, ++_hostStartCount);
145+
LogInitialization(localHost, isOffline, attemptCount, ++_hostStartCount);
144146

145147
await _host.StartAsync(cancellationToken);
146148

@@ -156,32 +158,49 @@ private async Task StartHostAsync(CancellationToken cancellationToken, int attem
156158
}
157159
catch (OperationCanceledException)
158160
{
161+
GetHostLogger(localHost).LogDebug("Host startup was canceled.");
159162
throw;
160163
}
161164
catch (Exception exc)
162165
{
163-
LastError = exc;
164-
State = ScriptHostState.Error;
165-
attemptCount++;
166+
bool isActiveHost = ReferenceEquals(localHost, _host);
167+
ILogger logger = GetHostLogger(localHost);
168+
169+
if (isActiveHost)
170+
{
171+
LastError = exc;
172+
State = ScriptHostState.Error;
173+
logger.LogError(exc, "A host error has occurred");
174+
}
175+
else
176+
{
177+
// Another host has been created before this host
178+
// threw its startup exception. We want to make sure it
179+
// doesn't control the state of the service.
180+
logger.LogWarning(exc, "A host error has occurred on an inactive host");
181+
}
166182

167-
ILogger logger = GetHostLogger();
168-
logger.LogError(exc, "A host error has occurred");
183+
attemptCount++;
169184

170185
if (ShutdownHostIfUnhealthy())
171186
{
172187
return;
173188
}
174189

175-
var orphanTask = Orphan(_host, logger)
190+
var orphanTask = Orphan(localHost, logger)
176191
.ContinueWith(t =>
192+
{
193+
if (t.IsFaulted)
177194
{
178-
if (t.IsFaulted)
179-
{
180-
t.Exception.Handle(e => true);
181-
}
182-
}, TaskContinuationOptions.ExecuteSynchronously);
195+
t.Exception.Handle(e => true);
196+
}
197+
}, TaskContinuationOptions.ExecuteSynchronously);
183198

184-
cancellationToken.ThrowIfCancellationRequested();
199+
if (cancellationToken.IsCancellationRequested)
200+
{
201+
logger.LogDebug($"Cancellation requested. A new host will not be started.");
202+
cancellationToken.ThrowIfCancellationRequested();
203+
}
185204

186205
var nextStartupAttemptMode = JobHostStartupMode.Normal;
187206

@@ -242,6 +261,7 @@ public async Task RestartHostAsync(CancellationToken cancellationToken)
242261
await _hostRestartSemaphore.WaitAsync();
243262
if (State == ScriptHostState.Stopping || State == ScriptHostState.Stopped)
244263
{
264+
_logger.LogDebug($"Host restart was requested, but current host state is '{State}'. Skipping restart.");
245265
return;
246266
}
247267

@@ -277,18 +297,26 @@ private IHost BuildHost(bool skipHostStartup = false, bool skipHostJsonConfigura
277297
return _scriptHostBuilder.BuildHost(skipHostStartup, skipHostJsonConfiguration);
278298
}
279299

280-
private ILogger GetHostLogger()
300+
private ILogger GetHostLogger(IHost host)
281301
{
282-
var hostLoggerFactory = _host?.Services.GetService<ILoggerFactory>();
302+
ILoggerFactory hostLoggerFactory = null;
303+
try
304+
{
305+
hostLoggerFactory = host?.Services.GetService<ILoggerFactory>();
306+
}
307+
catch (ObjectDisposedException)
308+
{
309+
// If the host is disposed, we cannot access services.
310+
}
283311

284312
// Attempt to get the host logger with JobHost configuration applied
285313
// using the default logger as a fallback
286314
return hostLoggerFactory?.CreateLogger(LogCategories.Startup) ?? _logger;
287315
}
288316

289-
private void LogInitialization(bool isOffline, int attemptCount, int startCount)
317+
private void LogInitialization(IHost host, bool isOffline, int attemptCount, int startCount)
290318
{
291-
var logger = GetHostLogger();
319+
var logger = GetHostLogger(host);
292320

293321
var log = isOffline ? "Host is offline." : "Initializing Host.";
294322
logger.LogInformation(log);
@@ -362,10 +390,17 @@ private bool ShutdownHostIfUnhealthy()
362390
/// <param name="instance">The <see cref="IHost"/> instance to remove</param>
363391
private async Task Orphan(IHost instance, ILogger logger, CancellationToken cancellationToken = default)
364392
{
365-
var scriptHost = (ScriptHost)instance?.Services.GetService<ScriptHost>();
366-
if (scriptHost != null)
393+
try
394+
{
395+
var scriptHost = (ScriptHost)instance?.Services.GetService<ScriptHost>();
396+
if (scriptHost != null)
397+
{
398+
scriptHost.HostInitializing -= OnHostInitializing;
399+
}
400+
}
401+
catch (ObjectDisposedException)
367402
{
368-
scriptHost.HostInitializing -= OnHostInitializing;
403+
// If the instance is already disposed, we cannot access its services.
369404
}
370405

371406
try

src/WebJobs.Script/Host/ScriptHost.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,10 @@ protected override async Task StartAsyncCore(CancellationToken cancellationToken
249249
var ignore = LogInitializationAsync();
250250

251251
await InitializeAsync();
252+
253+
// Throw if cancellation occurred during initialization.
254+
cancellationToken.ThrowIfCancellationRequested();
255+
252256
await base.StartAsyncCore(cancellationToken);
253257

254258
LogHostFunctionErrors();

test/WebJobs.Script.Tests.Shared/TestHostExtensions.cs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
11
// Copyright (c) .NET Foundation. All rights reserved.
22
// Licensed under the MIT License. See License.txt in the project root for license information.
33

4+
using System.Linq;
45
using System.Threading;
5-
using Microsoft.Azure.WebJobs.Host;
66
using Microsoft.Azure.WebJobs.Host.Executors;
77
using Microsoft.Azure.WebJobs.Script;
88
using Microsoft.Extensions.Configuration;
99
using Microsoft.Extensions.DependencyInjection;
10-
using Microsoft.Extensions.Hosting;
10+
using Microsoft.Extensions.Logging;
11+
using Microsoft.WebJobs.Script.Tests;
1112

12-
namespace Microsoft.WebJobs.Script.Tests
13+
namespace Microsoft.Extensions.Hosting
1314
{
1415
public static class TestHostExtensions
1516
{
@@ -27,5 +28,10 @@ public static string GetStorageConnectionString(this IHost host)
2728
{
2829
return host.Services.GetService<IConfiguration>().GetWebJobsConnectionString("Storage");
2930
}
31+
32+
public static TestLoggerProvider GetTestLoggerProvider(this IHost host)
33+
{
34+
return host.Services.GetServices<ILoggerProvider>().SingleOrDefault(p => p is TestLoggerProvider) as TestLoggerProvider;
35+
}
3036
}
3137
}

test/WebJobs.Script.Tests/WebJobsScriptHostServiceTests.cs

Lines changed: 109 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ public class WebJobsScriptHostServiceTests
2525
{
2626
private WebJobsScriptHostService _hostService;
2727
private ScriptApplicationHostOptionsMonitor _monitor;
28-
private TestLoggerProvider _testLoggerProvider;
28+
private TestLoggerProvider _webHostLoggerProvider = new TestLoggerProvider();
29+
private TestLoggerProvider _jobHostLoggerProvider = new TestLoggerProvider();
2930
private ILoggerFactory _loggerFactory;
3031
private Mock<IServiceProvider> _mockRootServiceProvider;
3132
private Mock<IServiceScopeFactory> _mockRootScopeFactory;
@@ -43,12 +44,10 @@ public WebJobsScriptHostServiceTests()
4344
LogPath = @"c:\tests\logs",
4445
};
4546
_monitor = new ScriptApplicationHostOptionsMonitor(options);
46-
var services = new ServiceCollection()
47-
.AddLogging()
48-
.BuildServiceProvider();
49-
_host = new Mock<IHost>();
50-
_host.Setup(h => h.Services)
51-
.Returns(services);
47+
_loggerFactory = new LoggerFactory();
48+
_loggerFactory.AddProvider(_webHostLoggerProvider);
49+
50+
_host = CreateMockHost();
5251

5352
_mockRootServiceProvider = new Mock<IServiceProvider>();
5453
_mockRootScopeFactory = new Mock<IServiceScopeFactory>();
@@ -58,16 +57,42 @@ public WebJobsScriptHostServiceTests()
5857
_hostPerformanceManager = new HostPerformanceManager(_mockEnvironment.Object, _healthMonitorOptions);
5958
}
6059

60+
private Mock<IHost> CreateMockHost()
61+
{
62+
// The tests can pull the logger from a specific host if they need to.
63+
var services = new ServiceCollection()
64+
.AddLogging(l => l.Services.AddSingleton<ILoggerProvider, TestLoggerProvider>())
65+
.BuildServiceProvider();
66+
67+
var host = new Mock<IHost>();
68+
69+
host.Setup(h => h.Services)
70+
.Returns(services);
71+
72+
host.Setup(h => h.Dispose())
73+
.Callback(() => services.Dispose());
74+
75+
return host;
76+
}
77+
6178
[Fact]
6279
public async Task HostInitialization_OnInitializationException_MaintainsErrorInformation()
6380
{
64-
_host.SetupSequence(h => h.StartAsync(It.IsAny<CancellationToken>()))
65-
.Throws(new HostInitializationException("boom"))
81+
// When an exception is thrown, we'll create a new host. Make sure
82+
// we don't return the same one (with disposed services) the second time.
83+
var hostA = CreateMockHost();
84+
hostA.Setup(h => h.StartAsync(It.IsAny<CancellationToken>()))
85+
.Throws(new HostInitializationException("boom"));
86+
87+
var hostB = CreateMockHost();
88+
hostB.Setup(h => h.StartAsync(It.IsAny<CancellationToken>()))
6689
.Returns(Task.CompletedTask);
6790

6891
var hostBuilder = new Mock<IScriptHostBuilder>();
69-
hostBuilder.Setup(b => b.BuildHost(It.IsAny<bool>(), It.IsAny<bool>()))
70-
.Returns(_host.Object);
92+
hostBuilder.SetupSequence(b => b.BuildHost(It.IsAny<bool>(), It.IsAny<bool>()))
93+
.Returns(hostA.Object)
94+
.Returns(hostB.Object);
95+
7196
_hostService = new WebJobsScriptHostService(
7297
_monitor, hostBuilder.Object, NullLoggerFactory.Instance, _mockRootServiceProvider.Object, _mockRootScopeFactory.Object,
7398
_mockScriptWebHostEnvironment.Object, _mockEnvironment.Object, _hostPerformanceManager, _healthMonitorOptions);
@@ -88,9 +113,9 @@ public async Task HostRestart_Specialization_Succeeds()
88113
hostBuilder.Setup(b => b.BuildHost(It.IsAny<bool>(), It.IsAny<bool>()))
89114
.Returns(_host.Object);
90115

91-
_testLoggerProvider = new TestLoggerProvider();
116+
_webHostLoggerProvider = new TestLoggerProvider();
92117
_loggerFactory = new LoggerFactory();
93-
_loggerFactory.AddProvider(_testLoggerProvider);
118+
_loggerFactory.AddProvider(_webHostLoggerProvider);
94119

95120
_hostService = new WebJobsScriptHostService(
96121
_monitor, hostBuilder.Object, _loggerFactory, _mockRootServiceProvider.Object, _mockRootScopeFactory.Object,
@@ -105,10 +130,80 @@ public async Task HostRestart_Specialization_Succeeds()
105130
restartHostThread.Join();
106131
specializeHostThread.Join();
107132

108-
var logMessages = _testLoggerProvider.GetAllLogMessages().Where(m => m.FormattedMessage.Contains("Restarting host."));
133+
var logMessages = _webHostLoggerProvider.GetAllLogMessages().Where(m => m.FormattedMessage.Contains("Restarting host."));
109134
Assert.Equal(2, logMessages.Count());
110135
}
111136

137+
[Fact]
138+
public async Task HostRestart_DuringInitialization_Recovers()
139+
{
140+
SemaphoreSlim semaphore = new SemaphoreSlim(1, 1);
141+
await semaphore.WaitAsync();
142+
143+
// Have the first host start, but pause. We'll issue a restart, wait for the
144+
// second host to be running, then let this first host throw an exception.
145+
var hostA = CreateMockHost();
146+
hostA
147+
.Setup(h => h.StartAsync(It.IsAny<CancellationToken>()))
148+
.Returns(async () =>
149+
{
150+
await semaphore.WaitAsync();
151+
throw new InvalidOperationException("Something happened at startup!");
152+
});
153+
154+
var hostB = CreateMockHost();
155+
hostB
156+
.Setup(h => h.StartAsync(It.IsAny<CancellationToken>()))
157+
.Returns(() => Task.CompletedTask);
158+
159+
var hostBuilder = new Mock<IScriptHostBuilder>();
160+
hostBuilder.SetupSequence(b => b.BuildHost(It.IsAny<bool>(), It.IsAny<bool>()))
161+
.Returns(hostA.Object)
162+
.Returns(hostB.Object);
163+
164+
_webHostLoggerProvider = new TestLoggerProvider();
165+
_loggerFactory = new LoggerFactory();
166+
_loggerFactory.AddProvider(_webHostLoggerProvider);
167+
168+
_hostService = new WebJobsScriptHostService(
169+
_monitor, hostBuilder.Object, _loggerFactory, _mockRootServiceProvider.Object, _mockRootScopeFactory.Object,
170+
_mockScriptWebHostEnvironment.Object, _mockEnvironment.Object, _hostPerformanceManager, _healthMonitorOptions);
171+
172+
Task initialStart = _hostService.StartAsync(CancellationToken.None);
173+
174+
Thread restartHostThread = new Thread(new ThreadStart(RestartHost));
175+
restartHostThread.Start();
176+
restartHostThread.Join();
177+
178+
await TestHelpers.Await(() => _hostService.State == ScriptHostState.Running);
179+
180+
// Now let the first host throw its startup exception.
181+
semaphore.Release();
182+
183+
await initialStart;
184+
185+
// Note that HostA is disposed so its services cannot be accessed. Logging will fall
186+
// back to using the WebHost's logger.
187+
Assert.Throws<ObjectDisposedException>(() => hostA.Object.GetTestLoggerProvider());
188+
TestLoggerProvider hostBLogger = hostB.Object.GetTestLoggerProvider();
189+
190+
// Make sure the error was logged to the correct logger
191+
Assert.Contains(_webHostLoggerProvider.GetAllLogMessages(), m => m.FormattedMessage != null && m.FormattedMessage.Contains("A host error has occurred on an inactive host"));
192+
Assert.DoesNotContain(hostBLogger.GetAllLogMessages(), m => m.FormattedMessage != null && m.FormattedMessage.Contains("A host error has occurred"));
193+
194+
// Make sure we orphaned the correct host when the exception was thrown. This will happen
195+
// twice: once during Restart and once during the Orphan call when handling the exception.
196+
hostA.Verify(m => m.StopAsync(It.IsAny<CancellationToken>()), Times.Exactly(2));
197+
hostA.Verify(m => m.Dispose(), Times.Exactly(2));
198+
199+
// We should not be calling Orphan on the good host
200+
hostB.Verify(m => m.StopAsync(It.IsAny<CancellationToken>()), Times.Never);
201+
hostB.Verify(m => m.Dispose(), Times.Never);
202+
203+
// The "late" exception from the first host shouldn't bring things down
204+
Assert.Equal(ScriptHostState.Running, _hostService.State);
205+
}
206+
112207
public void RestartHost()
113208
{
114209
_hostService.RestartHostAsync(CancellationToken.None).Wait();

0 commit comments

Comments
 (0)