Skip to content

Commit 6529bf0

Browse files
committed
HealthMonitor improvements (#2972)
1 parent 7aa7229 commit 6529bf0

File tree

8 files changed

+32
-16
lines changed

8 files changed

+32
-16
lines changed

src/WebJobs.Script.WebHost/WebScriptHostManager.cs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -434,14 +434,24 @@ private void InitializeHttpFunctions(IEnumerable<FunctionDescriptor> functions,
434434
}
435435
}
436436

437-
public override void Shutdown()
437+
public override void Shutdown(bool hard = false)
438438
{
439439
string message = "Environment shutdown has been triggered. Stopping host and signaling shutdown.";
440440
Instance?.TraceWriter.Info(message);
441441
Instance?.Logger?.LogInformation(message);
442442

443443
Stop();
444-
HostingEnvironment.InitiateShutdown();
444+
445+
if (hard)
446+
{
447+
// "hard" shutdown recycles the process
448+
Process.GetCurrentProcess().Kill();
449+
}
450+
else
451+
{
452+
// "soft" shutdown recycles the AppDomain
453+
HostingEnvironment.InitiateShutdown();
454+
}
445455
}
446456

447457
public async Task DelayUntilHostReady()

src/WebJobs.Script/Host/IScriptHostEnvironment.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ public interface IScriptHostEnvironment
1919
/// <summary>
2020
/// Stops the <see cref="ScriptHost"/> and shuts down the hosting environment.
2121
/// </summary>
22-
void Shutdown();
22+
/// <param name="hard">True to perform a "hard" shutdown, i.e. shut down the process; otherwise, false.</param>
23+
void Shutdown(bool hard = false);
2324
}
2425
}

src/WebJobs.Script/Host/NullScriptHostEnvironment.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ public void RestartHost()
1515
{
1616
}
1717

18-
public void Shutdown()
18+
public void Shutdown(bool hard = false)
1919
{
2020
}
2121
}

src/WebJobs.Script/Host/ScriptHostManager.cs

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
using System.Text;
1313
using System.Threading;
1414
using System.Threading.Tasks;
15+
using Microsoft.Azure.WebJobs.Host;
1516
using Microsoft.Azure.WebJobs.Script.Config;
1617
using Microsoft.Azure.WebJobs.Script.Diagnostics;
1718
using Microsoft.Azure.WebJobs.Script.Eventing;
@@ -266,13 +267,15 @@ private bool ShutdownHostIfUnhealthy()
266267
{
267268
if (ShouldMonitorHostHealth && _healthCheckWindow.GetEvents().Where(isHealthy => !isHealthy).Count() > _config.HostHealthMonitor.HealthCheckThreshold)
268269
{
269-
// if the number of times the host has been unhealthy in
270+
// If the number of times the host has been unhealthy in
270271
// the current time window exceeds the threshold, recover by
271-
// initiating shutdown
272-
var message = $"Host unhealthy count exceeds the threshold of {_config.HostHealthMonitor.HealthCheckThreshold} for time window {_config.HostHealthMonitor.HealthCheckWindow}. Initiating shutdown.";
273-
Instance?.TraceWriter?.Error(message);
272+
// initiating a hard shutdown.
273+
var message = $"Host unhealthy count exceeds the threshold of {_config.HostHealthMonitor.HealthCheckThreshold} for time window {_config.HostHealthMonitor.HealthCheckWindow}. Initiating hard shutdown.";
274+
var shutdownEvent = new TraceEvent(TraceLevel.Error, message);
275+
shutdownEvent.Properties.Add(ScriptConstants.TracePropertyEventNameKey, ScriptConstants.ShutdownRecoveryEventName);
276+
Instance?.TraceWriter?.Trace(shutdownEvent);
274277
Instance?.Logger?.LogError(0, message);
275-
_environment.Shutdown();
278+
_environment.Shutdown(hard: true);
276279
return true;
277280
}
278281

@@ -504,11 +507,11 @@ public virtual void RestartHost()
504507
_restartHostEvent.Set();
505508
}
506509

507-
public virtual void Shutdown()
510+
public virtual void Shutdown(bool hard = false)
508511
{
509512
Stop();
510513

511-
Process.GetCurrentProcess().Close();
514+
Process.GetCurrentProcess().Kill();
512515
}
513516

514517
private void OnHostHealthCheckTimer(object state)

src/WebJobs.Script/ScriptConstants.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ public static class ScriptConstants
6363
public const string DefaultMasterKeyName = "master";
6464
public const string DefaultFunctionKeyName = "default";
6565
public const string ColdStartEventName = "ColdStart";
66+
public const string ShutdownRecoveryEventName = "ShutdownRecovery";
6667

6768
public const string AntaresLogIdHeaderName = "X-ARR-LOG-ID";
6869
public const string AntaresScaleOutHeaderName = "X-FUNCTION-SCALEOUT";

test/WebJobs.Script.Tests.Integration/Host/ScriptHostManagerTests.cs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -403,7 +403,7 @@ public async Task HostHealthMonitor_TriggersShutdown_WhenHostUnhealthy()
403403

404404
var environmentMock = new Mock<IScriptHostEnvironment>(MockBehavior.Strict);
405405
bool shutdownCalled = false;
406-
environmentMock.Setup(p => p.Shutdown()).Callback(() => shutdownCalled = true);
406+
environmentMock.Setup(p => p.Shutdown(true)).Callback(() => shutdownCalled = true);
407407

408408
var mockSettings = new Mock<ScriptSettingsManager>();
409409
mockSettings.Setup(p => p.IsAzureEnvironment).Returns(true);
@@ -431,7 +431,7 @@ public async Task HostHealthMonitor_TriggersShutdown_WhenHostUnhealthy()
431431

432432
await TestHelpers.Await(() => hostManager.State == ScriptHostState.Error && shutdownCalled);
433433

434-
environmentMock.Verify(p => p.Shutdown(), Times.Once);
434+
environmentMock.Verify(p => p.Shutdown(true), Times.Once);
435435

436436
var traces = testTraceWriter.GetTraces();
437437

@@ -441,7 +441,8 @@ public async Task HostHealthMonitor_TriggersShutdown_WhenHostUnhealthy()
441441

442442
var log = traces.Last();
443443
Assert.True(traces.Count(p => p.Message == "Host is unhealthy. Initiating a restart." && p.Level == TraceLevel.Error) > 0);
444-
Assert.Equal("Host unhealthy count exceeds the threshold of 5 for time window 00:00:01. Initiating shutdown.", log.Message);
444+
Assert.Equal("Host unhealthy count exceeds the threshold of 5 for time window 00:00:01. Initiating hard shutdown.", log.Message);
445+
Assert.Equal(ScriptConstants.ShutdownRecoveryEventName, log.Properties[ScriptConstants.TracePropertyEventNameKey]);
445446
Assert.Equal(TraceLevel.Error, log.Level);
446447
}
447448

test/WebJobs.Script.Tests.Integration/RawAssemblyEndToEndTests.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ public async Task InvokeDotNetFunction(string functionName, string expectedResul
5959
public void AssemblyChange_TriggersEnvironmentShutdown()
6060
{
6161
var manualResetEvent = new ManualResetEvent(false);
62-
Fixture.ScriptHostEnvironmentMock.Setup(e => e.Shutdown())
62+
Fixture.ScriptHostEnvironmentMock.Setup(e => e.Shutdown(false))
6363
.Callback(() => manualResetEvent.Set());
6464

6565
string sourceFile = TestFixture.SharedAssemblyPath;

test/WebJobs.Script.Tests/Description/DotNet/DotNetFunctionInvokerTests.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,7 @@ public async Task RestorePackagesAsync_WithUpdatedReferences_TriggersShutdown(bo
346346
// and won't be made immediately
347347
await Task.Delay(1000);
348348

349-
environmentMock.Verify(e => e.Shutdown(), Times.Exactly(shutdownExpected ? 1 : 0));
349+
environmentMock.Verify(e => e.Shutdown(false), Times.Exactly(shutdownExpected ? 1 : 0));
350350
}
351351
}
352352

0 commit comments

Comments
 (0)