Skip to content

Commit 48112e1

Browse files
committed
Integrate host health monitoring in startup path (#1999)
1 parent 7c7ac3b commit 48112e1

19 files changed

+258
-136
lines changed

src/WebJobs.Script.WebHost/Controllers/AdminController.cs

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -117,15 +117,6 @@ public IActionResult GetHostStatus()
117117
status.Errors.Add(Utility.FlattenException(lastError));
118118
}
119119

120-
var parameters = Request.Query;
121-
if (parameters.TryGetValue(ScriptConstants.CheckLoadQueryParameterName, out StringValues value) && value == "1")
122-
{
123-
status.Load = new LoadStatus
124-
{
125-
IsHigh = _scriptHostManager.PerformanceManager.IsUnderHighLoad()
126-
};
127-
}
128-
129120
string message = $"Host Status: {JsonConvert.SerializeObject(status, Formatting.Indented)}";
130121
_logger?.LogInformation(message);
131122

src/WebJobs.Script.WebHost/Models/HostStatus.cs

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,5 @@ public class HostStatus
3737
/// </summary>
3838
[JsonProperty(PropertyName = "errors", DefaultValueHandling = DefaultValueHandling.Ignore)]
3939
public Collection<string> Errors { get; set; }
40-
41-
/// <summary>
42-
/// Gets or sets the <see cref="LoadStatus"/>.
43-
/// </summary>
44-
[JsonProperty(PropertyName = "load", DefaultValueHandling = DefaultValueHandling.Ignore)]
45-
public LoadStatus Load { get; set; }
4640
}
4741
}

src/WebJobs.Script.WebHost/Models/LoadStatus.cs

Lines changed: 0 additions & 16 deletions
This file was deleted.

src/WebJobs.Script.WebHost/WebHostResolver.cs

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
using System.Threading.Tasks;
88
using Microsoft.Azure.WebJobs.Extensions.Http;
99
using Microsoft.Azure.WebJobs.Script.Config;
10-
using Microsoft.Azure.WebJobs.Script.Diagnostics;
1110
using Microsoft.Azure.WebJobs.Script.Eventing;
1211
using Microsoft.Azure.WebJobs.Script.WebHost.Properties;
1312
using Microsoft.Extensions.Configuration;
@@ -69,11 +68,6 @@ public ISecretManager GetSecretManager(WebHostSettings settings)
6968
return GetWebScriptHostManager(settings).SecretManager;
7069
}
7170

72-
/// <summary>
/// Returns the <c>PerformanceManager</c> of the <see cref="WebScriptHostManager"/>
/// resolved for the given settings.
/// </summary>
/// <param name="settings">Settings passed through to <c>GetWebScriptHostManager</c>.</param>
public HostPerformanceManager GetPerformanceManager(WebHostSettings settings)
73-
{
74-
return GetWebScriptHostManager(settings).PerformanceManager;
75-
}
76-
7771
/// <summary>
/// Parameterless convenience overload: forwards to
/// <c>GetWebScriptHostManager(WebHostSettings)</c> using the resolver's own <c>_settings</c>.
/// </summary>
public WebScriptHostManager GetWebScriptHostManager() =>
7872
GetWebScriptHostManager(_settings);
7973

src/WebJobs.Script.WebHost/WebScriptHostManager.cs

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
using Microsoft.Azure.WebJobs.Script.Description;
1919
using Microsoft.Azure.WebJobs.Script.Diagnostics;
2020
using Microsoft.Azure.WebJobs.Script.Eventing;
21+
using Microsoft.Azure.WebJobs.Script.Scale;
2122
using Microsoft.Azure.WebJobs.Script.WebHost.Diagnostics;
2223
using Microsoft.Extensions.Logging;
2324

@@ -29,7 +30,6 @@ public class WebScriptHostManager : ScriptHostManager
2930

3031
private readonly WebHostMetricsLogger _metricsLogger;
3132
private readonly ISecretManager _secretManager;
32-
private readonly HostPerformanceManager _performanceManager;
3333
private readonly WebHostSettings _webHostSettings;
3434

3535
private readonly IWebJobsExceptionHandler _exceptionHandler;
@@ -53,9 +53,10 @@ public WebScriptHostManager(ScriptHostConfiguration config,
5353
IScriptHostFactory scriptHostFactory = null,
5454
ISecretsRepositoryFactory secretsRepositoryFactory = null,
5555
ILoggerFactoryBuilder loggerFactoryBuilder = null,
56+
HostPerformanceManager hostPerformanceManager = null,
5657
int hostTimeoutSeconds = 30,
5758
int hostPollingIntervalMilliseconds = 500)
58-
: base(config, settingsManager, scriptHostFactory, eventManager, environment: null, loggerFactoryBuilder: loggerFactoryBuilder)
59+
: base(config, settingsManager, scriptHostFactory, eventManager, environment: null, loggerFactoryBuilder: loggerFactoryBuilder, hostPerformanceManager: hostPerformanceManager)
5960
{
6061
_config = config;
6162

@@ -79,8 +80,6 @@ public WebScriptHostManager(ScriptHostConfiguration config,
7980

8081
config.IsSelfHost = webHostSettings.IsSelfHost;
8182

82-
_performanceManager = new HostPerformanceManager(settingsManager, config.TraceWriter);
83-
8483
secretsRepositoryFactory = secretsRepositoryFactory ?? new DefaultSecretsRepositoryFactory();
8584
var secretsRepository = secretsRepositoryFactory.Create(settingsManager, webHostSettings, config);
8685
_secretManager = secretManagerFactory.Create(settingsManager, config.HostConfig.LoggerFactory, secretsRepository);
@@ -115,8 +114,6 @@ public WebScriptHostManager(ScriptHostConfiguration config,
115114

116115
public ISecretManager SecretManager => _secretManager;
117116

118-
public HostPerformanceManager PerformanceManager => _performanceManager;
119-
120117
public virtual bool Initialized
121118
{
122119
get
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
// Copyright (c) .NET Foundation. All rights reserved.
2+
// Licensed under the MIT License. See License.txt in the project root for license information.
3+
4+
#if WEBREQUESTMANAGER
5+
using System;
6+
using System.Collections.ObjectModel;
7+
using System.Net.Http;
8+
using Microsoft.Azure.WebJobs.Extensions.Http;
9+
using Microsoft.Azure.WebJobs.Host;
10+
using Microsoft.Azure.WebJobs.Script.Description;
11+
using Microsoft.Azure.WebJobs.Script.Diagnostics;
12+
using Microsoft.Azure.WebJobs.Script.Scale;
13+
14+
namespace Microsoft.Azure.WebJobs.Script.WebHost
15+
{
16+
/// <summary>
/// An <see cref="HttpRequestManager"/> that, in addition to the base rejection logic,
/// rejects requests while the <see cref="HostPerformanceManager"/> reports high load.
/// The load check is performed at most once per configured period and its result is cached.
/// </summary>
/// <remarks>
/// This type is only compiled when the WEBREQUESTMANAGER symbol is defined.
/// NOTE(review): the cached fields are read and written without synchronization —
/// confirm whether the base class can call these overrides concurrently.
/// </remarks>
public class WebScriptHostRequestManager : HttpRequestManager
17+
{
18+
// Source of the high-load decision (IsUnderHighLoad).
private readonly HostPerformanceManager _performanceManager;
19+
// Receives a FunctionInvokeThrottled event for every rejected request.
private readonly IMetricsLogger _metricsLogger;
20+
// Minimum number of seconds between two load checks.
private readonly int _performanceCheckPeriodSeconds;
21+
// UTC time of the last load check. Never assigned in the constructor, so it starts
// at default(DateTime) and the first eligible call always performs a check.
private DateTime _lastPerformanceCheck;
22+
// Cached result of the most recent load check.
private bool _rejectAllRequests;
23+
24+
/// <summary>
/// Initializes a new instance of the <see cref="WebScriptHostRequestManager"/> class.
/// </summary>
/// <param name="config">HTTP extension configuration, forwarded to the base class.</param>
/// <param name="performanceManager">Performance manager queried for high load.</param>
/// <param name="metricsLogger">Logger used to record throttled invocations.</param>
/// <param name="traceWriter">Trace writer, forwarded to the base class.</param>
/// <param name="performanceCheckPeriodSeconds">Minimum seconds between load checks (default 15).</param>
public WebScriptHostRequestManager(HttpExtensionConfiguration config, HostPerformanceManager performanceManager, IMetricsLogger metricsLogger, TraceWriter traceWriter, int performanceCheckPeriodSeconds = 15) : base(config, traceWriter)
25+
{
26+
_performanceManager = performanceManager;
27+
_metricsLogger = metricsLogger;
28+
_performanceCheckPeriodSeconds = performanceCheckPeriodSeconds;
29+
}
30+
31+
/// <summary>
/// Decides whether all incoming requests should currently be rejected.
/// </summary>
/// <returns>
/// <c>true</c> if the base implementation says so; otherwise the cached high-load flag,
/// which is refreshed first when dynamic throttles are enabled and the check period has elapsed.
/// </returns>
protected override bool RejectAllRequests()
32+
{
33+
if (base.RejectAllRequests())
34+
{
35+
return true;
36+
}
37+
38+
// NOTE(review): when DynamicThrottlesEnabled is false this branch is skipped and the
// previously cached flag is still returned below — confirm that a flag left at true
// cannot outlive a configuration change.
if (Config.DynamicThrottlesEnabled &&
39+
((DateTime.UtcNow - _lastPerformanceCheck) > TimeSpan.FromSeconds(_performanceCheckPeriodSeconds)))
40+
{
41+
// only check host status periodically
42+
Collection<string> exceededCounters = new Collection<string>();
43+
_rejectAllRequests = _performanceManager.IsUnderHighLoad(exceededCounters);
44+
_lastPerformanceCheck = DateTime.UtcNow;
45+
if (_rejectAllRequests)
46+
{
47+
// The warning is written only when a check is actually performed, not on every rejected request.
TraceWriter.Warning($"Thresholds for the following counters have been exceeded: [{string.Join(", ", exceededCounters)}]");
48+
}
49+
}
50+
51+
return _rejectAllRequests;
52+
}
53+
54+
/// <summary>
/// Logs a <c>FunctionInvokeThrottled</c> metric for the target function, then
/// delegates to the base implementation to build the rejection response.
/// </summary>
/// <param name="request">The request being rejected.</param>
/// <returns>The response produced by the base <c>RejectRequest</c>.</returns>
protected override HttpResponseMessage RejectRequest(HttpRequestMessage request)
55+
{
56+
// NOTE(review): presumably GetPropertyOrDefault returns null when the request carries no
// function descriptor, in which case function.Name below would throw — confirm that the
// property is always set before this method is reached.
var function = request.GetPropertyOrDefault<FunctionDescriptor>(ScriptConstants.AzureFunctionsHttpFunctionKey);
57+
_metricsLogger.LogEvent(MetricEventNames.FunctionInvokeThrottled, function.Name);
58+
59+
return base.RejectRequest(request);
60+
}
61+
}
62+
}
63+
#endif

src/WebJobs.Script/Config/ScriptHostConfiguration.cs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ public ScriptHostConfiguration()
2222
RootScriptPath = Environment.CurrentDirectory;
2323
RootLogPath = Path.Combine(Path.GetTempPath(), "Functions");
2424
LogFilter = new LogCategoryFilter();
25+
HostHealthMonitorEnabled = true;
2526
}
2627

2728
/// <summary>
@@ -86,6 +87,12 @@ public ScriptHostConfiguration()
8687
/// </summary>
8788
public TimeSpan? FunctionTimeout { get; set; }
8889

90+
/// <summary>
91+
/// Gets or sets a value indicating whether the hosting environment will be monitored
92+
/// for health (e.g. socket thresholds). Default is true.
93+
/// </summary>
94+
public bool HostHealthMonitorEnabled { get; set; }
95+
8996
/// <summary>
9097
/// Gets or sets a value indicating whether the host is running
9198
/// outside of the normal Azure hosting environment. E.g. when running

src/WebJobs.Script/Host/ScriptHost.cs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1534,6 +1534,12 @@ internal static void ApplyConfiguration(JObject config, ScriptHostConfiguration
15341534
}
15351535
}
15361536

1537+
JToken hostHealthMonitorEnabled = (JToken)config["hostHealthMonitorEnabled"];
1538+
if (hostHealthMonitorEnabled != null && hostHealthMonitorEnabled.Type == JTokenType.Boolean)
1539+
{
1540+
scriptConfig.HostHealthMonitorEnabled = (bool)hostHealthMonitorEnabled;
1541+
}
1542+
15371543
// Apply Singleton configuration
15381544
JObject configSection = (JObject)config["singleton"];
15391545
JToken value = null;

src/WebJobs.Script/Host/ScriptHostManager.cs

Lines changed: 70 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
using System;
55
using System.Collections.Generic;
6+
using System.Collections.ObjectModel;
67
using System.Diagnostics;
78
using System.Globalization;
89
using System.IO;
@@ -15,6 +16,7 @@
1516
using Microsoft.Azure.WebJobs.Script.Config;
1617
using Microsoft.Azure.WebJobs.Script.Diagnostics;
1718
using Microsoft.Azure.WebJobs.Script.Eventing;
19+
using Microsoft.Azure.WebJobs.Script.Scale;
1820
using Microsoft.Extensions.Logging;
1921

2022
namespace Microsoft.Azure.WebJobs.Script
@@ -34,6 +36,9 @@ public class ScriptHostManager : IScriptHostEnvironment, IDisposable
3436
private readonly IScriptHostEnvironment _environment;
3537
private readonly IDisposable _fileEventSubscription;
3638
private readonly StructuredLogWriter _structuredLogWriter;
39+
private readonly HostPerformanceManager _performanceManager;
40+
private readonly Timer _hostHealthCheckTimer;
41+
private readonly TimeSpan hostHealthCheckInterval = TimeSpan.FromSeconds(15);
3742
private ScriptHost _currentInstance;
3843
private int _hostStartCount;
3944

@@ -57,8 +62,9 @@ public ScriptHostManager(
5762
ScriptHostConfiguration config,
5863
IScriptEventManager eventManager = null,
5964
IScriptHostEnvironment environment = null,
60-
ILoggerFactoryBuilder loggerFactoryBuilder = null)
61-
: this(config, ScriptSettingsManager.Instance, new ScriptHostFactory(), eventManager, environment, loggerFactoryBuilder)
65+
ILoggerFactoryBuilder loggerFactoryBuilder = null,
66+
HostPerformanceManager hostPerformanceManager = null)
67+
: this(config, ScriptSettingsManager.Instance, new ScriptHostFactory(), eventManager, environment, loggerFactoryBuilder, hostPerformanceManager)
6268
{
6369
if (config.FileWatchingEnabled)
6470
{
@@ -75,8 +81,19 @@ public ScriptHostManager(ScriptHostConfiguration config,
7581
IScriptHostFactory scriptHostFactory,
7682
IScriptEventManager eventManager = null,
7783
IScriptHostEnvironment environment = null,
78-
ILoggerFactoryBuilder loggerFactoryBuilder = null)
84+
ILoggerFactoryBuilder loggerFactoryBuilder = null,
85+
HostPerformanceManager hostPerformanceManager = null)
7986
{
87+
if (config == null)
88+
{
89+
throw new ArgumentNullException(nameof(config));
90+
}
91+
if (settingsManager == null)
92+
{
93+
throw new ArgumentNullException(nameof(settingsManager));
94+
}
95+
96+
scriptHostFactory = scriptHostFactory ?? new ScriptHostFactory();
8097
_environment = environment ?? this;
8198
_config = config;
8299
_settingsManager = settingsManager;
@@ -86,8 +103,16 @@ public ScriptHostManager(ScriptHostConfiguration config,
86103
EventManager = eventManager ?? new ScriptEventManager();
87104

88105
_structuredLogWriter = new StructuredLogWriter(EventManager, config.RootLogPath);
106+
_performanceManager = hostPerformanceManager ?? new HostPerformanceManager(settingsManager);
107+
108+
if (config.HostHealthMonitorEnabled && settingsManager.IsAzureEnvironment)
109+
{
110+
_hostHealthCheckTimer = new Timer(OnHostHealthCheckTimer, null, TimeSpan.Zero, hostHealthCheckInterval);
111+
}
89112
}
90113

114+
/// <summary>
/// Gets the <see cref="HostPerformanceManager"/> used for host health checks: either the
/// instance supplied to the constructor or one created from the settings manager.
/// </summary>
protected HostPerformanceManager PerformanceManager => _performanceManager;
115+
91116
protected IScriptEventManager EventManager { get; }
92117

93118
public virtual ScriptHost Instance
@@ -137,6 +162,8 @@ public bool CanInvoke()
137162
State = ScriptHostState.Default;
138163
}
139164

165+
OnHostStarting();
166+
140167
// Create a new host config, but keep the host id from existing one
141168
_config.HostConfig = new JobHostConfiguration(_settingsManager.Configuration)
142169
{
@@ -372,7 +399,12 @@ protected virtual void OnHostCreated()
372399
State = ScriptHostState.Created;
373400
}
374401

375-
protected virtual void OnHostStarted()
402+
/// <summary>
/// Hook invoked before a new host configuration is created for a start attempt.
/// Runs the host health check in throwing mode, so the start attempt is aborted
/// with an exception when the host is unhealthy.
/// </summary>
/// <exception cref="InvalidOperationException">
/// Thrown by <c>IsHostHealthy</c> when host thresholds are exceeded.
/// </exception>
protected virtual void OnHostStarting()
403+
{
404+
IsHostHealthy(throwWhenUnhealthy: true);
405+
}
406+
407+
protected virtual void OnHostStarted()
376408
{
377409
var metricsLogger = _config.HostConfig.GetService<IMetricsLogger>();
378410
metricsLogger.LogEvent(new HostStarted(Instance));
@@ -401,6 +433,7 @@ protected virtual void Dispose(bool disposing)
401433
}
402434
}
403435

436+
_hostHealthCheckTimer?.Dispose();
404437
_stopEvent.Dispose();
405438
_restartDelayTokenSource?.Dispose();
406439
_fileEventSubscription?.Dispose();
@@ -431,6 +464,39 @@ public virtual void Shutdown()
431464
Process.GetCurrentProcess().Close();
432465
}
433466

467+
/// <summary>
/// Callback for <c>_hostHealthCheckTimer</c>. Requests a host restart when the host is
/// in the <c>Running</c> state and the (non-throwing) health check reports it unhealthy.
/// </summary>
/// <param name="state">Timer state object; not used (the timer is created with a null state).</param>
/// <remarks>
/// NOTE(review): nothing in this callback catches exceptions from <c>IsHostHealthy</c> or
/// <c>RestartHost</c> — confirm how an exception on the timer thread is expected to be handled.
/// </remarks>
private void OnHostHealthCheckTimer(object state)
468+
{
469+
// The State check comes first, so the health check is skipped entirely unless the host is running.
if (State == ScriptHostState.Running && !IsHostHealthy())
470+
{
471+
// This periodic check allows us to break out of the host run
472+
// loop. The health check performed in OnHostStarting will then
473+
// fail and we'll enter a restart loop (exponentially backing off)
474+
// until the host is healthy again and we can resume host processing.
475+
RestartHost();
476+
}
477+
}
478+
479+
/// <summary>
/// Determines whether the host is currently healthy, based on whether the
/// performance manager reports the host as being under high load.
/// </summary>
/// <param name="throwWhenUnhealthy">
/// When <c>true</c>, an unhealthy host results in an exception instead of a <c>false</c> return value.
/// </param>
/// <returns>
/// <c>true</c> when health monitoring is disabled, when not running in an Azure environment,
/// or when the host is not under high load; otherwise <c>false</c>.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown when the host is under high load and <paramref name="throwWhenUnhealthy"/> is <c>true</c>.
/// The message lists the exceeded counters.
/// </exception>
internal bool IsHostHealthy(bool throwWhenUnhealthy = false)
480+
{
481+
// Monitoring is opt-out via configuration and only applies in an Azure environment;
// in every other case the host is reported healthy without consulting any counters.
if (!_config.HostHealthMonitorEnabled || !_settingsManager.IsAzureEnvironment)
482+
{
483+
return true;
484+
}
485+
486+
// Passed to IsUnderHighLoad and used below to build the exception message;
// presumably filled with the names of the exceeded counters — confirm against HostPerformanceManager.
var exceededCounters = new Collection<string>();
487+
if (PerformanceManager.IsUnderHighLoad(exceededCounters))
488+
{
489+
string formattedCounters = string.Join(", ", exceededCounters);
490+
if (throwWhenUnhealthy)
491+
{
492+
throw new InvalidOperationException($"Host thresholds exceeded: [{formattedCounters}]");
493+
}
494+
return false;
495+
}
496+
497+
return true;
498+
}
499+
434500
public async Task<bool> DelayUntilHostReady(int timeoutSeconds = HostCheckTimeoutSeconds, int pollingIntervalMilliseconds = HostCheckPollingIntervalMilliseconds)
435501
{
436502
TimeSpan timeout = TimeSpan.FromSeconds(timeoutSeconds);

src/WebJobs.Script/Diagnostics/ApplicationPerformanceCounters.cs renamed to src/WebJobs.Script/Scale/ApplicationPerformanceCounters.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// Copyright (c) .NET Foundation. All rights reserved.
22
// Licensed under the MIT License. See License.txt in the project root for license information.
33

4-
namespace Microsoft.Azure.WebJobs.Script.Diagnostics
4+
namespace Microsoft.Azure.WebJobs.Script.Scale
55
{
66
public class ApplicationPerformanceCounters
77
{

0 commit comments

Comments
 (0)