Skip to content

Commit 3330cd7

Browse files
authored
[HealthChecks] Add HealthChecks endpoint, register telemetry publisher (#11341)
* Hook up HealthCheckTelemetryPublisher * Add /runtime/health/ endpoint * Update health check tests * Change HealthCheckMetrics meter name * Update release_notes.md * Require admin auth for health checks * Add HealthCheck E2E tests * remove host.json * Add newlines * Fix empty lines * Move health API under /admin/health * Add assertion message for failing test
1 parent 50491ff commit 3330cd7

File tree

10 files changed

+164
-28
lines changed

10 files changed

+164
-28
lines changed

release_notes.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,5 @@
1616
- Reduce allocations in `Utility.IsAzureMonitorLoggingEnabled` (#11323)
1717
- Update PowerShell worker to [4.0.4581](https://github.com/Azure/azure-functions-powershell-worker/releases/tag/v4.0.4581)
1818
- Bug fix that fails in-flight invocations when a worker channel shuts down (#11159)
19+
- Adds WebHost and ScriptHost health checks. (#11341, #11183, #11178, #11173, #11161)
1920
- Update Node.js Worker Version to [3.12.0](https://github.com/Azure/azure-functions-nodejs-worker/releases/tag/v3.12.0)

src/WebJobs.Script.WebHost/Controllers/FunctionsController.cs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
using Microsoft.AspNetCore.Http.Extensions;
1313
using Microsoft.AspNetCore.Mvc;
1414
using Microsoft.Azure.WebJobs.Extensions.Http;
15-
using Microsoft.Azure.WebJobs.Host;
1615
using Microsoft.Azure.WebJobs.Script.Description;
1716
using Microsoft.Azure.WebJobs.Script.Management.Models;
1817
using Microsoft.Azure.WebJobs.Script.WebHost.Extensions;
@@ -28,7 +27,7 @@ namespace Microsoft.Azure.WebJobs.Script.WebHost.Controllers
2827
{
2928
/// <summary>
3029
/// Controller responsible for administrative and management operations on functions
31-
/// example retrieving a list of functions, invoking a function, creating a function, etc
30+
/// example retrieving a list of functions, invoking a function, creating a function, etc.
3231
/// </summary>
3332
public class FunctionsController : Controller
3433
{

src/WebJobs.Script.WebHost/Controllers/HostController.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
using Microsoft.Azure.WebJobs.Script.Diagnostics;
2020
using Microsoft.Azure.WebJobs.Script.ExtensionBundle;
2121
using Microsoft.Azure.WebJobs.Script.Scale;
22-
using Microsoft.Azure.WebJobs.Script.WebHost.Extensions;
2322
using Microsoft.Azure.WebJobs.Script.WebHost.Filters;
2423
using Microsoft.Azure.WebJobs.Script.WebHost.Management;
2524
using Microsoft.Azure.WebJobs.Script.WebHost.Models;
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
// Copyright (c) .NET Foundation. All rights reserved.
2+
// Licensed under the MIT License. See License.txt in the project root for license information.
3+
4+
using System;
5+
using System.Threading.Tasks;
6+
using Microsoft.AspNetCore.Authentication;
7+
using Microsoft.AspNetCore.Authorization;
8+
using Microsoft.AspNetCore.Authorization.Policy;
9+
using Microsoft.AspNetCore.Http;
10+
using Microsoft.Azure.WebJobs.Script.WebHost.Security.Authorization.Policies;
11+
12+
namespace Microsoft.Azure.WebJobs.Script.WebHost.Diagnostics.HealthChecks
13+
{
14+
/// <summary>
/// Middleware that only lets a request continue down the pipeline when it satisfies the
/// authorization policy named <see cref="PolicyNames.AdminAuthLevel"/>.
/// </summary>
/// <remarks>
/// The policy is resolved from the <see cref="IAuthorizationPolicyProvider"/> on every request and
/// evaluated directly through the <see cref="IPolicyEvaluator"/>. A request that fails authentication
/// is answered with 401, one that fails authorization with 403; in both cases the next delegate is
/// not invoked.
/// </remarks>
/// <param name="next">The next delegate in the pipeline.</param>
/// <param name="policy">The evaluator used to authenticate and authorize the request.</param>
/// <param name="provider">The provider used to look up the admin policy.</param>
public sealed class HealthCheckAuthMiddleware(
    RequestDelegate next, IPolicyEvaluator policy, IAuthorizationPolicyProvider provider)
{
    private readonly RequestDelegate _next = next ?? throw new ArgumentNullException(nameof(next));
    private readonly IPolicyEvaluator _policy = policy ?? throw new ArgumentNullException(nameof(policy));
    private readonly IAuthorizationPolicyProvider _provider = provider ?? throw new ArgumentNullException(nameof(provider));

    /// <summary>
    /// Authenticates and authorizes the request against the admin policy, then invokes the next delegate.
    /// </summary>
    /// <param name="context">The current HTTP context.</param>
    /// <returns>A task that completes when the request has been handled.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="context"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The admin policy is not registered.</exception>
    public async Task InvokeAsync(HttpContext context)
    {
        ArgumentNullException.ThrowIfNull(context);

        // Deliberately not named 'policy': that would shadow the primary-constructor parameter of the
        // same name, which is the evaluator rather than the policy.
        AuthorizationPolicy adminPolicy = await _provider.GetPolicyAsync(PolicyNames.AdminAuthLevel)
            .ConfigureAwait(false);

        if (adminPolicy is null)
        {
            // A provider may return null for an unknown policy name. Fail with a clear message
            // instead of letting the evaluator hit a null reference.
            throw new InvalidOperationException(
                $"The AuthorizationPolicy named: '{PolicyNames.AdminAuthLevel}' was not found.");
        }

        AuthenticateResult authentication = await _policy.AuthenticateAsync(adminPolicy, context)
            .ConfigureAwait(false);

        if (!authentication.Succeeded)
        {
            context.Response.StatusCode = StatusCodes.Status401Unauthorized;
            return;
        }

        PolicyAuthorizationResult authorization = await _policy.AuthorizeAsync(
            adminPolicy, authentication, context, resource: null).ConfigureAwait(false);

        if (!authorization.Succeeded)
        {
            context.Response.StatusCode = StatusCodes.Status403Forbidden;
            return;
        }

        await _next(context).ConfigureAwait(false);
    }
}
49+
}

src/WebJobs.Script.WebHost/WebJobsApplicationBuilderExtension.cs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,13 @@
33

44
using System;
55
using Microsoft.AspNetCore.Builder;
6+
using Microsoft.AspNetCore.Diagnostics.HealthChecks;
67
using Microsoft.AspNetCore.Hosting;
8+
using Microsoft.AspNetCore.Http;
79
using Microsoft.Azure.WebJobs.Extensions.Http;
810
using Microsoft.Azure.WebJobs.Script.Extensions;
911
using Microsoft.Azure.WebJobs.Script.WebHost.Configuration;
12+
using Microsoft.Azure.WebJobs.Script.WebHost.Diagnostics.HealthChecks;
1013
using Microsoft.Azure.WebJobs.Script.WebHost.Middleware;
1114
using Microsoft.Extensions.DependencyInjection;
1215
using Microsoft.Extensions.Options;
@@ -41,6 +44,10 @@ public static IApplicationBuilder UseWebJobsScriptHost(this IApplicationBuilder
4144
builder.UseMiddleware<SystemTraceMiddleware>();
4245
builder.UseMiddleware<HandleCancellationMiddleware>();
4346
builder.UseMiddleware<HostnameFixupMiddleware>();
47+
48+
// Health is registered early in the pipeline to ensure it can avoid failures from the rest of the pipeline.
49+
builder.UseHealthChecks();
50+
4451
if (environment.IsAnyLinuxConsumption() || environment.IsAnyKubernetesEnvironment())
4552
{
4653
builder.UseMiddleware<EnvironmentReadyCheckMiddleware>();
@@ -116,5 +123,44 @@ public static IApplicationBuilder UseWebJobsScriptHost(this IApplicationBuilder
116123

117124
return builder;
118125
}
126+
127+
/// <summary>
/// Branches the pipeline for requests under '/admin/health' and registers the health check
/// endpoints ('/admin/health', '/admin/health/live' and '/admin/health/ready') inside that branch.
/// </summary>
/// <param name="app">The application builder the branch is added to.</param>
private static void UseHealthChecks(this IApplicationBuilder app)
{
    // Health is hosted under 'admin' for now in order to:
    // 1. Avoid conflicts with function routes.
    // 2. Share the auth model of the other admin APIs.
    // 3. Ensure it is always available to platform callers.
    // 4. Bypass easy-auth.
    const string healthPrefix = "/admin/health";

    // Builds the options for an endpoint that only runs the checks carrying the given tag.
    static HealthCheckOptions ForTag(string tag)
    {
        return new HealthCheckOptions
        {
            Predicate = registration => registration.Tags.Contains(tag),
            ResponseWriter = HealthCheckResponseWriter.WriteResponseAsync,
        };
    }

    app.MapWhen(
        static context => context.Request.Path.StartsWithSegments(healthPrefix),
        branch =>
        {
            // Auth runs first so nothing else in the branch executes for rejected callers.
            branch.UseMiddleware<HealthCheckAuthMiddleware>();

            // This supports the ?wait={seconds} query string.
            branch.UseMiddleware<HealthCheckWaitMiddleware>();

            // No predicate on the root endpoint, so the framework default selection applies.
            HealthCheckOptions allChecks = new()
            {
                ResponseWriter = HealthCheckResponseWriter.WriteResponseAsync,
            };

            branch.UseHealthChecks(healthPrefix, allChecks);
            branch.UseHealthChecks($"{healthPrefix}/live", ForTag("az.functions.liveness"));
            branch.UseHealthChecks($"{healthPrefix}/ready", ForTag("az.functions.readiness"));
        });
}
119165
}
120166
}

src/WebJobs.Script/Diagnostics/HealthChecks/HealthCheckExtensions.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ public static IHealthChecksBuilder AddWebJobsScriptHealthChecks(this IHealthChec
2525
ArgumentNullException.ThrowIfNull(builder);
2626
builder
2727
.AddWebHostHealthCheck()
28-
.AddScriptHostHealthCheck();
28+
.AddScriptHostHealthCheck()
29+
.AddTelemetryPublisher(HealthCheckTags.Liveness, HealthCheckTags.Readiness);
2930
return builder;
3031
}
3132

src/WebJobs.Script/Diagnostics/HealthChecks/HealthCheckMetrics.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
using System;
55
using System.Diagnostics.Metrics;
6+
using Microsoft.Azure.WebJobs.Script.Metrics;
67

78
namespace Microsoft.Azure.WebJobs.Script.Diagnostics.HealthChecks
89
{
@@ -22,7 +23,7 @@ public HealthCheckMetrics(IMeterFactory meterFactory)
2223
// We don't dispose the meter because IMeterFactory handles that
2324
// An issue on analyzer side: https://github.com/dotnet/roslyn-analyzers/issues/6912
2425
// Related documentation: https://github.com/dotnet/docs/pull/37170
25-
Meter meter = meterFactory.Create("Microsoft.Azure.WebJobs.Script");
26+
Meter meter = meterFactory.Create(HostMetrics.FaasMeterName, HostMetrics.FaasMeterVersion);
2627
#pragma warning restore CA2000 // Dispose objects before losing scope
2728

2829
HealthCheckReport = HealthCheckMetricsGeneration.CreateHealthCheckReportHistogram(meter);

src/WebJobs.Script/Metrics/HostMetrics.cs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ public class HostMetrics : IHostMetrics
2525
// FaaS Metrics
2626
public const string FaasInvokeDuration = "faas.invoke_duration";
2727

28+
public static readonly string FaasMeterVersion = typeof(HostMetrics).Assembly.GetName().Version?.ToString();
29+
2830
private Counter<long> _appFailureCount;
2931
private Counter<long> _startedInvocationCount;
3032
private Histogram<double> _faasInvokeDuration;
@@ -65,7 +67,7 @@ public HostMetrics(IMeterFactory meterFactory, IEnvironment environment, ILogger
6567
_appFailureCount = meter.CreateCounter<long>(AppFailureCount, "numeric", "Number of times the host has failed to start.");
6668
_startedInvocationCount = meter.CreateCounter<long>(StartedInvocationCount, "numeric", "Number of function invocations that have started.");
6769

68-
var faasMeter = meterFactory.Create(new MeterOptions(FaasMeterName));
70+
var faasMeter = meterFactory.Create(new MeterOptions(FaasMeterName) { Version = FaasMeterVersion });
6971
_faasInvokeDuration = faasMeter.CreateHistogram<double>(
7072
name: FaasInvokeDuration,
7173
unit: "s",

test/WebJobs.Script.Tests.Integration/WebHostEndToEnd/SamplesEndToEndTests_CSharp.cs

Lines changed: 38 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
using System.Net;
99
using System.Net.Http;
1010
using System.Net.Http.Headers;
11+
using System.Net.Http.Json;
1112
using System.Reflection;
1213
using System.Text;
1314
using System.Threading;
@@ -316,6 +317,35 @@ public async Task HostPing_Succeeds(string method)
316317
Assert.Equal("no-store, no-cache", cacheHeader);
317318
}
318319

320+
/// <summary>
/// Verifies that each health endpoint returns 200 with a healthy status body when the
/// request carries an admin JWT as a bearer token.
/// </summary>
[Theory]
[InlineData("/admin/health")]
[InlineData("/admin/health/live")]
[InlineData("/admin/health/ready")]
public async Task HealthCheck_AdminToken_Succeeds(string uri)
{
    // arrange: admin token specified as bearer token
    using HttpRequestMessage request = new(HttpMethod.Get, uri);
    string token = _fixture.Host.GenerateAdminJwtToken();
    request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", token);

    // act (request and response are disposed when the test method exits)
    using HttpResponseMessage response = await _fixture.Host.HttpClient.SendAsync(request);

    // assert
    Assert.Equal(HttpStatusCode.OK, response.StatusCode);

    string body = await response.Content.ReadAsStringAsync();
    Assert.Equal("{\"status\":\"Healthy\"}", body);
}
336+
337+
/// <summary>
/// Verifies that each health endpoint rejects a request that carries no credentials with 401.
/// </summary>
[Theory]
[InlineData("/admin/health")]
[InlineData("/admin/health/live")]
[InlineData("/admin/health/ready")]
public async Task HealthCheck_NoAdminToken_Fail(string uri)
{
    // arrange: no token or key is attached to the request
    using HttpRequestMessage request = new(HttpMethod.Get, uri);

    // act
    using HttpResponseMessage response = await _fixture.Host.HttpClient.SendAsync(request);

    // assert
    Assert.Equal(HttpStatusCode.Unauthorized, response.StatusCode);
}
348+
319349
[Fact]
320350
public async Task InstallExtensionsEnsureOldPathReturns404()
321351
{
@@ -497,8 +527,8 @@ public async Task HostLog_AdminLevel_Succeeds()
497527
{
498528
var request = new HttpRequestMessage(HttpMethod.Post, "admin/host/log");
499529
request.Headers.Add(AuthenticationLevelHandler.FunctionsKeyHeaderName, await _fixture.Host.GetMasterKeyAsync());
500-
var logs = new HostLogEntry[]
501-
{
530+
HostLogEntry[] logs =
531+
[
502532
new HostLogEntry
503533
{
504534
Level = System.Diagnostics.TraceLevel.Verbose,
@@ -524,13 +554,9 @@ public async Task HostLog_AdminLevel_Succeeds()
524554
FunctionName = "TestFunction",
525555
Message = string.Format("Test Error log {0}", Guid.NewGuid().ToString())
526556
}
527-
};
528-
var serializer = new JsonSerializer();
529-
var writer = new StringWriter();
530-
serializer.Serialize(writer, logs);
531-
var json = writer.ToString();
532-
request.Content = new StringContent(json);
533-
request.Content.Headers.ContentType = new MediaTypeHeaderValue("application/json");
557+
];
558+
559+
request.Content = JsonContent.Create(logs);
534560

535561
var response = await _fixture.Host.HttpClient.SendAsync(request);
536562
Assert.Equal(HttpStatusCode.OK, response.StatusCode);
@@ -540,7 +566,9 @@ public async Task HostLog_AdminLevel_Succeeds()
540566
var hostLogs = _fixture.Host.GetScriptHostLogMessages();
541567
foreach (var expectedLog in logs.Select(p => p.Message))
542568
{
543-
Assert.Equal(1, hostLogs.Count(p => p.FormattedMessage != null && p.FormattedMessage.Contains(expectedLog)));
569+
Assert.True(
570+
1 == hostLogs.Count(p => p.FormattedMessage != null && p.FormattedMessage.Contains(expectedLog)),
571+
$"Expected log message '{expectedLog}' not found. Log count: {hostLogs.Count}");
544572
}
545573
}
546574

test/WebJobs.Script.Tests/Diagnostics/HealthChecks/HealthCheckExtensionsTests.cs

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,12 @@ public void AddScriptHostHealthCheck_ThrowsOnNullBuilder()
5050
}
5151

5252
[Fact]
53-
public void AddWebJobsScriptHealthChecks_RegistersBothHealthChecks()
53+
public void AddWebJobsScriptHealthChecks_RegistersExpectedServices()
5454
{
5555
// arrange
56+
ServiceCollection services = new();
5657
Mock<IHealthChecksBuilder> builder = new(MockBehavior.Strict);
58+
builder.Setup(b => b.Services).Returns(services);
5759
builder.Setup(b => b.Add(It.IsAny<HealthCheckRegistration>())).Returns(builder.Object);
5860

5961
// act
@@ -67,7 +69,10 @@ public void AddWebJobsScriptHealthChecks_RegistersBothHealthChecks()
6769
builder.Verify(b => b.Add(IsRegistration<ScriptHostHealthCheck>(
6870
HealthCheckNames.ScriptHostLifeCycle, HealthCheckTags.Readiness)),
6971
Times.Once);
72+
builder.Verify(b => b.Services, Times.AtLeastOnce);
7073
builder.VerifyNoOtherCalls();
74+
75+
VerifyPublishers(services, null, HealthCheckTags.Liveness, HealthCheckTags.Readiness);
7176
}
7277

7378
[Fact]
@@ -216,17 +221,7 @@ public void AddTelemetryPublisher_RegistersExpected(string[] tags, string[] expe
216221
builder.AddTelemetryPublisher(tags);
217222

218223
// assert
219-
services.Where(x => x.ServiceType == typeof(IHealthCheckPublisher)).Should().HaveCount(expected.Length)
220-
.And.AllSatisfy(x => x.Lifetime.Should().Be(ServiceLifetime.Singleton));
221-
222-
ServiceProvider provider = services.BuildServiceProvider();
223-
IEnumerable<IHealthCheckPublisher> publishers = provider.GetServices<IHealthCheckPublisher>();
224-
225-
publishers.Should().HaveCount(expected.Length);
226-
foreach (string tag in expected)
227-
{
228-
publishers.Should().ContainSingle(p => VerifyPublisher(p, tag));
229-
}
224+
VerifyPublishers(services, expected);
230225
}
231226

232227
private static HealthCheckRegistration IsRegistration<T>(string name, string tag)
@@ -248,6 +243,21 @@ static bool IsType(HealthCheckRegistration registration)
248243
});
249244
}
250245

246+
/// <summary>
/// Asserts that <paramref name="services"/> contains one singleton <see cref="IHealthCheckPublisher"/>
/// registration per entry in <paramref name="tags"/>, and that the resolved publishers contain exactly
/// one match (as decided by <c>VerifyPublisher</c>) for each of those tags.
/// </summary>
/// <param name="services">The service collection to inspect.</param>
/// <param name="tags">The tags a publisher is expected for.</param>
private static void VerifyPublishers(IServiceCollection services, params string[] tags)
{
    int expectedCount = tags.Length;

    // Registration level: the right number of descriptors, all of them singletons.
    IEnumerable<ServiceDescriptor> descriptors =
        services.Where(descriptor => descriptor.ServiceType == typeof(IHealthCheckPublisher));
    descriptors.Should().HaveCount(expectedCount)
        .And.AllSatisfy(descriptor => descriptor.Lifetime.Should().Be(ServiceLifetime.Singleton));

    // Resolution level: the container actually produces one matching publisher per tag.
    ServiceProvider container = services.BuildServiceProvider();
    IEnumerable<IHealthCheckPublisher> resolved = container.GetServices<IHealthCheckPublisher>();
    resolved.Should().HaveCount(expectedCount);

    foreach (string expectedTag in tags)
    {
        resolved.Should().ContainSingle(publisher => VerifyPublisher(publisher, expectedTag));
    }
}
260+
251261
private static bool VerifyPublisher(IHealthCheckPublisher publisher, string tag)
252262
{
253263
return publisher is TelemetryHealthCheckPublisher telemetryPublisher

0 commit comments

Comments
 (0)