Skip to content

Commit 38461b9

Browse files
Copilot and jongalloway
committed
Add comprehensive health check system with endpoints and tests
Co-authored-by: jongalloway <[email protected]>
1 parent 4b5a1b7 commit 38461b9

12 files changed

+686
-0
lines changed

NLWebNet.sln

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tests", "tests", "{0AB3BF05
1515
EndProject
1616
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NLWebNet.Tests", "tests\NLWebNet.Tests\NLWebNet.Tests.csproj", "{21F486B2-CB3A-4D61-8C1F-FBCE3CA48CFE}"
1717
EndProject
18+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NLWebNet.Tests.MSTest", "tests\NLWebNet.Tests.MSTest\NLWebNet.Tests.MSTest.csproj", "{4155FF59-5F84-4597-BACA-4AE32519EC0F}"
19+
EndProject
1820
Global
1921
GlobalSection(SolutionConfigurationPlatforms) = preSolution
2022
Debug|Any CPU = Debug|Any CPU
@@ -61,6 +63,18 @@ Global
6163
{21F486B2-CB3A-4D61-8C1F-FBCE3CA48CFE}.Release|x64.Build.0 = Release|Any CPU
6264
{21F486B2-CB3A-4D61-8C1F-FBCE3CA48CFE}.Release|x86.ActiveCfg = Release|Any CPU
6365
{21F486B2-CB3A-4D61-8C1F-FBCE3CA48CFE}.Release|x86.Build.0 = Release|Any CPU
66+
{4155FF59-5F84-4597-BACA-4AE32519EC0F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
67+
{4155FF59-5F84-4597-BACA-4AE32519EC0F}.Debug|Any CPU.Build.0 = Debug|Any CPU
68+
{4155FF59-5F84-4597-BACA-4AE32519EC0F}.Debug|x64.ActiveCfg = Debug|Any CPU
69+
{4155FF59-5F84-4597-BACA-4AE32519EC0F}.Debug|x64.Build.0 = Debug|Any CPU
70+
{4155FF59-5F84-4597-BACA-4AE32519EC0F}.Debug|x86.ActiveCfg = Debug|Any CPU
71+
{4155FF59-5F84-4597-BACA-4AE32519EC0F}.Debug|x86.Build.0 = Debug|Any CPU
72+
{4155FF59-5F84-4597-BACA-4AE32519EC0F}.Release|Any CPU.ActiveCfg = Release|Any CPU
73+
{4155FF59-5F84-4597-BACA-4AE32519EC0F}.Release|Any CPU.Build.0 = Release|Any CPU
74+
{4155FF59-5F84-4597-BACA-4AE32519EC0F}.Release|x64.ActiveCfg = Release|Any CPU
75+
{4155FF59-5F84-4597-BACA-4AE32519EC0F}.Release|x64.Build.0 = Release|Any CPU
76+
{4155FF59-5F84-4597-BACA-4AE32519EC0F}.Release|x86.ActiveCfg = Release|Any CPU
77+
{4155FF59-5F84-4597-BACA-4AE32519EC0F}.Release|x86.Build.0 = Release|Any CPU
6478
EndGlobalSection
6579
GlobalSection(SolutionProperties) = preSolution
6680
HideSolutionNode = FALSE
@@ -69,5 +83,6 @@ Global
6983
{1E458E72-D542-44BB-9F84-1EDE008FBB1D} = {827E0CD3-B72D-47B6-A68D-7590B98EB39B}
7084
{6F25FD99-AF67-4509-A46C-FCD450F6A775} = {A39C23D2-F2C0-258D-165A-CF1E7FEE6E7B}
7185
{21F486B2-CB3A-4D61-8C1F-FBCE3CA48CFE} = {0AB3BF05-4346-4AA6-1389-037BE0695223}
86+
{4155FF59-5F84-4597-BACA-4AE32519EC0F} = {0AB3BF05-4346-4AA6-1389-037BE0695223}
7287
EndGlobalSection
7388
EndGlobal
Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
using Microsoft.AspNetCore.Builder;
2+
using Microsoft.AspNetCore.Http;
3+
using Microsoft.AspNetCore.Mvc;
4+
using Microsoft.AspNetCore.Routing;
5+
using Microsoft.Extensions.DependencyInjection;
6+
using Microsoft.Extensions.Diagnostics.HealthChecks;
7+
using Microsoft.Extensions.Logging;
8+
using System.Text.Json;
9+
10+
namespace NLWebNet.Endpoints;
11+
12+
/// <summary>
/// Minimal API endpoints for health checks and monitoring.
/// </summary>
public static class HealthEndpoints
{
    /// <summary>
    /// Status text reported when the health check pipeline itself could not produce a report.
    /// </summary>
    private const string UnhealthyStatus = "Unhealthy";

    /// <summary>
    /// Maps health check endpoints to the application
    /// </summary>
    /// <param name="app">The endpoint route builder</param>
    /// <returns>The endpoint route builder for chaining</returns>
    /// <exception cref="ArgumentNullException"><paramref name="app"/> is <c>null</c>.</exception>
    public static IEndpointRouteBuilder MapHealthEndpoints(this IEndpointRouteBuilder app)
    {
        ArgumentNullException.ThrowIfNull(app);

        // Basic health check endpoint
        app.MapGet("/health", GetBasicHealthAsync)
            .WithName("GetHealth")
            .WithTags("Health")
            .WithOpenApi(operation => new(operation)
            {
                Summary = "Basic health check",
                Description = "Returns the basic health status of the NLWebNet service"
            })
            .Produces<HealthCheckResponse>(StatusCodes.Status200OK)
            .Produces<HealthCheckResponse>(StatusCodes.Status503ServiceUnavailable);

        // Detailed health check endpoint
        app.MapGet("/health/detailed", GetDetailedHealthAsync)
            .WithName("GetDetailedHealth")
            .WithTags("Health")
            .WithOpenApi(operation => new(operation)
            {
                Summary = "Detailed health check",
                Description = "Returns detailed health status including individual service checks"
            })
            .Produces<DetailedHealthCheckResponse>(StatusCodes.Status200OK)
            .Produces<DetailedHealthCheckResponse>(StatusCodes.Status503ServiceUnavailable);

        return app;
    }

    /// <summary>
    /// Handles <c>GET /health</c>: runs the registered health checks and returns the
    /// aggregate status and total duration.
    /// </summary>
    /// <param name="healthCheckService">Service that executes the registered health checks.</param>
    /// <param name="loggerFactory">Factory used to create the endpoint logger.</param>
    /// <param name="cancellationToken">Token that cancels the health check run.</param>
    /// <returns>
    /// A JSON <see cref="HealthCheckResponse"/> with HTTP 200 when the aggregate status is
    /// Healthy, otherwise HTTP 503.
    /// </returns>
    private static async Task<IResult> GetBasicHealthAsync(
        [FromServices] HealthCheckService healthCheckService,
        [FromServices] ILoggerFactory loggerFactory,
        CancellationToken cancellationToken = default)
    {
        var logger = loggerFactory.CreateLogger(nameof(HealthEndpoints));

        try
        {
            var healthReport = await healthCheckService.CheckHealthAsync(cancellationToken);

            var response = new HealthCheckResponse
            {
                Status = healthReport.Status.ToString(),
                TotalDuration = healthReport.TotalDuration
            };

            logger.LogInformation("Health check completed with status: {Status}", healthReport.Status);

            return Results.Json(response, statusCode: ToStatusCode(healthReport.Status));
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // The caller gave up; that is not a service failure, so keep it out of the error log.
            logger.LogDebug("Health check was cancelled before it completed");

            return Results.Json(CreateBasicFailure(), statusCode: StatusCodes.Status503ServiceUnavailable);
        }
        catch (Exception ex)
        {
            logger.LogError(ex, "Health check failed with exception");

            return Results.Json(CreateBasicFailure(), statusCode: StatusCodes.Status503ServiceUnavailable);
        }
    }

    /// <summary>
    /// Handles <c>GET /health/detailed</c>: runs the registered health checks and returns the
    /// aggregate status together with one entry per individual check.
    /// </summary>
    /// <param name="healthCheckService">Service that executes the registered health checks.</param>
    /// <param name="loggerFactory">Factory used to create the endpoint logger.</param>
    /// <param name="cancellationToken">Token that cancels the health check run.</param>
    /// <returns>
    /// A JSON <see cref="DetailedHealthCheckResponse"/> with HTTP 200 when the aggregate status
    /// is Healthy, otherwise HTTP 503.
    /// </returns>
    private static async Task<IResult> GetDetailedHealthAsync(
        [FromServices] HealthCheckService healthCheckService,
        [FromServices] ILoggerFactory loggerFactory,
        CancellationToken cancellationToken = default)
    {
        var logger = loggerFactory.CreateLogger(nameof(HealthEndpoints));

        try
        {
            var healthReport = await healthCheckService.CheckHealthAsync(cancellationToken);

            var response = new DetailedHealthCheckResponse
            {
                Status = healthReport.Status.ToString(),
                TotalDuration = healthReport.TotalDuration,
                Entries = healthReport.Entries.ToDictionary(
                    kvp => kvp.Key,
                    kvp => ToEntry(kvp.Value))
            };

            logger.LogInformation("Detailed health check completed with status: {Status}, Entries: {EntryCount}",
                healthReport.Status, healthReport.Entries.Count);

            return Results.Json(response, statusCode: ToStatusCode(healthReport.Status));
        }
        catch (OperationCanceledException ex) when (cancellationToken.IsCancellationRequested)
        {
            // The caller gave up; that is not a service failure, so keep it out of the error log.
            logger.LogDebug("Detailed health check was cancelled before it completed");

            return Results.Json(CreateDetailedFailure(ex), statusCode: StatusCodes.Status503ServiceUnavailable);
        }
        catch (Exception ex)
        {
            logger.LogError(ex, "Detailed health check failed with exception");

            return Results.Json(CreateDetailedFailure(ex), statusCode: StatusCodes.Status503ServiceUnavailable);
        }
    }

    /// <summary>
    /// Maps an aggregate health status to the HTTP status code used by both endpoints.
    /// Only Healthy yields 200; every other status (including Degraded) yields 503.
    /// </summary>
    private static int ToStatusCode(HealthStatus status) =>
        status == HealthStatus.Healthy
            ? StatusCodes.Status200OK
            : StatusCodes.Status503ServiceUnavailable;

    /// <summary>
    /// Converts a single report entry into its response DTO.
    /// </summary>
    private static HealthCheckEntry ToEntry(HealthReportEntry entry) =>
        new()
        {
            Status = entry.Status.ToString(),
            Description = entry.Description,
            Duration = entry.Duration,
            Exception = entry.Exception?.Message,
            // An empty data bag is reported as null rather than as an empty object.
            Data = entry.Data.Count > 0 ? entry.Data : null
        };

    /// <summary>
    /// Builds the basic response returned when no health report could be produced.
    /// </summary>
    private static HealthCheckResponse CreateBasicFailure() =>
        new()
        {
            Status = UnhealthyStatus,
            TotalDuration = TimeSpan.Zero
        };

    /// <summary>
    /// Builds the detailed response returned when no health report could be produced;
    /// the failure is surfaced as a single synthetic "system" entry.
    /// </summary>
    private static DetailedHealthCheckResponse CreateDetailedFailure(Exception ex) =>
        new()
        {
            Status = UnhealthyStatus,
            TotalDuration = TimeSpan.Zero,
            Entries = new Dictionary<string, HealthCheckEntry>
            {
                ["system"] = new HealthCheckEntry
                {
                    Status = UnhealthyStatus,
                    Description = "Health check system failure",
                    Duration = TimeSpan.Zero,
                    Exception = ex.Message
                }
            }
        };
}
150+
151+
/// <summary>
/// Payload returned by the basic health endpoint.
/// </summary>
public class HealthCheckResponse
{
    /// <summary>Textual aggregate health status; defaults to an empty string.</summary>
    public string Status { get; set; } = "";

    /// <summary>Total time reported for the health check run.</summary>
    public TimeSpan TotalDuration { get; set; }
}
159+
160+
/// <summary>
/// Payload returned by the detailed health endpoint: the aggregate status plus one
/// entry per individual check.
/// </summary>
public class DetailedHealthCheckResponse
{
    /// <summary>Textual aggregate health status; defaults to an empty string.</summary>
    public string Status { get; set; } = "";

    /// <summary>Total time reported for the health check run.</summary>
    public TimeSpan TotalDuration { get; set; }

    /// <summary>Per-check results keyed by name; defaults to an empty dictionary.</summary>
    public Dictionary<string, HealthCheckEntry> Entries { get; set; } = new Dictionary<string, HealthCheckEntry>();
}
169+
170+
/// <summary>
/// Result details for a single health check.
/// </summary>
public class HealthCheckEntry
{
    /// <summary>Textual status of this check; defaults to an empty string.</summary>
    public string Status { get; set; } = "";

    /// <summary>Optional human-readable description of the result.</summary>
    public string? Description { get; set; }

    /// <summary>Time reported for this check.</summary>
    public TimeSpan Duration { get; set; }

    /// <summary>Optional exception message associated with the result.</summary>
    public string? Exception { get; set; }

    /// <summary>Optional additional key/value data attached to the result.</summary>
    public IReadOnlyDictionary<string, object>? Data { get; set; }
}

src/NLWebNet/Extensions/ApplicationBuilderExtensions.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ public static WebApplication MapNLWebNet(this WebApplication app)
2929
// Map minimal API endpoints directly
3030
AskEndpoints.MapAskEndpoints(app);
3131
McpEndpoints.MapMcpEndpoints(app);
32+
HealthEndpoints.MapHealthEndpoints(app);
3233

3334
return app;
3435
}
@@ -43,6 +44,7 @@ public static IEndpointRouteBuilder MapNLWebNet(this IEndpointRouteBuilder app)
4344
// Map minimal API endpoints directly
4445
AskEndpoints.MapAskEndpoints(app);
4546
McpEndpoints.MapMcpEndpoints(app);
47+
HealthEndpoints.MapHealthEndpoints(app);
4648

4749
return app;
4850
}

src/NLWebNet/Extensions/ServiceCollectionExtensions.cs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
using Microsoft.Extensions.DependencyInjection;
2+
using Microsoft.Extensions.Diagnostics.HealthChecks;
23
using NLWebNet.Models;
34
using NLWebNet.Services;
45
using NLWebNet.MCP;
56
using NLWebNet.Controllers;
7+
using NLWebNet.Health;
68

79
namespace NLWebNet;
810

@@ -38,6 +40,12 @@ public static IServiceCollection AddNLWebNet(this IServiceCollection services, A
3840
services.AddTransient<AskController>();
3941
services.AddTransient<McpController>();
4042

43+
// Add health checks
44+
services.AddHealthChecks()
45+
.AddCheck<NLWebHealthCheck>("nlweb")
46+
.AddCheck<DataBackendHealthCheck>("data-backend")
47+
.AddCheck<AIServiceHealthCheck>("ai-service");
48+
4149
return services;
4250
}
4351

@@ -62,9 +70,18 @@ public static IServiceCollection AddNLWebNet<TDataBackend>(this IServiceCollecti
6270
services.AddScoped<IQueryProcessor, QueryProcessor>();
6371
services.AddScoped<IResultGenerator, ResultGenerator>();
6472

73+
// Register MCP services
74+
services.AddScoped<IMcpService, McpService>();
75+
6576
// Register custom data backend
6677
services.AddScoped<IDataBackend, TDataBackend>();
6778

79+
// Add health checks
80+
services.AddHealthChecks()
81+
.AddCheck<NLWebHealthCheck>("nlweb")
82+
.AddCheck<DataBackendHealthCheck>("data-backend")
83+
.AddCheck<AIServiceHealthCheck>("ai-service");
84+
6885
return services;
6986
}
7087
}
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
using Microsoft.Extensions.Diagnostics.HealthChecks;
2+
using Microsoft.Extensions.Logging;
3+
using NLWebNet.MCP;
4+
5+
namespace NLWebNet.Health;
6+
7+
/// <summary>
/// Health check for AI/MCP service connectivity
/// </summary>
public class AIServiceHealthCheck : IHealthCheck
{
    private readonly IMcpService _mcpService;
    private readonly ILogger<AIServiceHealthCheck> _logger;

    /// <summary>
    /// Creates the health check.
    /// </summary>
    /// <param name="mcpService">The MCP service whose responsiveness is probed.</param>
    /// <param name="logger">Logger for diagnostic output.</param>
    /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
    public AIServiceHealthCheck(IMcpService mcpService, ILogger<AIServiceHealthCheck> logger)
    {
        _mcpService = mcpService ?? throw new ArgumentNullException(nameof(mcpService));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Probes the MCP service by listing its tools.
    /// </summary>
    /// <param name="context">The health check context (not read by this check).</param>
    /// <param name="cancellationToken">Token that cancels the probe.</param>
    /// <returns>
    /// Healthy when a tools list is returned, Degraded when the call returns <c>null</c>,
    /// and Unhealthy when the call throws.
    /// </returns>
    /// <exception cref="OperationCanceledException">
    /// The probe was cancelled through <paramref name="cancellationToken"/>.
    /// </exception>
    public async Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default)
    {
        try
        {
            _logger.LogDebug("Performing AI service health check");

            // The constructor rejects a null service, so no availability check is needed here.
            // Test basic connectivity by checking available tools.
            var toolsResult = await _mcpService.ListToolsAsync(cancellationToken);

            if (toolsResult == null)
            {
                return HealthCheckResult.Degraded("AI/MCP service responded but returned null tools list");
            }

            _logger.LogDebug("AI service health check completed successfully");
            return HealthCheckResult.Healthy("AI/MCP service is operational");
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // Cancellation requested by the caller says nothing about the service's health;
            // propagate it instead of reporting a false Unhealthy result.
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "AI service health check failed");
            return HealthCheckResult.Unhealthy($"AI service health check failed: {ex.Message}", ex);
        }
    }
}
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
using Microsoft.Extensions.Diagnostics.HealthChecks;
2+
using Microsoft.Extensions.Logging;
3+
using NLWebNet.Services;
4+
5+
namespace NLWebNet.Health;
6+
7+
/// <summary>
/// Health check for data backend connectivity
/// </summary>
public class DataBackendHealthCheck : IHealthCheck
{
    private readonly IDataBackend _dataBackend;
    private readonly ILogger<DataBackendHealthCheck> _logger;

    /// <summary>
    /// Creates the health check.
    /// </summary>
    /// <param name="dataBackend">The data backend whose responsiveness is probed.</param>
    /// <param name="logger">Logger for diagnostic output.</param>
    /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
    public DataBackendHealthCheck(IDataBackend dataBackend, ILogger<DataBackendHealthCheck> logger)
    {
        _dataBackend = dataBackend ?? throw new ArgumentNullException(nameof(dataBackend));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Probes the data backend by issuing a fixed "health-check" search.
    /// </summary>
    /// <param name="context">The health check context (not read by this check).</param>
    /// <param name="cancellationToken">Token that cancels the probe.</param>
    /// <returns>
    /// Healthy when the search completes or the backend throws
    /// <see cref="NotImplementedException"/>; Unhealthy when any other exception is thrown.
    /// </returns>
    /// <exception cref="OperationCanceledException">
    /// The probe was cancelled through <paramref name="cancellationToken"/>.
    /// </exception>
    public async Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default)
    {
        try
        {
            _logger.LogDebug("Performing data backend health check");

            // The constructor rejects a null backend, so no availability check is needed here.
            // Test basic connectivity by attempting a simple query. The results are discarded:
            // only the fact that the call completes without throwing matters.
            // NOTE(review): the cost of this query depends on the backend implementation — confirm
            // it is cheap enough for the probe frequency in use.
            _ = await _dataBackend.SearchAsync("health-check", cancellationToken: cancellationToken);

            _logger.LogDebug("Data backend health check completed successfully");
            return HealthCheckResult.Healthy($"Data backend ({_dataBackend.GetType().Name}) is operational");
        }
        catch (NotImplementedException)
        {
            // Some backends might not implement SearchAsync
            _logger.LogDebug("Data backend doesn't support SearchAsync, checking availability only");
            return HealthCheckResult.Healthy($"Data backend ({_dataBackend.GetType().Name}) is available (limited functionality)");
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // Cancellation requested by the caller says nothing about the backend's health;
            // propagate it instead of reporting a false Unhealthy result.
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Data backend health check failed");
            return HealthCheckResult.Unhealthy($"Data backend health check failed: {ex.Message}", ex);
        }
    }
}

0 commit comments

Comments
 (0)