Skip to content

Commit aea8465

Browse files
mauroservientibordingandreasohlund
authored
Add a RavenDB dirty memory custom check (#4868)
* Add an initial draft of a dirty memory custom check * Add some memory analysis * Use the database configuration to get the server URL * Add more logging * Add a to-do * minor tweaks * Because editorconfig * Use better variable names * Refactor the memory information retriever to check the content schema * Register the MemoryInformationRetriever in DI * Fix formatting * Update the HTTP GET URL to trim the response size * Make the custom check work in both embedded and external mode * Custom checks approved list * Update log statements to mention RavenDB * Properly invert custom check id and category * Add the CheckDirtyMemory custom check to the primary instance * Link to the troubleshooting guidance page * Fix casing * Remove the trends evaluation from the dirty memory custom check It'll be added in a separate PR * Deep link to guidance * Reword custom check and log warning message Co-authored-by: Andreas Öhlund <[email protected]> * Rename the dirty memory custom check ID * reword comments --------- Co-authored-by: Brandon Ording <[email protected]> Co-authored-by: Andreas Öhlund <[email protected]>
1 parent 17d5c3e commit aea8465

File tree

9 files changed

+139
-0
lines changed

9 files changed

+139
-0
lines changed
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
namespace ServiceControl.Audit.Persistence.RavenDB.CustomChecks;
2+
3+
using System;
4+
using System.Threading;
5+
using System.Threading.Tasks;
6+
using NServiceBus.CustomChecks;
7+
using NServiceBus.Logging;
8+
9+
/// <summary>
/// Custom check that reports a failure when the <see cref="MemoryInformationRetriever"/> flags the
/// RavenDB dirty memory as high. A five minute <see cref="TimeSpan"/> is handed to the base
/// <see cref="CustomCheck"/> (presumably the repeat interval; confirm against the NServiceBus API).
/// </summary>
class CheckDirtyMemory(MemoryInformationRetriever memoryInformationRetriever) : CustomCheck("RavenDB dirty memory", "ServiceControl.Audit Health", TimeSpan.FromMinutes(5))
{
    static readonly ILog Log = LogManager.GetLogger<CheckDirtyMemory>();

    /// <summary>
    /// Retrieves the current memory information and turns it into a check result.
    /// </summary>
    /// <param name="cancellationToken">Forwarded to the memory information retrieval.</param>
    /// <returns>
    /// <see cref="CheckResult.Pass"/> when dirty memory is not flagged as high; otherwise a failed
    /// result whose message is also logged as a warning.
    /// </returns>
    public override async Task<CheckResult> PerformCheck(CancellationToken cancellationToken = default)
    {
        var memoryInformation = await memoryInformationRetriever.GetMemoryInformation(cancellationToken);

        // Nothing to report unless the retriever flags the dirty memory as high.
        if (!memoryInformation.IsHighDirty)
        {
            return CheckResult.Pass;
        }

        var failureReason = $"There is a high level of RavenDB dirty memory ({memoryInformation.DirtyMemoryKb}kb). See https://docs.particular.net/servicecontrol/troubleshooting#ravendb-dirty-memory for guidance on how to mitigate the issue.";
        Log.Warn(failureReason);

        return CheckResult.Failed(failureReason);
    }
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
namespace ServiceControl.Audit.Persistence.RavenDB;
2+
3+
using System;
4+
using System.Net.Http;
5+
using System.Text.Json;
6+
using System.Threading;
7+
using System.Threading.Tasks;
8+
9+
/// <summary>
/// Queries the RavenDB server's <c>/admin/debug/memory/stats</c> endpoint and extracts the dirty
/// memory details from the response.
/// </summary>
class MemoryInformationRetriever(DatabaseConfiguration databaseConfiguration)
{
    // ServerUrl is populated when running embedded and ConnectionString when running in external
    // mode, hence the ?? fallback. Note that tests behave as if running in external mode.
    // NOTE(review): this assumes the connection string only ever contains the server URL. If it can
    // contain anything else, this code needs to be adjusted to extract the server URL first.
    readonly HttpClient client = new() { BaseAddress = new Uri(databaseConfiguration.ServerConfiguration.ServerUrl ?? databaseConfiguration.ServerConfiguration.ConnectionString) };

    // Size units accepted in the DirtyMemory value, with the number of kilobytes each one represents.
    static readonly (string Unit, decimal Kilobytes)[] KnownUnits =
    {
        ("Bytes", 1m / 1024m),
        ("KBytes", 1m),
        ("MBytes", 1024m),
        ("GBytes", 1024m * 1024m),
        ("TBytes", 1024m * 1024m * 1024m)
    };

    record ResponseDto
    {
        public MemoryInformation MemoryInformation { get; set; }
    }

    record MemoryInformation
    {
        public bool IsHighDirty { get; set; }
        public string DirtyMemory { get; set; }
    }

    /// <summary>
    /// Fetches the memory statistics from the RavenDB server and returns the dirty memory details.
    /// </summary>
    /// <param name="cancellationToken">Cancels the HTTP request and the reading of the response.</param>
    /// <returns>
    /// Whether the server flags its dirty memory as high, and the amount of dirty memory converted
    /// to kilobytes (rounded to the nearest integer).
    /// </returns>
    /// <exception cref="HttpRequestException">The server replied with a non-success status code.</exception>
    /// <exception cref="InvalidOperationException">
    /// The response does not contain the dirty memory details, or they are not in the expected
    /// "&lt;number&gt; &lt;unit&gt;" format.
    /// </exception>
    public async Task<(bool IsHighDirty, int DirtyMemoryKb)> GetMemoryInformation(CancellationToken cancellationToken = default)
    {
        using var httpResponse = await client.GetAsync("/admin/debug/memory/stats?includeThreads=false&includeMappings=false", cancellationToken);

        // Fail with the actual HTTP status rather than with an obscure error while reading an error payload.
        httpResponse.EnsureSuccessStatusCode();

        var responseDto = JsonSerializer.Deserialize<ResponseDto>(await httpResponse.Content.ReadAsStringAsync(cancellationToken));

        var memoryInformation = responseDto?.MemoryInformation;
        if (memoryInformation is null || string.IsNullOrWhiteSpace(memoryInformation.DirtyMemory))
        {
            throw new InvalidOperationException("Unexpected response. The RavenDB memory statistics do not contain the dirty memory details.");
        }

        return (memoryInformation.IsHighDirty, ParseDirtyMemoryKb(memoryInformation.DirtyMemory));
    }

    // Converts a "<number> <unit>" size (e.g. "512 KBytes" or "1.5 MBytes") into whole kilobytes.
    static int ParseDirtyMemoryKb(string dirtyMemory)
    {
        var values = dirtyMemory.Split(' ', StringSplitOptions.RemoveEmptyEntries);

        // The value is machine generated, so parse it culture-invariantly; decimals and thousands separators are tolerated.
        if (values.Length >= 2
            && decimal.TryParse(values[0], System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands, System.Globalization.CultureInfo.InvariantCulture, out var amount))
        {
            foreach (var (unit, kilobytesPerUnit) in KnownUnits)
            {
                if (string.Equals(values[1], unit, StringComparison.OrdinalIgnoreCase))
                {
                    // Clamp so that an implausibly large value cannot overflow the int the callers expect.
                    return (int)Math.Clamp(Math.Round(amount * kilobytesPerUnit), (decimal)int.MinValue, (decimal)int.MaxValue);
                }
            }
        }

        throw new InvalidOperationException($"Unexpected response. Was expecting memory details formatted as '<number> <unit>' (Bytes, KBytes, MBytes, GBytes or TBytes), instead received: {dirtyMemory}");
    }
}

src/ServiceControl.Audit.Persistence.RavenDB/RavenPersistence.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ public void AddPersistence(IServiceCollection services)
2121
static void ConfigureLifecycle(IServiceCollection services, DatabaseConfiguration databaseConfiguration)
2222
{
2323
services.AddSingleton(databaseConfiguration);
24+
services.AddSingleton<MemoryInformationRetriever>();
2425

2526
services.AddSingleton<IRavenSessionProvider, RavenSessionProvider>();
2627
services.AddHostedService<RavenPersistenceLifecycleHostedService>();
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
ServiceControl.Audit Health: Audit Database Index Lag
22
ServiceControl.Audit Health: Audit Message Ingestion Process
3+
ServiceControl.Audit Health: RavenDB dirty memory
34
Storage space: ServiceControl.Audit database
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
namespace ServiceControl.Persistence.RavenDB.CustomChecks;
2+
3+
using System;
4+
using System.Threading;
5+
using System.Threading.Tasks;
6+
using NServiceBus.CustomChecks;
7+
using NServiceBus.Logging;
8+
9+
/// <summary>
/// Custom check that reports a failure when the <see cref="MemoryInformationRetriever"/> flags the
/// RavenDB dirty memory as high. A five minute <see cref="TimeSpan"/> is handed to the base
/// <see cref="CustomCheck"/> (presumably the repeat interval; confirm against the NServiceBus API).
/// </summary>
class CheckDirtyMemory(MemoryInformationRetriever memoryInformationRetriever) : CustomCheck("RavenDB dirty memory", "ServiceControl Health", TimeSpan.FromMinutes(5))
{
    static readonly ILog Log = LogManager.GetLogger<CheckDirtyMemory>();

    /// <summary>
    /// Retrieves the current memory information and turns it into a check result.
    /// </summary>
    /// <param name="cancellationToken">Forwarded to the memory information retrieval.</param>
    /// <returns>
    /// <see cref="CheckResult.Pass"/> when dirty memory is not flagged as high; otherwise a failed
    /// result whose message is also logged as a warning.
    /// </returns>
    public override async Task<CheckResult> PerformCheck(CancellationToken cancellationToken = default)
    {
        var memoryInformation = await memoryInformationRetriever.GetMemoryInformation(cancellationToken);

        // Nothing to report unless the retriever flags the dirty memory as high.
        if (!memoryInformation.IsHighDirty)
        {
            return CheckResult.Pass;
        }

        var failureReason = $"There is a high level of RavenDB dirty memory ({memoryInformation.DirtyMemoryKb}kb). See https://docs.particular.net/servicecontrol/troubleshooting#ravendb-dirty-memory for guidance on how to mitigate the issue.";
        Log.Warn(failureReason);

        return CheckResult.Failed(failureReason);
    }
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
namespace ServiceControl.Persistence.RavenDB;
2+
3+
using System;
4+
using System.Net.Http;
5+
using System.Text.Json;
6+
using System.Threading;
7+
using System.Threading.Tasks;
8+
9+
/// <summary>
/// Queries the RavenDB server's <c>/admin/debug/memory/stats</c> endpoint and extracts the dirty
/// memory details from the response.
/// </summary>
class MemoryInformationRetriever(RavenPersisterSettings persisterSettings)
{
    // The primary instance only has the concept of a connection string (whereas the Audit instance
    // has both a ServerUrl and a ConnectionString).
    // NOTE(review): this assumes the connection string only ever contains the server URL. If it can
    // contain anything else, this code needs to be adjusted to extract the server URL first.
    readonly HttpClient client = new() { BaseAddress = new Uri(persisterSettings.ConnectionString) };

    // Size units accepted in the DirtyMemory value, with the number of kilobytes each one represents.
    static readonly (string Unit, decimal Kilobytes)[] KnownUnits =
    {
        ("Bytes", 1m / 1024m),
        ("KBytes", 1m),
        ("MBytes", 1024m),
        ("GBytes", 1024m * 1024m),
        ("TBytes", 1024m * 1024m * 1024m)
    };

    record ResponseDto
    {
        public MemoryInformation MemoryInformation { get; set; }
    }

    record MemoryInformation
    {
        public bool IsHighDirty { get; set; }
        public string DirtyMemory { get; set; }
    }

    /// <summary>
    /// Fetches the memory statistics from the RavenDB server and returns the dirty memory details.
    /// </summary>
    /// <param name="cancellationToken">Cancels the HTTP request and the reading of the response.</param>
    /// <returns>
    /// Whether the server flags its dirty memory as high, and the amount of dirty memory converted
    /// to kilobytes (rounded to the nearest integer).
    /// </returns>
    /// <exception cref="HttpRequestException">The server replied with a non-success status code.</exception>
    /// <exception cref="InvalidOperationException">
    /// The response does not contain the dirty memory details, or they are not in the expected
    /// "&lt;number&gt; &lt;unit&gt;" format.
    /// </exception>
    public async Task<(bool IsHighDirty, int DirtyMemoryKb)> GetMemoryInformation(CancellationToken cancellationToken = default)
    {
        using var httpResponse = await client.GetAsync("/admin/debug/memory/stats?includeThreads=false&includeMappings=false", cancellationToken);

        // Fail with the actual HTTP status rather than with an obscure error while reading an error payload.
        httpResponse.EnsureSuccessStatusCode();

        var responseDto = JsonSerializer.Deserialize<ResponseDto>(await httpResponse.Content.ReadAsStringAsync(cancellationToken));

        var memoryInformation = responseDto?.MemoryInformation;
        if (memoryInformation is null || string.IsNullOrWhiteSpace(memoryInformation.DirtyMemory))
        {
            throw new InvalidOperationException("Unexpected response. The RavenDB memory statistics do not contain the dirty memory details.");
        }

        return (memoryInformation.IsHighDirty, ParseDirtyMemoryKb(memoryInformation.DirtyMemory));
    }

    // Converts a "<number> <unit>" size (e.g. "512 KBytes" or "1.5 MBytes") into whole kilobytes.
    static int ParseDirtyMemoryKb(string dirtyMemory)
    {
        var values = dirtyMemory.Split(' ', StringSplitOptions.RemoveEmptyEntries);

        // The value is machine generated, so parse it culture-invariantly; decimals and thousands separators are tolerated.
        if (values.Length >= 2
            && decimal.TryParse(values[0], System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands, System.Globalization.CultureInfo.InvariantCulture, out var amount))
        {
            foreach (var (unit, kilobytesPerUnit) in KnownUnits)
            {
                if (string.Equals(values[1], unit, StringComparison.OrdinalIgnoreCase))
                {
                    // Clamp so that an implausibly large value cannot overflow the int the callers expect.
                    return (int)Math.Clamp(Math.Round(amount * kilobytesPerUnit), (decimal)int.MinValue, (decimal)int.MaxValue);
                }
            }
        }

        throw new InvalidOperationException($"Unexpected response. Was expecting memory details formatted as '<number> <unit>' (Bytes, KBytes, MBytes, GBytes or TBytes), instead received: {dirtyMemory}");
    }
}

src/ServiceControl.Persistence.RavenDB/RavenPersistence.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,9 @@ public void AddPersistence(IServiceCollection services)
4949
services.AddCustomCheck<CheckRavenDBIndexLag>();
5050
services.AddCustomCheck<CheckFreeDiskSpace>();
5151
services.AddCustomCheck<CheckMinimumStorageRequiredForIngestion>();
52+
services.AddCustomCheck<CheckDirtyMemory>();
5253

54+
services.AddSingleton<MemoryInformationRetriever>();
5355
services.AddSingleton<OperationsManager>();
5456

5557
services.AddSingleton<IArchiveMessages, MessageArchiver>();
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
ServiceControl Health: Error Database Index Errors
22
ServiceControl Health: Error Database Index Lag
33
ServiceControl Health: Message Ingestion Process
4+
ServiceControl Health: RavenDB dirty memory
45
Storage space: ServiceControl database
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
ServiceControl Health: Error Database Index Errors
22
ServiceControl Health: Error Database Index Lag
33
ServiceControl Health: Message Ingestion Process
4+
ServiceControl Health: RavenDB dirty memory
45
Storage space: ServiceControl database

0 commit comments

Comments
 (0)