Skip to content

Commit c84de9a

Browse files
authored
Improve memory diagnoser accuracy (#2562)
* Added `AggressiveOptimization` to methods involved in measuring allocations. Warm up allocation measurement before taking actual measurement. Isolated allocation measurement. Changed some `RuntimeInformation` properties to static readonly fields. Removed enable monitoring in Engine (GcStats handles it). Removed `GC.Collect()` from allocation measurement. Sleep thread to account for tiered jit in Core runtimes 3.0 to 6.0. Updated MemoryDiagnoserTests. Block finalizer thread during memory tests. Disabled EventSource for integration tests. * Use Monitor.Wait instead of ManualResetEventSlim. * Swap IsWasm and IsNativeAOT declaration order.
1 parent c9548fa commit c84de9a

File tree

5 files changed

+244
-98
lines changed

5 files changed

+244
-98
lines changed

src/BenchmarkDotNet/Engines/Engine.cs

Lines changed: 104 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
using System.Globalization;
55
using System.Linq;
66
using System.Runtime.CompilerServices;
7+
using System.Threading;
78
using BenchmarkDotNet.Characteristics;
89
using BenchmarkDotNet.Environments;
910
using BenchmarkDotNet.Jobs;
@@ -220,31 +221,56 @@ private ClockSpan Measure(Action<long> action, long invokeCount)
220221

221222
private (GcStats, ThreadingStats, double) GetExtraStats(IterationData data)
222223
{
223-
// we enable monitoring after main target run, for this single iteration which is executed at the end
224-
// so even if we enable AppDomain monitoring in separate process
225-
// it does not matter, because we have already obtained the results!
226-
EnableMonitoring();
224+
// Warm up the measurement functions before starting the actual measurement.
225+
DeadCodeEliminationHelper.KeepAliveWithoutBoxing(GcStats.ReadInitial());
226+
DeadCodeEliminationHelper.KeepAliveWithoutBoxing(GcStats.ReadFinal());
227227

228228
IterationSetupAction(); // we run iteration setup first, so even if it allocates, it is not included in the results
229229

230230
var initialThreadingStats = ThreadingStats.ReadInitial(); // this method might allocate
231231
var exceptionsStats = new ExceptionsStats(); // allocates
232232
exceptionsStats.StartListening(); // this method might allocate
233-
var initialGcStats = GcStats.ReadInitial();
234233

235-
WorkloadAction(data.InvokeCount / data.UnrollFactor);
234+
#if !NET7_0_OR_GREATER
235+
if (RuntimeInformation.IsNetCore && Environment.Version.Major is >= 3 and <= 6 && RuntimeInformation.IsTieredJitEnabled)
236+
{
237+
// #1542
238+
// We put the current thread to sleep so tiered jit can kick in, compile its stuff,
239+
// and NOT allocate anything on the background thread when we are measuring allocations.
240+
// This is only an issue on netcoreapp3.0 to net6.0. Tiered jit allocations were "fixed" in net7.0
241+
// (maybe not completely eliminated forever, but at least reduced to a point where measurements are much more stable),
242+
// and netcoreapp2.X uses only GetAllocatedBytesForCurrentThread which doesn't capture the tiered jit allocations.
243+
Thread.Sleep(TimeSpan.FromMilliseconds(500));
244+
}
245+
#endif
236246

237-
exceptionsStats.Stop();
238-
var finalGcStats = GcStats.ReadFinal();
247+
// GC collect before measuring allocations.
248+
ForceGcCollect();
249+
GcStats gcStats;
250+
using (FinalizerBlocker.MaybeStart())
251+
{
252+
gcStats = MeasureWithGc(data.InvokeCount / data.UnrollFactor);
253+
}
254+
255+
exceptionsStats.Stop(); // this method might (de)allocate
239256
var finalThreadingStats = ThreadingStats.ReadFinal();
240257

241258
IterationCleanupAction(); // we run iteration cleanup after collecting GC stats
242259

243260
var totalOperationsCount = data.InvokeCount * OperationsPerInvoke;
244-
GcStats gcStats = (finalGcStats - initialGcStats).WithTotalOperations(totalOperationsCount);
245-
ThreadingStats threadingStats = (finalThreadingStats - initialThreadingStats).WithTotalOperations(data.InvokeCount * OperationsPerInvoke);
261+
return (gcStats.WithTotalOperations(totalOperationsCount),
262+
(finalThreadingStats - initialThreadingStats).WithTotalOperations(totalOperationsCount),
263+
exceptionsStats.ExceptionsCount / (double)totalOperationsCount);
264+
}
246265

247-
return (gcStats, threadingStats, exceptionsStats.ExceptionsCount / (double)totalOperationsCount);
266+
// Isolate the allocation measurement and skip tier0 jit to make sure we don't get any unexpected allocations.
267+
[MethodImpl(MethodImplOptions.NoInlining | CodeGenHelper.AggressiveOptimizationOption)]
268+
private GcStats MeasureWithGc(long invokeCount)
269+
{
270+
var initialGcStats = GcStats.ReadInitial();
271+
WorkloadAction(invokeCount);
272+
var finalGcStats = GcStats.ReadFinal();
273+
return finalGcStats - initialGcStats;
248274
}
249275

250276
private void RandomizeManagedHeapMemory()
@@ -273,7 +299,7 @@ private void GcCollect()
273299
ForceGcCollect();
274300
}
275301

276-
private static void ForceGcCollect()
302+
internal static void ForceGcCollect()
277303
{
278304
GC.Collect();
279305
GC.WaitForPendingFinalizers();
@@ -284,15 +310,6 @@ private static void ForceGcCollect()
284310

285311
public void WriteLine() => Host.WriteLine();
286312

287-
private static void EnableMonitoring()
288-
{
289-
if (RuntimeInformation.IsOldMono) // Monitoring is not available in Mono, see http://stackoverflow.com/questions/40234948/how-to-get-the-number-of-allocated-bytes-in-mono
290-
return;
291-
292-
if (RuntimeInformation.IsFullFramework)
293-
AppDomain.MonitoringIsEnabled = true;
294-
}
295-
296313
[UsedImplicitly]
297314
public static class Signals
298315
{
@@ -315,5 +332,71 @@ private static readonly Dictionary<string, HostSignal> MessagesToSignals
315332
public static bool TryGetSignal(string message, out HostSignal signal)
316333
=> MessagesToSignals.TryGetValue(message, out signal);
317334
}
335+
336+
// Very long key and value so this shouldn't be used outside of unit tests.
337+
internal const string UnitTestBlockFinalizerEnvKey = "BENCHMARKDOTNET_UNITTEST_BLOCK_FINALIZER_FOR_MEMORYDIAGNOSER";
338+
internal const string UnitTestBlockFinalizerEnvValue = UnitTestBlockFinalizerEnvKey + "_ACTIVE";
339+
340+
// To prevent finalizers interfering with allocation measurements for unit tests,
341+
// we block the finalizer thread until we've completed the measurement.
342+
// https://github.com/dotnet/runtime/issues/101536#issuecomment-2077647417
343+
private readonly struct FinalizerBlocker : IDisposable
344+
{
345+
private readonly object hangLock;
346+
347+
private FinalizerBlocker(object hangLock) => this.hangLock = hangLock;
348+
349+
private sealed class Impl
350+
{
351+
// ManualResetEvent(Slim) allocates when it is waited and yields the thread,
352+
// so we use Monitor.Wait instead which does not allocate managed memory.
353+
// This behavior is not documented, but was observed with the VS Profiler.
354+
private readonly object hangLock = new ();
355+
private readonly ManualResetEventSlim enteredFinalizerEvent = new (false);
356+
357+
~Impl()
358+
{
359+
lock (hangLock)
360+
{
361+
enteredFinalizerEvent.Set();
362+
Monitor.Wait(hangLock);
363+
}
364+
}
365+
366+
[MethodImpl(MethodImplOptions.NoInlining)]
367+
internal static (object hangLock, ManualResetEventSlim enteredFinalizerEvent) CreateWeakly()
368+
{
369+
var impl = new Impl();
370+
return (impl.hangLock, impl.enteredFinalizerEvent);
371+
}
372+
}
373+
374+
internal static FinalizerBlocker MaybeStart()
375+
{
376+
if (Environment.GetEnvironmentVariable(UnitTestBlockFinalizerEnvKey) != UnitTestBlockFinalizerEnvValue)
377+
{
378+
return default;
379+
}
380+
var (hangLock, enteredFinalizerEvent) = Impl.CreateWeakly();
381+
do
382+
{
383+
GC.Collect();
384+
// Do NOT call GC.WaitForPendingFinalizers.
385+
}
386+
while (!enteredFinalizerEvent.IsSet);
387+
return new FinalizerBlocker(hangLock);
388+
}
389+
390+
public void Dispose()
391+
{
392+
if (hangLock is not null)
393+
{
394+
lock (hangLock)
395+
{
396+
Monitor.Pulse(hangLock);
397+
}
398+
}
399+
}
400+
}
318401
}
319402
}

src/BenchmarkDotNet/Engines/GcStats.cs

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
using System;
22
using System.Reflection;
3+
using System.Runtime.CompilerServices;
34
using BenchmarkDotNet.Columns;
45
using BenchmarkDotNet.Jobs;
56
using BenchmarkDotNet.Portability;
@@ -106,9 +107,10 @@ public int GetCollectionsCount(int generation)
106107
return AllocatedBytes <= AllocationQuantum ? 0L : AllocatedBytes;
107108
}
108109

110+
// Skip tier0 jit to make sure we don't get any unexpected allocations in this method.
111+
[MethodImpl(CodeGenHelper.AggressiveOptimizationOption)]
109112
public static GcStats ReadInitial()
110113
{
111-
// this will force GC.Collect, so we want to do this before collecting collections counts
112114
long? allocatedBytes = GetAllocatedBytes();
113115

114116
return new GcStats(
@@ -119,15 +121,14 @@ public static GcStats ReadInitial()
119121
0);
120122
}
121123

124+
// Skip tier0 jit to make sure we don't get any unexpected allocations in this method.
125+
[MethodImpl(CodeGenHelper.AggressiveOptimizationOption)]
122126
public static GcStats ReadFinal()
123127
{
124128
return new GcStats(
125129
GC.CollectionCount(0),
126130
GC.CollectionCount(1),
127131
GC.CollectionCount(2),
128-
129-
// this will force GC.Collect, so we want to do this after collecting collections counts
130-
// to exclude this single full forced collection from results
131132
GetAllocatedBytes(),
132133
0);
133134
}
@@ -136,17 +137,16 @@ public static GcStats ReadFinal()
136137
public static GcStats FromForced(int forcedFullGarbageCollections)
137138
=> new GcStats(forcedFullGarbageCollections, forcedFullGarbageCollections, forcedFullGarbageCollections, 0, 0);
138139

140+
// Skip tier0 jit to make sure we don't get any unexpected allocations in this method.
141+
[MethodImpl(CodeGenHelper.AggressiveOptimizationOption)]
139142
private static long? GetAllocatedBytes()
140143
{
141144
// we have no tests for WASM and don't want to risk introducing a new bug (https://github.com/dotnet/BenchmarkDotNet/issues/2226)
142145
if (RuntimeInformation.IsWasm)
143146
return null;
144147

145-
// "This instance Int64 property returns the number of bytes that have been allocated by a specific
146-
// AppDomain. The number is accurate as of the last garbage collection." - CLR via C#
147-
// so we enforce GC.Collect here just to make sure we get accurate results
148-
GC.Collect();
149-
148+
// Do NOT call GC.Collect() here, as it causes finalizers to run and possibly allocate. https://github.com/dotnet/runtime/issues/101536#issuecomment-2077533242
149+
// Instead, we call it before we start the measurement in the Engine.
150150
#if NET6_0_OR_GREATER
151151
return GC.GetTotalAllocatedBytes(precise: true);
152152
#else
@@ -218,9 +218,7 @@ private static long CalculateAllocationQuantumSize()
218218
break;
219219
}
220220

221-
GC.Collect();
222-
GC.WaitForPendingFinalizers();
223-
GC.Collect();
221+
Engine.ForceGcCollect();
224222

225223
result = GC.GetTotalMemory(false);
226224
var tmp = new object();

src/BenchmarkDotNet/Portability/RuntimeInformation.cs

Lines changed: 35 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -27,47 +27,47 @@ internal static class RuntimeInformation
2727
internal const string ReleaseConfigurationName = "RELEASE";
2828
internal const string Unknown = "?";
2929

30+
// Many of these checks allocate and/or are expensive to compute. We store the results in static readonly fields to keep Engine non-allocating.
31+
// Static readonly fields are used instead of properties to avoid an extra getter method call that might not be tier1 jitted.
32+
// This class is internal, so we don't need to expose these as properties.
33+
3034
/// <summary>
3135
/// returns true for both the old (implementation of .NET Framework) and new Mono (.NET 6+ flavour)
3236
/// </summary>
33-
public static bool IsMono { get; } =
34-
Type.GetType("Mono.RuntimeStructs") != null; // it allocates a lot of memory, we need to check it once in order to keep Engine non-allocating!
37+
public static readonly bool IsMono = Type.GetType("Mono.RuntimeStructs") != null;
3538

36-
public static bool IsOldMono { get; } = Type.GetType("Mono.Runtime") != null;
39+
public static readonly bool IsOldMono = Type.GetType("Mono.Runtime") != null;
3740

38-
public static bool IsNewMono { get; } = IsMono && !IsOldMono;
41+
public static readonly bool IsNewMono = IsMono && !IsOldMono;
3942

40-
public static bool IsFullFramework =>
43+
public static readonly bool IsFullFramework =
4144
#if NET6_0_OR_GREATER
45+
// This could be const, but we want to avoid unreachable code warnings.
4246
false;
4347
#else
4448
FrameworkDescription.StartsWith(".NET Framework", StringComparison.OrdinalIgnoreCase);
4549
#endif
4650

47-
[PublicAPI]
48-
public static bool IsNetNative => FrameworkDescription.StartsWith(".NET Native", StringComparison.OrdinalIgnoreCase);
49-
50-
public static bool IsNetCore
51-
=> ((Environment.Version.Major >= 5) || FrameworkDescription.StartsWith(".NET Core", StringComparison.OrdinalIgnoreCase))
52-
&& !string.IsNullOrEmpty(typeof(object).Assembly.Location);
51+
public static readonly bool IsNetNative = FrameworkDescription.StartsWith(".NET Native", StringComparison.OrdinalIgnoreCase);
5352

54-
public static bool IsNativeAOT
55-
=> Environment.Version.Major >= 5
56-
&& string.IsNullOrEmpty(typeof(object).Assembly.Location) // it's merged to a single .exe and .Location returns null
57-
&& !IsWasm; // Wasm also returns "" for assembly locations
53+
public static readonly bool IsNetCore =
54+
((Environment.Version.Major >= 5) || FrameworkDescription.StartsWith(".NET Core", StringComparison.OrdinalIgnoreCase))
55+
&& !string.IsNullOrEmpty(typeof(object).Assembly.Location);
5856

5957
#if NET6_0_OR_GREATER
6058
[System.Runtime.Versioning.SupportedOSPlatformGuard("browser")]
61-
#endif
62-
public static bool IsWasm =>
63-
#if NET6_0_OR_GREATER
64-
OperatingSystem.IsBrowser();
59+
public static readonly bool IsWasm = OperatingSystem.IsBrowser();
6560
#else
66-
IsOSPlatform(OSPlatform.Create("BROWSER"));
61+
public static readonly bool IsWasm = IsOSPlatform(OSPlatform.Create("BROWSER"));
6762
#endif
6863

64+
public static readonly bool IsNativeAOT =
65+
Environment.Version.Major >= 5
66+
&& string.IsNullOrEmpty(typeof(object).Assembly.Location) // it's merged to a single .exe and .Location returns null
67+
&& !IsWasm; // Wasm also returns "" for assembly locations
68+
6969
#if NETSTANDARD2_0
70-
public static bool IsAot { get; } = IsAotMethod(); // This allocates, so we only want to call it once statically.
70+
public static readonly bool IsAot = IsAotMethod();
7171

7272
private static bool IsAotMethod()
7373
{
@@ -85,11 +85,22 @@ private static bool IsAotMethod()
8585
return false;
8686
}
8787
#else
88-
public static bool IsAot => !System.Runtime.CompilerServices.RuntimeFeature.IsDynamicCodeCompiled;
88+
public static readonly bool IsAot = !System.Runtime.CompilerServices.RuntimeFeature.IsDynamicCodeCompiled;
8989
#endif
9090

91-
public static bool IsRunningInContainer => string.Equals(Environment.GetEnvironmentVariable("DOTNET_RUNNING_IN_CONTAINER"), "true");
92-
91+
public static readonly bool IsTieredJitEnabled =
92+
IsNetCore
93+
&& (Environment.Version.Major < 3
94+
// Disabled by default in netcoreapp2.X, check if it's enabled.
95+
? Environment.GetEnvironmentVariable("COMPlus_TieredCompilation") == "1"
96+
|| Environment.GetEnvironmentVariable("DOTNET_TieredCompilation") == "1"
97+
|| (AppContext.TryGetSwitch("System.Runtime.TieredCompilation", out bool isEnabled) && isEnabled)
98+
// Enabled by default in netcoreapp3.0+, check if it's disabled.
99+
: Environment.GetEnvironmentVariable("COMPlus_TieredCompilation") != "0"
100+
&& Environment.GetEnvironmentVariable("DOTNET_TieredCompilation") != "0"
101+
&& (!AppContext.TryGetSwitch("System.Runtime.TieredCompilation", out isEnabled) || isEnabled));
102+
103+
public static readonly bool IsRunningInContainer = string.Equals(Environment.GetEnvironmentVariable("DOTNET_RUNNING_IN_CONTAINER"), "true");
93104

94105
internal static string GetArchitecture() => GetCurrentPlatform().ToString();
95106

tests/BenchmarkDotNet.IntegrationTests/BenchmarkDotNet.IntegrationTests.csproj

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
<Content Include="xunit.runner.json">
1919
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
2020
</Content>
21+
<!-- Disable EventSource to stabilize MemoryDiagnoserTests. https://github.com/dotnet/BenchmarkDotNet/pull/2562#issuecomment-2081317379 -->
22+
<RuntimeHostConfigurationOption Include="System.Diagnostics.Tracing.EventSource.IsSupported" Value="false" />
2123
</ItemGroup>
2224
<ItemGroup>
2325
<ProjectReference Include="..\BenchmarkDotNet.IntegrationTests.ConfigPerAssembly\BenchmarkDotNet.IntegrationTests.ConfigPerAssembly.csproj" />

0 commit comments

Comments
 (0)