Skip to content

Commit 9f8f517

Browse files
authored
Best effort to avoid GC during cold start specialization (#8521) (#8547)
1 parent 9131442 commit 9f8f517

File tree

4 files changed

+122
-1
lines changed

4 files changed

+122
-1
lines changed
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
// Copyright (c) .NET Foundation. All rights reserved.
2+
// Licensed under the MIT License. See License.txt in the project root for license information.
3+
using System;
4+
using System.Runtime;
5+
using System.Threading;
6+
using System.Threading.Tasks;
7+
using Microsoft.AspNetCore.Http;
8+
using Microsoft.Extensions.Logging;
9+
10+
namespace Microsoft.Azure.WebJobs.Script.WebHost.Middleware
11+
{
12+
/// <summary>
/// A middleware responsible for optimizing CLR settings, such as GC behavior, to help with cold start.
/// </summary>
/// <remarks>
/// After a request completes successfully while the host is in standby mode, the middleware forces a
/// collection and tries to enter a no GC region (unless one is already active). After the first successful
/// request once the host has left standby mode, it ends the no GC region (if still active) and replaces
/// itself with a direct pass-through to the next delegate. All GC tuning is best effort: failures are
/// logged and never surfaced to the caller.
/// </remarks>
internal class ClrOptimizationMiddleware
{
    // This is double the amount of memory allocated during cold start specialization.
    // This value is calculated based on prod profiles across all languages observed for an extended period of time.
    // This value is just a best effort and if for any reason CLR needs to allocate more memory then it will ignore this value.
    private const long AllocationBudgetForGCDuringSpecialization = 16 * 1024 * 1024;
    private readonly ILogger _logger;
    private readonly RequestDelegate _next;
    private readonly IScriptWebHostEnvironment _webHostEnvironment;

    // Delegate used by Invoke; starts as the optimization check and is swapped for _next after specialization.
    private RequestDelegate _invoke;

    // 0 until the post-specialization swap has happened, 1 afterwards. Only accessed through Interlocked.
    private int _specialized = 0;

    /// <summary>
    /// Initializes a new instance of the <see cref="ClrOptimizationMiddleware"/> class.
    /// </summary>
    /// <param name="next">The next delegate in the request pipeline.</param>
    /// <param name="webHostEnvironment">Used to determine whether the host is currently in standby mode.</param>
    /// <param name="logger">The logger used for GC diagnostics and failures.</param>
    // NOTE(review): the logger category is SystemTraceMiddleware rather than this class, which looks like a
    // copy-paste. It is left unchanged here because changing it alters the constructor signature and the log category.
    public ClrOptimizationMiddleware(RequestDelegate next, IScriptWebHostEnvironment webHostEnvironment, ILogger<SystemTraceMiddleware> logger)
    {
        _webHostEnvironment = webHostEnvironment;
        _logger = logger;
        _next = next;
        _invoke = InvokeClrOptimizationCheck;
    }

    /// <summary>
    /// Processes the request through whichever delegate is currently active.
    /// </summary>
    /// <param name="context">The current HTTP context.</param>
    /// <returns>A task that completes when the rest of the pipeline (and any GC tuning) has finished.</returns>
    public Task Invoke(HttpContext context)
    {
        return _invoke(context);
    }

    /// <summary>
    /// Runs the rest of the pipeline and, only if it completes successfully, applies the GC tuning.
    /// </summary>
    /// <param name="context">The current HTTP context.</param>
    /// <returns>A task representing the pipeline execution; faults from the pipeline propagate unchanged.</returns>
    private async Task InvokeClrOptimizationCheck(HttpContext context)
    {
        // Awaiting (instead of returning a ContinueWith(OnlyOnRanToCompletion) task) keeps the original
        // exception when the pipeline faults; the continuation task would otherwise report cancellation.
        await _next.Invoke(context);

        // We are tweaking GC behavior in ClrOptimizationMiddleware as this is one of the last call stacks that get executed during standby mode as well as function execution.
        // We force a GC and enter no GC region in standby mode and exit no GC region after first function execution during specialization.
        StartStopGCAsBestEffort();
    }

    /// <summary>
    /// Enters a no GC region while in standby mode and leaves it once the host is no longer in standby mode.
    /// Any exception is logged and swallowed because this is a best-effort optimization.
    /// </summary>
    private void StartStopGCAsBestEffort()
    {
        try
        {
            if (_webHostEnvironment.InStandbyMode)
            {
                // This is just to make sure we do not enter NoGCRegion multiple times during standby mode.
                if (GCSettings.LatencyMode != GCLatencyMode.NoGCRegion)
                {
                    // In standby mode, we enforce a GC then enter NoGCRegion mode as best effort.
                    // This is to try to avoid GC during cold start specialization.
                    GC.Collect();
                    if (!GC.TryStartNoGCRegion(AllocationBudgetForGCDuringSpecialization, disallowFullBlockingGC: false))
                    {
                        _logger.LogError("CLR runtime failed to commit the requested amount of memory: {AllocationBudget}", AllocationBudgetForGCDuringSpecialization);
                    }

                    LogCollectionCounts();
                }
            }
            else
            {
                // if not in standby mode and we are in NoGCRegion then we end NoGCRegion.
                if (GCSettings.LatencyMode == GCLatencyMode.NoGCRegion)
                {
                    GC.EndNoGCRegion();
                    LogCollectionCounts();
                }

                // This logic needs to run only once during specialization, so replacing the RequestDelegate after specialization
                if (Interlocked.CompareExchange(ref _specialized, 1, 0) == 0)
                {
                    Interlocked.Exchange(ref _invoke, _next);
                }
            }
        }
        catch (Exception ex)
        {
            // Pass the exception object so the stack trace is preserved, and use a constant template so
            // braces inside the exception message are never interpreted as format placeholders.
            _logger.LogError(ex, "Failed to start or end the no GC region: {ExceptionMessage}", ex.Message);
        }
    }

    /// <summary>
    /// Logs the number of collections that have occurred so far for generations 0, 1 and 2.
    /// </summary>
    private void LogCollectionCounts()
    {
        _logger.LogInformation(
            "Collection count for gen 0: {Gen0Count}, gen 1: {Gen1Count}, gen 2: {Gen2Count}",
            GC.CollectionCount(0),
            GC.CollectionCount(1),
            GC.CollectionCount(2));
    }
}
93+
}

src/WebJobs.Script.WebHost/WebJobsApplicationBuilderExtension.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ public static IApplicationBuilder UseWebJobsScriptHost(this IApplicationBuilder
2727
IOptionsMonitor<HttpBodyControlOptions> httpBodyControlOptions = builder.ApplicationServices.GetService<IOptionsMonitor<HttpBodyControlOptions>>();
2828
IServiceProvider serviceProvider = builder.ApplicationServices;
2929

30+
// Ensure the ClrOptimizationMiddleware is registered before all other middleware
31+
builder.UseMiddleware<ClrOptimizationMiddleware>();
3032
builder.UseMiddleware<HttpRequestBodySizeMiddleware>();
3133
builder.UseMiddleware<SystemTraceMiddleware>();
3234
builder.UseMiddleware<HostnameFixupMiddleware>();

src/WebJobs.Script.WebHost/WebJobsScriptHostService.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ internal bool ShouldMonitorHostHealth
161161
{
162162
get
163163
{
164-
return _healthMonitorOptions.Value.Enabled && _environment.IsAppService();
164+
return _healthMonitorOptions.Value.Enabled && _environment.IsAppService() && !_scriptWebHostEnvironment.InStandbyMode;
165165
}
166166
}
167167

test/WebJobs.Script.Tests.Integration/WebHostEndToEnd/SpecializationE2ETests.cs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
using System.Linq;
88
using System.Net;
99
using System.Net.Http;
10+
using System.Runtime;
1011
using System.Threading;
1112
using System.Threading.Tasks;
1213
using Microsoft.ApplicationInsights.Channel;
@@ -240,6 +241,31 @@ public async Task Specialization_ResetsSharedLoadContext()
240241
}
241242
}
242243

244+
[Fact]
public async Task Specialization_GCMode()
{
    // Starts a host in standby (placeholder) mode, warms it up, specializes it, and verifies that the
    // process is not left inside a no GC region either before warmup or after specialization completes.
    var standbyHostBuilder = CreateStandbyHostBuilder("FunctionExecutionContext");

    using (var server = new TestServer(standbyHostBuilder))
    {
        var httpClient = server.CreateClient();

        // GC's LatencyMode should be Interactive as default, switch to NoGCRegion in placeholder mode and back to Interactive when specialization is complete.
        bool outsideNoGCRegionAtStart = GCSettings.LatencyMode != GCLatencyMode.NoGCRegion;
        Assert.True(outsideNoGCRegionAtStart, "GCLatencyMode should *not* be NoGCRegion at the beginning");

        // Warmup request served while the host is still in placeholder mode.
        var warmupResponse = await httpClient.GetAsync("api/warmup");
        warmupResponse.EnsureSuccessStatusCode();

        // Flip the environment so the next request triggers specialization.
        _environment.SetEnvironmentVariable(EnvironmentSettingNames.AzureWebsiteContainerReady, "1");
        _environment.SetEnvironmentVariable(EnvironmentSettingNames.AzureWebsitePlaceholderMode, "0");

        // First function invocation after leaving placeholder mode.
        var functionResponse = await httpClient.GetAsync("api/functionexecutioncontext");
        functionResponse.EnsureSuccessStatusCode();

        bool outsideNoGCRegionAtEnd = GCSettings.LatencyMode != GCLatencyMode.NoGCRegion;
        Assert.True(outsideNoGCRegionAtEnd, "GCLatencyMode should *not* be NoGCRegion at the end of specialization");
    }
}
268+
243269
[Fact]
244270
public async Task Specialization_ResetsSecretManagerRepository()
245271
{

0 commit comments

Comments
 (0)