Skip to content

Commit 47f0434

Browse files
authored
Best effort to avoid GC during cold start specialization (#8521)
1 parent a503649 commit 47f0434

File tree

4 files changed

+122
-1
lines changed

4 files changed

+122
-1
lines changed
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
// Copyright (c) .NET Foundation. All rights reserved.
2+
// Licensed under the MIT License. See License.txt in the project root for license information.
3+
using System;
4+
using System.Runtime;
5+
using System.Threading;
6+
using System.Threading.Tasks;
7+
using Microsoft.AspNetCore.Http;
8+
using Microsoft.Extensions.Logging;
9+
10+
namespace Microsoft.Azure.WebJobs.Script.WebHost.Middleware
11+
{
12+
/// <summary>
/// A middleware responsible for optimizing CLR settings like GC to help with cold start.
/// </summary>
/// <remarks>
/// After each successfully completed request, while the host reports standby mode, the middleware
/// forces a collection and tries to enter a no GC region. On the first successfully completed
/// request after standby mode ends, it leaves the no GC region and removes itself from the
/// request path by swapping its delegate for the next middleware.
/// </remarks>
internal class ClrOptimizationMiddleware
{
    // This is double the amount of memory allocated during cold start specialization.
    // This value is calculated based on prod profiles across all languages observed for an extended period of time.
    // This value is just a best effort and if for any reason CLR needs to allocate more memory then it will ignore this value.
    private const long AllocationBudgetForGCDuringSpecialization = 16 * 1024 * 1024;
    private readonly ILogger _logger;
    private readonly RequestDelegate _next;
    private readonly IScriptWebHostEnvironment _webHostEnvironment;

    // Delegate actually executed by Invoke; replaced by _next once specialization has been observed.
    private RequestDelegate _invoke;

    // 0 until the first request completes outside of standby mode, 1 afterwards.
    private int _specialized = 0;

    /// <summary>
    /// Initializes a new instance of the <see cref="ClrOptimizationMiddleware"/> class.
    /// </summary>
    /// <param name="next">The next middleware in the request pipeline.</param>
    /// <param name="webHostEnvironment">Environment used to determine whether the host is in standby mode.</param>
    /// <param name="logger">Logger used for GC diagnostics.</param>
    // NOTE(review): the logger category is SystemTraceMiddleware rather than ClrOptimizationMiddleware.
    // This looks like a copy/paste slip; it is left unchanged here to keep the constructor signature stable.
    public ClrOptimizationMiddleware(RequestDelegate next, IScriptWebHostEnvironment webHostEnvironment, ILogger<SystemTraceMiddleware> logger)
    {
        _webHostEnvironment = webHostEnvironment;
        _logger = logger;
        _next = next;
        _invoke = InvokeClrOptimizationCheck;
    }

    /// <summary>
    /// Processes the request through whichever delegate is currently active.
    /// </summary>
    /// <param name="context">The current HTTP context.</param>
    /// <returns>A task that completes when the request has been processed.</returns>
    public Task Invoke(HttpContext context)
    {
        return _invoke(context);
    }

    /// <summary>
    /// Runs the rest of the pipeline and, only when it completes successfully, adjusts the GC mode.
    /// </summary>
    /// <param name="context">The current HTTP context.</param>
    /// <returns>A task that completes when the request has been processed.</returns>
    private async Task InvokeClrOptimizationCheck(HttpContext context)
    {
        // Awaiting (instead of ContinueWith + OnlyOnRanToCompletion) lets a fault or cancellation from the
        // downstream pipeline propagate unchanged; the previous continuation task was reported as canceled
        // in those cases, hiding the original exception from the caller.
        await _next.Invoke(context);

        // We are tweaking GC behavior in ClrOptimizationMiddleware as this is one of the last call stacks that get executed during standby mode as well as function execution.
        // We force a GC and enter no GC region in standby mode and exit no GC region after first function execution during specialization.
        StartStopGCAsBestEffort();
    }

    /// <summary>
    /// Enters the no GC region while in standby mode and leaves it once the host is no longer in standby mode.
    /// Any failure is logged and swallowed because this is a best effort optimization.
    /// </summary>
    private void StartStopGCAsBestEffort()
    {
        try
        {
            if (_webHostEnvironment.InStandbyMode)
            {
                // This is just to make sure we do not enter NoGCRegion multiple times during standby mode.
                if (GCSettings.LatencyMode != GCLatencyMode.NoGCRegion)
                {
                    // In standby mode, we enforce a GC then enter NoGCRegion mode as best effort.
                    // This is to try to avoid GC during cold start specialization.
                    GC.Collect();
                    if (!GC.TryStartNoGCRegion(AllocationBudgetForGCDuringSpecialization, disallowFullBlockingGC: false))
                    {
                        _logger.LogError("CLR runtime failed to commit the requested amount of memory: {allocationBudget}", AllocationBudgetForGCDuringSpecialization);
                    }

                    LogCollectionCounts();
                }
            }
            else
            {
                // if not in standby mode and we are in NoGCRegion then we end NoGCRegion.
                if (GCSettings.LatencyMode == GCLatencyMode.NoGCRegion)
                {
                    GC.EndNoGCRegion();
                    LogCollectionCounts();
                }

                // This logic needs to run only once during specialization, so replacing the RequestDelegate after specialization
                if (Interlocked.CompareExchange(ref _specialized, 1, 0) == 0)
                {
                    Interlocked.Exchange(ref _invoke, _next);
                }
            }
        }
        catch (Exception ex)
        {
            // Pass the exception itself so the stack trace is not lost.
            _logger.LogError(ex, "Failed to start or end the no GC region.");
        }
    }

    /// <summary>
    /// Logs the number of collections that have occurred so far for generations 0, 1 and 2.
    /// </summary>
    private void LogCollectionCounts()
    {
        _logger.LogInformation(
            "Collection count for gen 0: {gen0Count}, gen 1: {gen1Count}, gen 2: {gen2Count}",
            GC.CollectionCount(0),
            GC.CollectionCount(1),
            GC.CollectionCount(2));
    }
}
93+
}

src/WebJobs.Script.WebHost/WebJobsApplicationBuilderExtension.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ public static IApplicationBuilder UseWebJobsScriptHost(this IApplicationBuilder
3333
HttpBodyControlOptions httpBodyControlOptions = httpBodyControlOptionsMonitor.CurrentValue;
3434
httpBodyControlOptionsMonitor.OnChange(newOptions => httpBodyControlOptions = newOptions);
3535

36+
// Ensure the ClrOptimizationMiddleware is registered before all middleware
37+
builder.UseMiddleware<ClrOptimizationMiddleware>();
3638
builder.UseMiddleware<HttpRequestBodySizeMiddleware>();
3739
builder.UseMiddleware<SystemTraceMiddleware>();
3840
builder.UseMiddleware<HostnameFixupMiddleware>();

src/WebJobs.Script.WebHost/WebJobsScriptHostService.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ internal bool ShouldMonitorHostHealth
159159
{
160160
get
161161
{
162-
return _healthMonitorOptions.Value.Enabled && _environment.IsAppService();
162+
return _healthMonitorOptions.Value.Enabled && _environment.IsAppService() && !_scriptWebHostEnvironment.InStandbyMode;
163163
}
164164
}
165165

test/WebJobs.Script.Tests.Integration/WebHostEndToEnd/SpecializationE2ETests.cs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
using System.Linq;
88
using System.Net;
99
using System.Net.Http;
10+
using System.Runtime;
1011
using System.Threading;
1112
using System.Threading.Tasks;
1213
using Azure.Storage.Blobs;
@@ -243,6 +244,31 @@ public async Task Specialization_ResetsSharedLoadContext()
243244
}
244245
}
245246

247+
[Fact]
public async Task Specialization_GCMode()
{
    // Verifies that the process is not left in NoGCRegion latency mode, either before the
    // standby host receives traffic or after the first function invocation following specialization.
    var hostBuilder = CreateStandbyHostBuilder("FunctionExecutionContext");

    using (var server = new TestServer(hostBuilder))
    {
        var httpClient = server.CreateClient();

        // Before any request is served the latency mode must not already be NoGCRegion.
        var latencyModeAtStart = GCSettings.LatencyMode;
        Assert.True(latencyModeAtStart != GCLatencyMode.NoGCRegion, "GCLatencyMode should *not* be NoGCRegion at the beginning");

        // Warm up the host while it is still in placeholder (standby) mode.
        var warmupResponse = await httpClient.GetAsync("api/warmup");
        warmupResponse.EnsureSuccessStatusCode();

        // Flip the environment out of placeholder mode to trigger specialization.
        _environment.SetEnvironmentVariable(EnvironmentSettingNames.AzureWebsiteContainerReady, "1");
        _environment.SetEnvironmentVariable(EnvironmentSettingNames.AzureWebsitePlaceholderMode, "0");

        // First function execution after specialization.
        var functionResponse = await httpClient.GetAsync("api/functionexecutioncontext");
        functionResponse.EnsureSuccessStatusCode();

        var latencyModeAtEnd = GCSettings.LatencyMode;
        Assert.True(latencyModeAtEnd != GCLatencyMode.NoGCRegion, "GCLatencyMode should *not* be NoGCRegion at the end of specialization");
    }
}
271+
246272
[Fact]
247273
public async Task Specialization_ResetsSecretManagerRepository()
248274
{

0 commit comments

Comments
 (0)