Skip to content

Commit f32abed

Browse files
authored
Legion CV2 Crash Recovery (#9237)
1 parent 8caea0f commit f32abed

File tree

7 files changed

+367
-100
lines changed

7 files changed

+367
-100
lines changed

src/WebJobs.Script.WebHost/ContainerManagement/LinuxContainerInitializationHostService.cs renamed to src/WebJobs.Script.WebHost/ContainerManagement/AtlasContainerInitializationHostedService.cs

Lines changed: 19 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -9,74 +9,31 @@
99
using Azure.Storage.Blobs;
1010
using Microsoft.Azure.WebJobs.Script.WebHost.Management;
1111
using Microsoft.Azure.WebJobs.Script.WebHost.Models;
12-
using Microsoft.Extensions.Hosting;
1312
using Microsoft.Extensions.Logging;
14-
using Newtonsoft.Json;
1513

1614
namespace Microsoft.Azure.WebJobs.Script.WebHost.ContainerManagement
1715
{
18-
public class LinuxContainerInitializationHostService : IHostedService
16+
public class AtlasContainerInitializationHostedService : LinuxContainerInitializationHostedService
1917
{
2018
private readonly IEnvironment _environment;
2119
private readonly IInstanceManager _instanceManager;
2220
private readonly ILogger _logger;
2321
private readonly StartupContextProvider _startupContextProvider;
2422
private CancellationToken _cancellationToken;
2523

26-
public LinuxContainerInitializationHostService(IEnvironment environment, IInstanceManager instanceManager, ILogger<LinuxContainerInitializationHostService> logger, StartupContextProvider startupContextProvider)
24+
public AtlasContainerInitializationHostedService(IEnvironment environment, IInstanceManager instanceManager,
25+
ILogger<AtlasContainerInitializationHostedService> logger, StartupContextProvider startupContextProvider)
26+
: base(environment, instanceManager, logger, startupContextProvider)
2727
{
2828
_environment = environment;
2929
_instanceManager = instanceManager;
3030
_logger = logger;
3131
_startupContextProvider = startupContextProvider;
3232
}
3333

34-
public async Task StartAsync(CancellationToken cancellationToken)
34+
protected override async Task<(bool HasStartContext, string StartContext)> TryGetStartContextOrNullAsync(CancellationToken cancellationToken)
3535
{
36-
_logger.LogInformation("Initializing LinuxContainerInitializationService.");
3736
_cancellationToken = cancellationToken;
38-
39-
// The service should be registered in Linux Consumption only, but do additional check here.
40-
if (_environment.IsLinuxConsumptionOnAtlas())
41-
{
42-
await ApplyStartContextIfPresent();
43-
}
44-
else if (_environment.IsFlexConsumptionSku())
45-
{
46-
_logger.LogInformation("Container has (re)started. Waiting for specialization");
47-
}
48-
}
49-
50-
private async Task ApplyStartContextIfPresent()
51-
{
52-
var startContext = await GetStartContextOrNullAsync();
53-
54-
if (!string.IsNullOrEmpty(startContext))
55-
{
56-
_logger.LogInformation("Applying host context");
57-
58-
var encryptedAssignmentContext = JsonConvert.DeserializeObject<EncryptedHostAssignmentContext>(startContext);
59-
var assignmentContext = _startupContextProvider.SetContext(encryptedAssignmentContext);
60-
61-
var msiError = await _instanceManager.SpecializeMSISidecar(assignmentContext);
62-
if (!string.IsNullOrEmpty(msiError))
63-
{
64-
// Log and continue specializing even in case of failures.
65-
// There will be other mechanisms to recover the container.
66-
_logger.LogError("MSI Specialization failed with '{msiError}'", msiError);
67-
}
68-
69-
bool success = _instanceManager.StartAssignment(assignmentContext);
70-
_logger.LogInformation($"StartAssignment invoked (Success={success})");
71-
}
72-
else
73-
{
74-
_logger.LogInformation("No host context specified. Waiting for host assignment");
75-
}
76-
}
77-
78-
private async Task<string> GetStartContextOrNullAsync()
79-
{
8037
var startContext = _environment.GetEnvironmentVariable(EnvironmentSettingNames.ContainerStartContext);
8138

8239
// Container start context is not available directly
@@ -87,16 +44,27 @@ private async Task<string> GetStartContextOrNullAsync()
8744

8845
if (!string.IsNullOrEmpty(sasUri))
8946
{
90-
_logger.LogInformation("Host context specified via CONTAINER_START_CONTEXT_SAS_URI");
47+
_logger.LogDebug($"Host context specified via {EnvironmentSettingNames.ContainerStartContextSasUri}");
9148
startContext = await GetAssignmentContextFromSasUri(sasUri);
9249
}
9350
}
9451
else
9552
{
96-
_logger.LogInformation("Host context specified via CONTAINER_START_CONTEXT");
53+
_logger.LogDebug($"Host context specified via {EnvironmentSettingNames.ContainerStartContext}");
9754
}
9855

99-
return startContext;
56+
return (!string.IsNullOrEmpty(startContext), startContext);
57+
}
58+
59+
protected override async Task SpecializeMSISideCar(HostAssignmentContext assignmentContext)
60+
{
61+
var msiError = await _instanceManager.SpecializeMSISidecar(assignmentContext);
62+
if (!string.IsNullOrEmpty(msiError))
63+
{
64+
// Log and continue specializing even in case of failures.
65+
// There will be other mechanisms to recover the container.
66+
_logger.LogError("MSI Specialization failed with '{msiError}'", msiError);
67+
}
10068
}
10169

10270
private async Task<string> GetAssignmentContextFromSasUri(string sasUri)
@@ -139,10 +107,5 @@ public virtual async Task<string> Read(string uri)
139107

140108
return string.Empty;
141109
}
142-
143-
public Task StopAsync(CancellationToken cancellationToken)
144-
{
145-
return Task.CompletedTask;
146-
}
147110
}
148111
}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
// Copyright (c) .NET Foundation. All rights reserved.
2+
// Licensed under the MIT License. See License.txt in the project root for license information.
3+
4+
using System;
5+
using System.IO;
6+
using System.Threading;
7+
using System.Threading.Tasks;
8+
using Microsoft.Azure.WebJobs.Script.WebHost.Management;
9+
using Microsoft.Azure.WebJobs.Script.WebHost.Models;
10+
using Microsoft.Extensions.Hosting;
11+
using Microsoft.Extensions.Logging;
12+
13+
namespace Microsoft.Azure.WebJobs.Script.WebHost.ContainerManagement
14+
{
15+
/// <summary>
/// Container initialization service that looks for a start context persisted in a file
/// (<c>Context.txt</c>) under the directory named by the
/// <see cref="EnvironmentSettingNames.ContainerSpecializationContextVolumePath"/> environment variable.
/// </summary>
/// <remarks>
/// NOTE(review): the component that writes the context file is not visible from this class;
/// presumably it is persisted on a previous specialization so it can be re-applied after a restart — confirm.
/// </remarks>
public class LegionContainerInitializationHostedService : LinuxContainerInitializationHostedService
{
    // Name of the file, inside the specialization context mount, that holds the start context.
    private const string ContextFile = "Context.txt";

    private readonly IEnvironment _environment;
    private readonly ILogger _logger;

    /// <summary>
    /// Creates the service; all dependencies are forwarded to the base class, and the
    /// environment and logger are additionally kept locally for the file lookup.
    /// </summary>
    public LegionContainerInitializationHostedService(IEnvironment environment, IInstanceManager instanceManager,
        ILogger<LegionContainerInitializationHostedService> logger, StartupContextProvider startupContextProvider)
        : base(environment, instanceManager, logger, startupContextProvider)
    {
        _environment = environment;
        _logger = logger;
    }

    /// <summary>
    /// Attempts to read the start context from the context file on the specialization context mount.
    /// </summary>
    /// <param name="cancellationToken">Not observed; the file is read synchronously.</param>
    /// <returns>
    /// <c>(true, content)</c> when the context file exists and has non-empty content;
    /// otherwise <c>HasStartContext</c> is <c>false</c>. Read failures are logged and reported as "no context".
    /// </returns>
    protected override Task<(bool HasStartContext, string StartContext)> TryGetStartContextOrNullAsync(CancellationToken cancellationToken)
    {
        string containerSpecializationContextMountPath = _environment.GetEnvironmentVariable(EnvironmentSettingNames.ContainerSpecializationContextVolumePath);

        // The CONTAINER_SPECIALIZATION_CONTEXT_MOUNT_PATH environment variable should be set during pod creation
        if (string.IsNullOrEmpty(containerSpecializationContextMountPath))
        {
            _logger.LogError($"{EnvironmentSettingNames.ContainerSpecializationContextVolumePath} is Null or Empty");
            return NoStartContext();
        }

        // The CONTAINER_SPECIALIZATION_CONTEXT_MOUNT_PATH emptyDir volume should be mounted by Legion during pod creation
        if (!Directory.Exists(containerSpecializationContextMountPath))
        {
            _logger.LogError("Container specialization context mount does not exist");
            return NoStartContext();
        }

        string contextFilePath = Path.Combine(containerSpecializationContextMountPath, ContextFile);

        if (!File.Exists(contextFilePath))
        {
            return NoStartContext();
        }

        _logger.LogDebug("Previous start context found");

        try
        {
            string startContext = File.ReadAllText(contextFilePath);

            // Only report a context when the file actually has content, so the flag agrees with the payload.
            return Task.FromResult((!string.IsNullOrEmpty(startContext), startContext));
        }
        catch (Exception e)
        {
            // Best effort: the file may disappear or be unreadable between the existence check and the read.
            // Pass the exception to the logger so the stack trace is captured as structured data.
            _logger.LogError(e, "Error reading previous start context");
            return NoStartContext();
        }
    }

    /// <summary>
    /// No-op: this service performs no MSI sidecar specialization.
    /// </summary>
    protected override Task SpecializeMSISideCar(HostAssignmentContext assignmentContext)
    {
        return Task.CompletedTask;
    }

    // Shared "nothing to apply" result used by every failure/absent path.
    private static Task<(bool HasStartContext, string StartContext)> NoStartContext()
    {
        return Task.FromResult((false, string.Empty));
    }
}
73+
}
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
// Copyright (c) .NET Foundation. All rights reserved.
2+
// Licensed under the MIT License. See License.txt in the project root for license information.
3+
4+
using System.Threading;
5+
using System.Threading.Tasks;
6+
using Microsoft.Azure.WebJobs.Script.WebHost.Management;
7+
using Microsoft.Azure.WebJobs.Script.WebHost.Models;
8+
using Microsoft.Extensions.Hosting;
9+
using Microsoft.Extensions.Logging;
10+
using Newtonsoft.Json;
11+
12+
namespace Microsoft.Azure.WebJobs.Script.WebHost.ContainerManagement
13+
{
14+
/// <summary>
/// Base hosted service that, on startup, asks the derived class for a start context and,
/// when one is present, applies it and starts the host assignment.
/// </summary>
public abstract class LinuxContainerInitializationHostedService : IHostedService
{
    private readonly IEnvironment _environment;
    private readonly IInstanceManager _instanceManager;
    private readonly ILogger _logger;
    private readonly StartupContextProvider _startupContextProvider;

    /// <summary>
    /// Initializes the shared dependencies. Protected because the type is abstract and can
    /// only be constructed through a derived class.
    /// </summary>
    protected LinuxContainerInitializationHostedService(IEnvironment environment, IInstanceManager instanceManager, ILogger logger, StartupContextProvider startupContextProvider)
    {
        _environment = environment;
        _instanceManager = instanceManager;
        _logger = logger;
        _startupContextProvider = startupContextProvider;
    }

    /// <summary>
    /// Applies the start context, if any, when running in a Linux Consumption environment.
    /// </summary>
    /// <param name="cancellationToken">Forwarded to <see cref="TryGetStartContextOrNullAsync"/>.</param>
    public async Task StartAsync(CancellationToken cancellationToken)
    {
        _logger.LogDebug("Starting container initialization service.");

        // The service should be registered in Linux Consumption only, but do additional check here.
        if (_environment.IsAnyLinuxConsumption())
        {
            await ApplyStartContextIfPresent(cancellationToken);
        }
    }

    // Fetches the start context from the derived class and, when present, deserializes it,
    // registers it with the startup context provider, runs MSI sidecar specialization and
    // finally starts the assignment.
    private async Task ApplyStartContextIfPresent(CancellationToken cancellationToken)
    {
        (bool hasStartContext, string startContext) = await TryGetStartContextOrNullAsync(cancellationToken);

        if (!hasStartContext || string.IsNullOrEmpty(startContext))
        {
            _logger.LogDebug("No host context specified. Waiting for host assignment");
            return;
        }

        _logger.LogDebug("Applying host context");

        // NOTE(review): malformed JSON throws here and propagates out of StartAsync — confirm that is intended.
        var encryptedAssignmentContext = JsonConvert.DeserializeObject<EncryptedHostAssignmentContext>(startContext);
        var assignmentContext = _startupContextProvider.SetContext(encryptedAssignmentContext);
        await SpecializeMSISideCar(assignmentContext);

        bool success = _instanceManager.StartAssignment(assignmentContext);

        // Message template instead of interpolation; the rendered text is unchanged.
        _logger.LogDebug("StartAssignment invoked (Success={success})", success);
    }

    /// <summary>
    /// Retrieves the start context for this container, if one is available.
    /// </summary>
    /// <returns>A flag indicating whether a context was found, and the serialized context itself.</returns>
    protected abstract Task<(bool HasStartContext, string StartContext)> TryGetStartContextOrNullAsync(CancellationToken cancellationToken);

    /// <summary>
    /// Hook invoked after the context is set and before the assignment is started,
    /// allowing the derived class to specialize the MSI sidecar.
    /// </summary>
    protected abstract Task SpecializeMSISideCar(HostAssignmentContext assignmentContext);

    /// <summary>
    /// Nothing to tear down; completes immediately.
    /// </summary>
    public Task StopAsync(CancellationToken cancellationToken)
    {
        return Task.CompletedTask;
    }
}
72+
}

src/WebJobs.Script.WebHost/WebHostServiceCollectionExtensions.cs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -240,13 +240,19 @@ private static void AddLinuxContainerServices(this IServiceCollection services)
240240
services.AddSingleton<IHostedService>(s =>
241241
{
242242
var environment = s.GetService<IEnvironment>();
243-
//todo: Replace with legion specific service
244-
if (environment.IsAnyLinuxConsumption())
243+
if (environment.IsLinuxConsumptionOnAtlas())
244+
{
245+
var instanceManager = s.GetService<IInstanceManager>();
246+
var logger = s.GetService<ILogger<AtlasContainerInitializationHostedService>>();
247+
var startupContextProvider = s.GetService<StartupContextProvider>();
248+
return new AtlasContainerInitializationHostedService(environment, instanceManager, logger, startupContextProvider);
249+
}
250+
else if (environment.IsFlexConsumptionSku())
245251
{
246252
var instanceManager = s.GetService<IInstanceManager>();
247-
var logger = s.GetService<ILogger<LinuxContainerInitializationHostService>>();
253+
var logger = s.GetService<ILogger<LegionContainerInitializationHostedService>>();
248254
var startupContextProvider = s.GetService<StartupContextProvider>();
249-
return new LinuxContainerInitializationHostService(environment, instanceManager, logger, startupContextProvider);
255+
return new LegionContainerInitializationHostedService(environment, instanceManager, logger, startupContextProvider);
250256
}
251257

252258
return NullHostedService.Instance;

src/WebJobs.Script/Environment/EnvironmentSettingNames.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ public static class EnvironmentSettingNames
8989
public const string ContainerStartContext = "CONTAINER_START_CONTEXT";
9090
public const string ContainerStartContextSasUri = "CONTAINER_START_CONTEXT_SAS_URI";
9191
public const string FunctionsLogsMountPath = "FUNCTIONS_LOGS_MOUNT_PATH";
92+
public const string ContainerSpecializationContextVolumePath = "CONTAINER_SPECIALIZATION_CONTEXT_MOUNT_PATH";
9293

9394
// unfortunately there are 3 versions of this setting that have to be supported
9495
// due to renames

0 commit comments

Comments
 (0)