-
Notifications
You must be signed in to change notification settings - Fork 896
Impoving container initialize logic #5280
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
d894921
96d9384
0a10016
93dd186
7f1bb60
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -209,9 +209,14 @@ public async Task<int> DockerStart(IExecutionContext context, string containerId | |
ArgUtil.NotNull(context, nameof(context)); | ||
ArgUtil.NotNull(containerId, nameof(containerId)); | ||
|
||
var action = new Func<Task<int>>(async () => await ExecuteDockerCommandAsync(context, "start", containerId, context.CancellationToken)); | ||
const string command = "Docker start"; | ||
return await ExecuteDockerCommandAsyncWithRetries(context, action, command); | ||
if (!AgentKnobs.CheckBeforeRetryDockerStart.GetValue(context).AsBoolean()) | ||
{ | ||
var action = new Func<Task<int>>(async () => await ExecuteDockerCommandAsync(context, "start", containerId, context.CancellationToken)); | ||
const string command = "Docker start"; | ||
return await ExecuteDockerCommandAsyncWithRetries(context, action, command); | ||
} | ||
// Use the new helper for start with retries and running-state checks | ||
return await ExecuteDockerStartWithRetriesAndCheck(context, containerId); | ||
} | ||
|
||
public async Task<int> DockerRemove(IExecutionContext context, string containerId) | ||
|
@@ -374,7 +379,7 @@ public async Task<List<PortMapping>> DockerPort(IExecutionContext context, strin | |
/// <returns | ||
/// <c>true</c>, if specified container is running, <c>false</c> otherwise. | ||
/// </returns> | ||
public async Task<bool> IsContainerRunning(IExecutionContext context, string containerId) | ||
public virtual async Task<bool> IsContainerRunning(IExecutionContext context, string containerId) | ||
{ | ||
List<string> filteredItems = await DockerPS(context, $"--filter id={containerId}"); | ||
|
||
|
@@ -386,8 +391,8 @@ public async Task<bool> IsContainerRunning(IExecutionContext context, string con | |
|
||
return isContainerRunning; | ||
} | ||
|
||
private Task<int> ExecuteDockerCommandAsync(IExecutionContext context, string command, string options, CancellationToken cancellationToken = default(CancellationToken)) | ||
// making it protected for unit testing | ||
protected virtual Task<int> ExecuteDockerCommandAsync(IExecutionContext context, string command, string options, CancellationToken cancellationToken = default(CancellationToken)) | ||
{ | ||
return ExecuteDockerCommandAsync(context, command, options, null, cancellationToken); | ||
} | ||
|
@@ -533,5 +538,60 @@ private static async Task<List<string>> ExecuteDockerCommandAsyncWithRetries(IEx | |
|
||
return output; | ||
} | ||
|
||
/// <summary> | ||
/// Executes 'docker start' with retries, checking if the container is already running before each retry. | ||
/// Returns 0 if the container is running or started successfully, otherwise returns the last exit code. | ||
/// </summary> | ||
protected virtual async Task<int> ExecuteDockerStartWithRetriesAndCheck(IExecutionContext context, string containerId) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any specific reason why we have made this function virtual? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same reason as above There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Instead of overriding a function in unit tests, a better practice is to mock the function in test with given input/output. |
||
{ | ||
bool dockerActionRetries = AgentKnobs.DockerActionRetries.GetValue(context).AsBoolean(); | ||
context.Output($"DockerActionRetries variable value: {dockerActionRetries}"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do we need this log? |
||
|
||
int retryCount = 0; | ||
const int maxRetries = 3; | ||
TimeSpan delayInSeconds = TimeSpan.FromSeconds(10); | ||
int exitCode = 0; | ||
|
||
while (retryCount < maxRetries) | ||
{ | ||
// Check if container is already running before attempting to start | ||
if (await IsContainerRunning(context, containerId)) | ||
{ | ||
context.Output($"Container {containerId} is running before attempt {retryCount + 1}."); | ||
break; | ||
} | ||
|
||
exitCode = await ExecuteDockerCommandAsync(context, "start", containerId, context.CancellationToken); | ||
if (exitCode == 0 || !dockerActionRetries) | ||
{ | ||
break; | ||
} | ||
|
||
context.Warning($"Docker start failed with exit code {exitCode}, back off {delayInSeconds} seconds before retry."); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this logging required? anyhow logs are below when docker is already running vs started |
||
retryCount++; | ||
await Task.Delay(delayInSeconds); | ||
|
||
} | ||
|
||
// handle the case where container is already running after retries but exit code is not 0 | ||
if (exitCode != 0 && await IsContainerRunning(context, containerId)) | ||
{ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you please check simplifying this if/else as if container is already running/started do we need to check exitcode? |
||
context.Output($"Container {containerId} is already running after {retryCount} retries. but exit code was {exitCode}."); | ||
exitCode = 0; // Indicate success | ||
} | ||
// If the container is still not running after retries, log a warning | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Warning inside this block already shows container not running after retires exhausted. No need for comment here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we should log a warning that the container didn't start after all the retries, and the last exit code. |
||
if (exitCode != 0) | ||
{ | ||
context.Warning($"Container {containerId} is not running after {retryCount} retries. Last exit code: {exitCode}"); | ||
} | ||
else | ||
{ | ||
context.Output($"Container {containerId} started successfully after {retryCount} retries."); | ||
} | ||
//return the exit code | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this comment necessary. This seems redundant so can be removed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It marks the end of the function Docker Start, with the exit code. |
||
context.Debug($"Docker start completed with exit code {exitCode}."); | ||
return exitCode; | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
using System; | ||
using System.Threading; | ||
using System.Threading.Tasks; | ||
using Microsoft.VisualStudio.Services.Agent.Worker; | ||
using Xunit; | ||
using Moq; | ||
using Agent.Sdk; | ||
using Microsoft.VisualStudio.Services.Agent.Worker.Container; | ||
|
||
namespace Microsoft.VisualStudio.Services.Agent.Tests.Worker.Container | ||
{ | ||
public class DockerCommandManagerL0Tests | ||
{ | ||
private Mock<IExecutionContext> _ec; | ||
|
||
[Fact] | ||
[Trait("Level", "L0")] | ||
[Trait("Category", "worker.container")] | ||
public async Task ReturnsZeroIfContainerAlreadyRunningBeforeStart() | ||
{ | ||
_ec = new Mock<IExecutionContext>(); | ||
SetupEnvironmentVariables("true"); | ||
var manager = new TestableDockerCommandManager(isRunningOnFirstCheck: true); | ||
int result = await manager.ExecuteDockerStartWithRetriesAndCheckPublic(_ec.Object, "cid"); | ||
Assert.Equal(0, result); | ||
Assert.Equal(1, manager.IsContainerRunningCallCount); | ||
} | ||
|
||
[Fact] | ||
[Trait("Level", "L0")] | ||
[Trait("Category", "worker.container")] | ||
public async Task ReturnsZeroIfStartSucceedsFirstTry() | ||
{ | ||
_ec = new Mock<IExecutionContext>(); | ||
SetupEnvironmentVariables("true"); | ||
var manager = new TestableDockerCommandManager(exitCodes: new[] { 0 }, runningOnRetry: new[] { false, true }); | ||
int result = await manager.ExecuteDockerStartWithRetriesAndCheckPublic(_ec.Object, "cid"); | ||
Assert.Equal(0, result); | ||
Assert.Equal(1, manager.IsContainerRunningCallCount); | ||
} | ||
|
||
[Fact] | ||
[Trait("Level", "L0")] | ||
[Trait("Category", "worker.container")] | ||
public async Task ReturnsZeroIfContainerStartsOnThirdRetry() | ||
{ | ||
_ec = new Mock<IExecutionContext>(); | ||
SetupEnvironmentVariables("true"); | ||
var manager = new TestableDockerCommandManager(exitCodes: new[] { 1, 1, 0 }, runningOnRetry: new[] { false, false, false, true }); | ||
int result = await manager.ExecuteDockerStartWithRetriesAndCheckPublic(_ec.Object, "cid"); | ||
Assert.Equal(0, result); | ||
Assert.Equal(3, manager.IsContainerRunningCallCount); | ||
} | ||
|
||
[Fact] | ||
[Trait("Level", "L0")] | ||
[Trait("Category", "worker.container")] | ||
public async Task ReturnsExitCodeIfContainerNeverStarts() | ||
{ | ||
_ec = new Mock<IExecutionContext>(); | ||
SetupEnvironmentVariables("true"); | ||
var manager = new TestableDockerCommandManager(exitCodes: new[] { 1, 2, 3 }, runningOnRetry: new[] { false, false, false, false }); | ||
int result = await manager.ExecuteDockerStartWithRetriesAndCheckPublic(_ec.Object, "cid"); | ||
Assert.Equal(3, result); | ||
Assert.Equal(4, manager.IsContainerRunningCallCount); | ||
} | ||
|
||
[Fact] | ||
[Trait("Level", "L0")] | ||
[Trait("Category", "worker.container")] | ||
public async Task ReturnsZeroIfContainerStartsButExitCodeNotZero() | ||
{ | ||
_ec = new Mock<IExecutionContext>(); | ||
SetupEnvironmentVariables("true"); | ||
// exitCode is 1, but container is running after | ||
var manager = new TestableDockerCommandManager(exitCodes: new[] { 1 }, runningOnRetry: new[] { false, true }); | ||
int result = await manager.ExecuteDockerStartWithRetriesAndCheckPublic(_ec.Object, "cid"); | ||
Assert.Equal(0, result); | ||
Assert.Equal(3, manager.IsContainerRunningCallCount); | ||
} | ||
|
||
private class TestableDockerCommandManager : DockerCommandManager | ||
{ | ||
private readonly int[] _exitCodes; | ||
private readonly bool[] _runningOnRetry; | ||
private int _startCallCount = 0; | ||
private int _runningCallCount = 0; | ||
public int IsContainerRunningCallCount => _runningCallCount; | ||
|
||
public TestableDockerCommandManager(bool isRunningOnFirstCheck = false) | ||
{ | ||
_exitCodes = new[] { 1 }; | ||
_runningOnRetry = new[] { isRunningOnFirstCheck }; | ||
} | ||
public TestableDockerCommandManager(int[] exitCodes, bool[] runningOnRetry) | ||
{ | ||
_exitCodes = exitCodes; | ||
_runningOnRetry = runningOnRetry; | ||
} | ||
public Task<int> ExecuteDockerStartWithRetriesAndCheckPublic(IExecutionContext context, string containerId) | ||
{ | ||
return base.ExecuteDockerStartWithRetriesAndCheck(context, containerId); | ||
} | ||
protected override Task<int> ExecuteDockerCommandAsync(IExecutionContext context, string command, string options, CancellationToken cancellationToken = default) | ||
{ | ||
int code = _exitCodes[Math.Min(_startCallCount, _exitCodes.Length - 1)]; | ||
_startCallCount++; | ||
return Task.FromResult(code); | ||
} | ||
public override Task<bool> IsContainerRunning(IExecutionContext context, string containerId) | ||
{ | ||
bool running = _runningOnRetry[Math.Min(_runningCallCount, _runningOnRetry.Length - 1)]; | ||
_runningCallCount++; | ||
return Task.FromResult(running); | ||
} | ||
} | ||
|
||
private void SetupEnvironmentVariables(string allowDockerActionRetries) | ||
{ | ||
var environment = new SystemEnvironment(); | ||
environment.SetEnvironmentVariable("VSTSAGENT_DOCKER_ACTION_RETRIES", allowDockerActionRetries); | ||
_ec.Setup(x => x.GetScopedEnvironment()).Returns(environment); | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Any specific reason why we have made this function virtual?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Made the function virtual to override it in the unit test class DockerCommandManagerL0Tests.cs
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Instead of overriding a function in unit tests, a better practice is to mock the function in test with given input/output.