@@ -209,9 +209,14 @@ public async Task<int> DockerStart(IExecutionContext context, string containerId
209
209
ArgUtil . NotNull ( context , nameof ( context ) ) ;
210
210
ArgUtil . NotNull ( containerId , nameof ( containerId ) ) ;
211
211
212
- var action = new Func < Task < int > > ( async ( ) => await ExecuteDockerCommandAsync ( context , "start" , containerId , context . CancellationToken ) ) ;
213
- const string command = "Docker start" ;
214
- return await ExecuteDockerCommandAsyncWithRetries ( context , action , command ) ;
212
+ if ( ! AgentKnobs . CheckBeforeRetryDockerStart . GetValue ( context ) . AsBoolean ( ) )
213
+ {
214
+ var action = new Func < Task < int > > ( async ( ) => await ExecuteDockerCommandAsync ( context , "start" , containerId , context . CancellationToken ) ) ;
215
+ const string command = "Docker start" ;
216
+ return await ExecuteDockerCommandAsyncWithRetries ( context , action , command ) ;
217
+ }
218
+ // Use the new helper for start with retries and running-state checks
219
+ return await ExecuteDockerStartWithRetriesAndCheck ( context , containerId ) ;
215
220
}
216
221
217
222
public async Task < int > DockerRemove ( IExecutionContext context , string containerId )
@@ -533,5 +538,60 @@ private static async Task<List<string>> ExecuteDockerCommandAsyncWithRetries(IEx
533
538
534
539
return output ;
535
540
}
541
+
542
/// <summary>
/// Executes 'docker start' with retries, checking whether the container is already running
/// before each attempt and once more after the last failed attempt.
/// </summary>
/// <param name="context">Execution context used for knob lookup, logging and cancellation.</param>
/// <param name="containerId">Identifier of the container passed to 'docker start'.</param>
/// <returns>
/// 0 if the container is found running or 'docker start' succeeds; otherwise the exit code
/// of the last 'docker start' attempt.
/// </returns>
private async Task<int> ExecuteDockerStartWithRetriesAndCheck(IExecutionContext context, string containerId)
{
    bool dockerActionRetries = AgentKnobs.DockerActionRetries.GetValue(context).AsBoolean();
    context.Output($"DockerActionRetries variable value: {dockerActionRetries}");

    const int maxRetries = 3;
    TimeSpan retryDelay = TimeSpan.FromSeconds(10);
    int retryCount = 0;
    int exitCode = 0;

    // Set when the in-loop check has already observed the container running, so the
    // post-loop logic does not have to query the container state a second time.
    bool confirmedRunning = false;

    while (retryCount < maxRetries)
    {
        // Check if the container is already running before attempting to start it.
        if (await IsContainerRunning(context, containerId))
        {
            context.Output($"Container {containerId} is running before attempt {retryCount + 1}.");
            confirmedRunning = true;
            break;
        }

        exitCode = await ExecuteDockerCommandAsync(context, "start", containerId, context.CancellationToken);

        // Stop on success, or after a single attempt when retries are disabled by the knob.
        if (exitCode == 0 || !dockerActionRetries)
        {
            break;
        }

        // TotalSeconds is logged (not the TimeSpan itself) so the message reads "10 seconds"
        // instead of "00:00:10 seconds".
        context.Warning($"Docker start failed with exit code {exitCode}, back off {retryDelay.TotalSeconds} seconds before retry.");
        retryCount++;

        // The back-off also runs after the final failed attempt, so the running-state check
        // below happens only after the delay has elapsed. The cancellation token is passed
        // so a cancelled job does not keep waiting out the back-off.
        await Task.Delay(retryDelay, context.CancellationToken);
    }

    // Handle the case where the container is running even though the last exit code is not 0.
    if (exitCode != 0 && (confirmedRunning || await IsContainerRunning(context, containerId)))
    {
        context.Output($"Container {containerId} is already running after {retryCount} retries. but exit code was {exitCode}.");
        exitCode = 0; // Indicate success
    }

    // If the container is still not running after retries, log a warning.
    if (exitCode != 0)
    {
        context.Warning($"Container {containerId} is not running after {retryCount} retries. Last exit code: {exitCode}");
    }
    else
    {
        context.Output($"Container {containerId} started successfully after {retryCount} retries.");
    }

    context.Debug($"Docker start completed with exit code {exitCode}.");
    return exitCode;
}
536
596
}
537
597
}
0 commit comments