@@ -21,6 +21,7 @@ import (
2121 "github.com/docker/docker/client"
2222 "github.com/docker/go-connections/nat"
2323 "github.com/google/uuid"
24+ "github.com/pkg/errors"
2425 "github.com/rs/zerolog"
2526 tc "github.com/testcontainers/testcontainers-go"
2627 "golang.org/x/sync/errgroup"
@@ -446,3 +447,94 @@ func NoDNS(noDNS bool, hc *container.HostConfig) {
446447 hc .DNS = []string {"127.0.0.1" }
447448 }
448449}
450+
451+ // Retry functions copied from lib/docker/docker.go to avoid depending on that package
452+ type StartContainerRetrier func (l zerolog.Logger , ctx context.Context , startErr error , req tc.GenericContainerRequest ) (tc.Container , error )
453+
454+ // NaiveRetrier is a simple retrier that tries to start the container again without any modifications.
455+ // It will remove the container if it exists and try to start it again.
456+ var NaiveRetrier = func (l zerolog.Logger , ctx context.Context , startErr error , req tc.GenericContainerRequest ) (tc.Container , error ) {
457+ l .Debug ().
458+ Str ("Start error" , startErr .Error ()).
459+ Str ("Retrier" , "NaiveRetrier" ).
460+ Msgf ("Attempting to start %s container" , req .Name )
461+
462+ req .Reuse = false // We need to force a new container to be created
463+
464+ removeErr := removeContainer (ctx , req )
465+ if removeErr != nil {
466+ l .Error ().Err (removeErr ).Msgf ("Failed to remove %s container to initiate restart" , req .Name )
467+ return nil , removeErr
468+ }
469+
470+ ct , err := tc .GenericContainer (ctx , req )
471+ if err == nil {
472+ l .Debug ().
473+ Str ("Retrier" , "NaiveRetrier" ).
474+ Msgf ("Successfully started %s container" , req .Name )
475+ return ct , nil
476+ }
477+ if ct != nil {
478+ err := ct .Terminate (ctx )
479+ if err != nil {
480+ l .Error ().
481+ Err (err ).
482+ Msgf ("Cannot terminate %s container to initiate restart" , req .Name )
483+ return nil , err
484+ }
485+ }
486+
487+ l .Debug ().
488+ Str ("Original start error" , startErr .Error ()).
489+ Str ("Current start error" , err .Error ()).
490+ Str ("Retrier" , "NaiveRetrier" ).
491+ Msgf ("Failed to start %s container," , req .Name )
492+
493+ return nil , startErr
494+ }
495+
496+ // StartContainerWithRetry attempts to start a container with 3 retry attempts.
497+ // It will try to start the container with the provided retriers, if none are provided it will use the default retrier, which
498+ // simply tries to start the container again without any modifications.
499+ func StartContainerWithRetry (l zerolog.Logger , ctx context.Context , req tc.GenericContainerRequest , retriers ... StartContainerRetrier ) (tc.Container , error ) {
500+ var (
501+ ct tc.Container
502+ err error
503+ )
504+
505+ ct , err = tc .GenericContainer (ctx , req )
506+ if err == nil {
507+ return ct , nil
508+ }
509+
510+ if len (retriers ) == 0 {
511+ retriers = append (retriers , NaiveRetrier )
512+ }
513+
514+ l .Warn ().Err (err ).Msgf ("Cannot start %s container, retrying" , req .Name )
515+
516+ req .Reuse = true // Try and see if we can reuse the container for a retry
517+ for _ , retrier := range retriers {
518+ ct , err = retrier (l , ctx , err , req )
519+ if err == nil {
520+ return ct , nil
521+ }
522+ }
523+
524+ return nil , err
525+ }
526+
527+ func removeContainer (ctx context.Context , req tc.GenericContainerRequest ) error {
528+ provider , providerErr := tc .NewDockerProvider ()
529+ if providerErr != nil {
530+ return errors .Wrapf (providerErr , "failed to create Docker provider" )
531+ }
532+
533+ removeErr := provider .Client ().ContainerRemove (ctx , req .Name , container.RemoveOptions {Force : true })
534+ if removeErr != nil && strings .Contains (strings .ToLower (removeErr .Error ()), "no such container" ) {
535+ // container doesn't exist, nothing to remove
536+ return nil
537+ }
538+
539+ return removeErr
540+ }
0 commit comments