diff --git a/framework/.changeset/v0.9.1.md b/framework/.changeset/v0.9.1.md new file mode 100644 index 000000000..b583dc989 --- /dev/null +++ b/framework/.changeset/v0.9.1.md @@ -0,0 +1 @@ +- Try to start EVM blockchain, JD, CL node and Postgres containers 3x \ No newline at end of file diff --git a/framework/components/blockchain/containers.go b/framework/components/blockchain/containers.go index 8a7e1ff7f..2d44a72a4 100644 --- a/framework/components/blockchain/containers.go +++ b/framework/components/blockchain/containers.go @@ -64,7 +64,7 @@ func baseRequest(in *Input, useWS ExposeWs) testcontainers.ContainerRequest { func createGenericEvmContainer(in *Input, req testcontainers.ContainerRequest, useWS bool) (*Output, error) { ctx := context.Background() - c, err := testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{ + c, err := framework.StartContainerWithRetry(ctx, framework.L, testcontainers.GenericContainerRequest{ ContainerRequest: req, Started: true, }) diff --git a/framework/components/clnode/clnode.go b/framework/components/clnode/clnode.go index 9589381fe..d2307de14 100644 --- a/framework/components/clnode/clnode.go +++ b/framework/components/clnode/clnode.go @@ -324,7 +324,7 @@ func newNode(in *Input, pgOut *postgres.Output) (*NodeOut, error) { } req.KeepImage = false } - c, err := tc.GenericContainer(ctx, tc.GenericContainerRequest{ + c, err := framework.StartContainerWithRetry(ctx, framework.L, tc.GenericContainerRequest{ ContainerRequest: req, Started: true, }) diff --git a/framework/components/jd/jd.go b/framework/components/jd/jd.go index baec2b8d2..032fb09ee 100644 --- a/framework/components/jd/jd.go +++ b/framework/components/jd/jd.go @@ -116,10 +116,12 @@ func NewJD(in *Input) (*Output, error) { } req.KeepImage = false } - c, err := tc.GenericContainer(ctx, tc.GenericContainerRequest{ + + c, err := framework.StartContainerWithRetry(ctx, framework.L, tc.GenericContainerRequest{ ContainerRequest: req, Started: true, }) + if err != nil { return nil, err } diff --git a/framework/components/postgres/postgres.go b/framework/components/postgres/postgres.go index d01b1cd04..32118fa86 100644 --- a/framework/components/postgres/postgres.go +++ b/framework/components/postgres/postgres.go @@ -153,7 +153,7 @@ func NewPostgreSQL(in *Input) (*Output, error) { } framework.ResourceLimitsFunc(h, in.ContainerResources) } - c, err := testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{ + c, err := framework.StartContainerWithRetry(ctx, framework.L, testcontainers.GenericContainerRequest{ ContainerRequest: req, Started: true, Reuse: true, diff --git a/framework/docker.go b/framework/docker.go index ea790eca1..21e0d77dd 100644 --- a/framework/docker.go +++ b/framework/docker.go @@ -21,6 +21,7 @@ import ( "github.com/docker/docker/client" "github.com/docker/go-connections/nat" "github.com/google/uuid" + "github.com/pkg/errors" "github.com/rs/zerolog" tc "github.com/testcontainers/testcontainers-go" "golang.org/x/sync/errgroup" @@ -446,3 +447,94 @@ func NoDNS(noDNS bool, hc *container.HostConfig) { hc.DNS = []string{"127.0.0.1"} } } + +// Retry functions copied from lib/docker/docker.go to avoid depending on that package +type StartContainerRetrier func(l zerolog.Logger, ctx context.Context, startErr error, req tc.GenericContainerRequest) (tc.Container, error) + +// NaiveRetrier is a simple retrier that tries to start the container again without any modifications. +// It will remove the container if it exists and try to start it again. +var NaiveRetrier = func(l zerolog.Logger, ctx context.Context, startErr error, req tc.GenericContainerRequest) (tc.Container, error) { + l.Debug(). + Str("Start error", startErr.Error()). + Str("Retrier", "NaiveRetrier"). + Msgf("Attempting to start %s container", req.Name) + + req.Reuse = false // We need to force a new container to be created + + removeErr := removeContainer(ctx, req) + if removeErr != nil { + l.Error().Err(removeErr).Msgf("Failed to remove %s container to initiate restart", req.Name) + return nil, removeErr + } + + ct, err := tc.GenericContainer(ctx, req) + if err == nil { + l.Debug(). + Str("Retrier", "NaiveRetrier"). + Msgf("Successfully started %s container", req.Name) + return ct, nil + } + if ct != nil { + err := ct.Terminate(ctx) + if err != nil { + l.Error(). + Err(err). + Msgf("Cannot terminate %s container to initiate restart", req.Name) + return nil, err + } + } + + l.Debug(). + Str("Original start error", startErr.Error()). + Str("Current start error", err.Error()). + Str("Retrier", "NaiveRetrier"). + Msgf("Failed to start %s container,", req.Name) + + return nil, startErr +} + +// StartContainerWithRetry attempts to start a container with 3 retry attempts. +// It will try to start the container with the provided retriers, if none are provided it will use the default retrier, which +// simply tries to start the container again without any modifications. +func StartContainerWithRetry(ctx context.Context, l zerolog.Logger, req tc.GenericContainerRequest, retriers ...StartContainerRetrier) (tc.Container, error) { + var ( + ct tc.Container + err error + ) + + ct, err = tc.GenericContainer(ctx, req) + if err == nil { + return ct, nil + } + + if len(retriers) == 0 { + retriers = append(retriers, NaiveRetrier) + } + + l.Warn().Err(err).Msgf("Cannot start %s container, retrying", req.Name) + + req.Reuse = true // Try and see if we can reuse the container for a retry + for _, retrier := range retriers { + ct, err = retrier(l, ctx, err, req) + if err == nil { + return ct, nil + } + } + + return nil, err +} + +func removeContainer(ctx context.Context, req tc.GenericContainerRequest) error { + provider, providerErr := tc.NewDockerProvider() + if providerErr != nil { + return errors.Wrapf(providerErr, "failed to create Docker provider") + } + + removeErr := provider.Client().ContainerRemove(ctx, req.Name, container.RemoveOptions{Force: true}) + if removeErr != nil && strings.Contains(strings.ToLower(removeErr.Error()), "no such container") { + // container doesn't exist, nothing to remove + return nil + } + + return removeErr +}