Skip to content

Commit 856743c

Browse files
Retry docker pull exponentially (#773)
docker pull commands sometimes fail with network-related flakiness. This retries them all to avoid us seeing tests fail because the k3s image couldn't be pulled, for example.
1 parent 7d974d4 commit 856743c

File tree

2 files changed

+51
-16
lines changed

2 files changed

+51
-16
lines changed

internal/docker/docker.go

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -472,16 +472,33 @@ func (d *Client) pull(ctx context.Context, ref name.Reference) error {
472472
return fmt.Errorf("marshaling auth data: %w", err)
473473
}
474474

475-
pull, err := d.inner.ImagePull(ctx, ref.Name(), image.PullOptions{
476-
RegistryAuth: base64.URLEncoding.EncodeToString(authdata),
477-
})
478-
if err != nil {
479-
return err
480-
}
475+
var lastErr error
476+
if err := wait.ExponentialBackoffWithContext(ctx, wait.Backoff{
477+
Duration: 1 * time.Second,
478+
Factor: 2.0,
479+
Jitter: 0.1,
480+
Steps: 5,
481+
Cap: 1 * time.Minute,
482+
}, func(ctx context.Context) (bool, error) {
483+
pull, err := d.inner.ImagePull(ctx, ref.Name(), image.PullOptions{
484+
RegistryAuth: base64.URLEncoding.EncodeToString(authdata),
485+
})
486+
if err != nil {
487+
clog.WarnContext(ctx, "failed to pull image, retrying", "ref", ref.Name(), "error", err)
488+
lastErr = err
489+
return false, nil
490+
}
491+
492+
// Block until the image is pulled by discarding the reader
493+
if _, err := io.Copy(io.Discard, pull); err != nil {
494+
clog.WarnContext(ctx, "failed to pull image, retrying", "ref", ref.Name(), "error", err)
495+
lastErr = err
496+
return false, nil
497+
}
481498

482-
// Block until the image is pulled by discarding the reader
483-
if _, err := io.Copy(io.Discard, pull); err != nil {
484-
return fmt.Errorf("pulling image: %w", err)
499+
return true, nil
500+
}); err != nil {
501+
return fmt.Errorf("pulling image: %w: last error: %w", err, lastErr)
485502
}
486503

487504
return nil

internal/drivers/ec2/docker.go

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"github.com/google/go-containerregistry/pkg/authn"
2424
"github.com/google/go-containerregistry/pkg/name"
2525
v1 "github.com/moby/docker-image-spec/specs-go/v1"
26+
"k8s.io/apimachinery/pkg/util/wait"
2627
)
2728

2829
func (d *driver) dockerClient(ctx context.Context) (*client.Client, error) {
@@ -75,14 +76,31 @@ func (d *driver) pullImage(ctx context.Context, cli *client.Client, ref name.Ref
7576
}
7677
authStr := base64.URLEncoding.EncodeToString(authJSON)
7778

78-
result, err := cli.ImagePull(ctx, ref.Name(), image.PullOptions{RegistryAuth: authStr})
79-
if err != nil {
80-
return fmt.Errorf("pulling image: %w", err)
81-
}
82-
defer result.Close()
79+
var lastErr error
80+
if err := wait.ExponentialBackoffWithContext(ctx, wait.Backoff{
81+
Duration: 1 * time.Second,
82+
Factor: 2.0,
83+
Jitter: 0.1,
84+
Steps: 5,
85+
Cap: 1 * time.Minute,
86+
}, func(ctx context.Context) (bool, error) {
87+
result, err := cli.ImagePull(ctx, ref.Name(), image.PullOptions{RegistryAuth: authStr})
88+
if err != nil {
89+
clog.WarnContext(ctx, "failed to pull image, retrying", "ref", ref.Name(), "error", err)
90+
lastErr = err
91+
return false, nil
92+
}
93+
defer result.Close()
8394

84-
if _, err := io.Copy(io.Discard, result); err != nil {
85-
return fmt.Errorf("reading pull response: %w", err)
95+
if _, err := io.Copy(io.Discard, result); err != nil {
96+
clog.WarnContext(ctx, "failed to pull image, retrying", "ref", ref.Name(), "error", err)
97+
lastErr = err
98+
return false, nil
99+
}
100+
101+
return true, nil
102+
}); err != nil {
103+
return fmt.Errorf("pulling image: %w: last error: %w", err, lastErr)
86104
}
87105

88106
return nil

0 commit comments

Comments
 (0)