Skip to content

Commit 545a4bc

Browse files
yawangwang and jkl73 authored
Apply retry logics in confidential computing API + workload image puller (#511)
* Apply retry logics in launcher
* remove retry in agent

---------

Co-authored-by: Jiankun Lü <jiankun@google.com>
1 parent 82b45ad commit 545a4bc

File tree

7 files changed

+410
-92
lines changed

7 files changed

+410
-92
lines changed

go.work.sum

Lines changed: 194 additions & 0 deletions
Large diffs are not rendered by default.

launcher/container_runner.go

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,12 @@ func getSignatureDiscoveryClient(cdClient *containerd.Client, mdsClient *metadat
267267
return registryauth.RefreshResolver(ctx, mdsClient)
268268
}
269269
imageFetcher := func(ctx context.Context, imageRef string, opts ...containerd.RemoteOpt) (containerd.Image, error) {
270-
image, err := cdClient.Pull(ctx, imageRef, opts...)
270+
image, err := pullImageWithRetries(
271+
func() (containerd.Image, error) {
272+
return cdClient.Pull(ctx, imageRef, opts...)
273+
},
274+
pullImageBackoffPolicy,
275+
)
271276
if err != nil {
272277
return nil, fmt.Errorf("cannot pull signature objects from the signature image [%s]: %w", imageRef, err)
273278
}
@@ -529,6 +534,11 @@ func defaultRetryPolicy() *backoff.ExponentialBackOff {
529534
return expBack
530535
}
531536

537+
func pullImageBackoffPolicy() backoff.BackOff {
538+
b := backoff.NewConstantBackOff(time.Millisecond * 500)
539+
return backoff.WithMaxRetries(b, 3)
540+
}
541+
532542
// Run the container
533543
// Container output will always be redirected to logger writer for now
534544
func (r *ContainerRunner) Run(ctx context.Context) error {
@@ -621,17 +631,39 @@ func (r *ContainerRunner) Run(ctx context.Context) error {
621631
return nil
622632
}
623633

634+
func pullImageWithRetries(f func() (containerd.Image, error), retry func() backoff.BackOff) (containerd.Image, error) {
635+
var err error
636+
var image containerd.Image
637+
err = backoff.Retry(func() error {
638+
image, err = f()
639+
return err
640+
}, retry())
641+
if err != nil {
642+
return nil, fmt.Errorf("failed to pull image with retries, the last error is: %w", err)
643+
}
644+
return image, nil
645+
}
646+
624647
func initImage(ctx context.Context, cdClient *containerd.Client, launchSpec spec.LaunchSpec, token oauth2.Token) (containerd.Image, error) {
625648
if token.Valid() {
626649
remoteOpt := containerd.WithResolver(registryauth.Resolver(token.AccessToken))
627-
628-
image, err := cdClient.Pull(ctx, launchSpec.ImageRef, containerd.WithPullUnpack, remoteOpt)
650+
image, err := pullImageWithRetries(
651+
func() (containerd.Image, error) {
652+
return cdClient.Pull(ctx, launchSpec.ImageRef, containerd.WithPullUnpack, remoteOpt)
653+
},
654+
pullImageBackoffPolicy,
655+
)
629656
if err != nil {
630657
return nil, fmt.Errorf("cannot pull the image: %w", err)
631658
}
632659
return image, nil
633660
}
634-
image, err := cdClient.Pull(ctx, launchSpec.ImageRef, containerd.WithPullUnpack)
661+
image, err := pullImageWithRetries(
662+
func() (containerd.Image, error) {
663+
return cdClient.Pull(ctx, launchSpec.ImageRef, containerd.WithPullUnpack)
664+
},
665+
pullImageBackoffPolicy,
666+
)
635667
if err != nil {
636668
return nil, fmt.Errorf("cannot pull the image (no token, only works for a public image): %w", err)
637669
}

launcher/container_runner_test.go

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -638,6 +638,57 @@ func TestMeasureCELEvents(t *testing.T) {
638638
}
639639
}
640640

641+
func TestPullImageWithRetries(t *testing.T) {
642+
testCases := []struct {
643+
name string
644+
imagePuller func(int) (containerd.Image, error)
645+
wantPass bool
646+
}{
647+
{
648+
name: "success with single attempt",
649+
imagePuller: func(int) (containerd.Image, error) { return &fakeImage{}, nil },
650+
wantPass: true,
651+
},
652+
{
653+
name: "failure then success",
654+
imagePuller: func(attempts int) (containerd.Image, error) {
655+
if attempts%2 == 1 {
656+
return nil, errors.New("fake error")
657+
}
658+
return &fakeImage{}, nil
659+
},
660+
wantPass: true,
661+
},
662+
{
663+
name: "failure with attempts exceeded",
664+
imagePuller: func(int) (containerd.Image, error) {
665+
return nil, errors.New("fake error")
666+
},
667+
wantPass: false,
668+
},
669+
}
670+
671+
for _, tc := range testCases {
672+
t.Run(tc.name, func(t *testing.T) {
673+
retryPolicy := func() backoff.BackOff {
674+
b := backoff.NewExponentialBackOff()
675+
return backoff.WithMaxRetries(b, 2)
676+
}
677+
678+
attempts := 0
679+
_, err := pullImageWithRetries(
680+
func() (containerd.Image, error) {
681+
attempts++
682+
return tc.imagePuller(attempts)
683+
},
684+
retryPolicy)
685+
if gotPass := (err == nil); gotPass != tc.wantPass {
686+
t.Errorf("pullImageWithRetries failed, got %v, but want %v", gotPass, tc.wantPass)
687+
}
688+
})
689+
}
690+
}
691+
641692
// This ensures fakeContainer implements containerd.Container interface.
642693
var _ containerd.Container = &fakeContainer{}
643694

launcher/go.mod

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ module github.com/google/go-tpm-tools/launcher
33
go 1.21
44

55
require (
6-
cloud.google.com/go/compute/metadata v0.5.0
7-
cloud.google.com/go/logging v1.10.0
6+
cloud.google.com/go/compute/metadata v0.5.2
7+
cloud.google.com/go/logging v1.12.0
88
github.com/cenkalti/backoff/v4 v4.2.1
99
github.com/containerd/containerd v1.7.16
1010
github.com/coreos/go-systemd/v22 v22.5.0
@@ -16,18 +16,18 @@ require (
1616
github.com/opencontainers/go-digest v1.0.0
1717
github.com/opencontainers/image-spec v1.1.0
1818
github.com/opencontainers/runtime-spec v1.1.0
19-
golang.org/x/oauth2 v0.21.0
20-
google.golang.org/api v0.189.0
21-
google.golang.org/genproto/googleapis/api v0.0.0-20240722135656-d784300faade
22-
google.golang.org/protobuf v1.34.2
19+
golang.org/x/oauth2 v0.23.0
20+
google.golang.org/api v0.205.0
21+
google.golang.org/genproto/googleapis/api v0.0.0-20241015192408-796eee8c2d53
22+
google.golang.org/protobuf v1.35.1
2323
)
2424

2525
require (
26-
cloud.google.com/go v0.115.0 // indirect
27-
cloud.google.com/go/auth v0.7.2 // indirect
28-
cloud.google.com/go/auth/oauth2adapt v0.2.3 // indirect
26+
cloud.google.com/go v0.116.0 // indirect
27+
cloud.google.com/go/auth v0.10.1 // indirect
28+
cloud.google.com/go/auth/oauth2adapt v0.2.5 // indirect
2929
cloud.google.com/go/confidentialcomputing v1.6.0 // indirect
30-
cloud.google.com/go/longrunning v0.5.9 // indirect
30+
cloud.google.com/go/longrunning v0.6.1 // indirect
3131
github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 // indirect
3232
github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20230306123547-8075edf89bb0 // indirect
3333
github.com/Microsoft/go-winio v0.6.1 // indirect
@@ -55,9 +55,9 @@ require (
5555
github.com/google/go-tdx-guest v0.3.2-0.20241009005452-097ee70d0843 // indirect
5656
github.com/google/go-tspi v0.3.0 // indirect
5757
github.com/google/logger v1.1.1 // indirect
58-
github.com/google/s2a-go v0.1.7 // indirect
58+
github.com/google/s2a-go v0.1.8 // indirect
5959
github.com/google/uuid v1.6.0 // indirect
60-
github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect
60+
github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect
6161
github.com/googleapis/gax-go/v2 v2.13.0 // indirect
6262
github.com/klauspost/compress v1.16.7 // indirect
6363
github.com/moby/locker v1.0.1 // indirect
@@ -69,24 +69,24 @@ require (
6969
github.com/pkg/errors v0.9.1 // indirect
7070
github.com/sirupsen/logrus v1.9.3 // indirect
7171
go.opencensus.io v0.24.0 // indirect
72-
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.49.0 // indirect
73-
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 // indirect
74-
go.opentelemetry.io/otel v1.24.0 // indirect
75-
go.opentelemetry.io/otel/metric v1.24.0 // indirect
76-
go.opentelemetry.io/otel/trace v1.24.0 // indirect
72+
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0 // indirect
73+
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 // indirect
74+
go.opentelemetry.io/otel v1.29.0 // indirect
75+
go.opentelemetry.io/otel/metric v1.29.0 // indirect
76+
go.opentelemetry.io/otel/trace v1.29.0 // indirect
7777
go.uber.org/multierr v1.11.0 // indirect
7878
golang.org/x/crypto v0.31.0 // indirect
7979
golang.org/x/exp v0.0.0-20240531132922-fd00a4e0eefc // indirect
8080
golang.org/x/mod v0.17.0 // indirect
81-
golang.org/x/net v0.27.0 // indirect
81+
golang.org/x/net v0.30.0 // indirect
8282
golang.org/x/sync v0.10.0 // indirect
8383
golang.org/x/sys v0.28.0 // indirect
8484
golang.org/x/text v0.21.0 // indirect
85-
golang.org/x/time v0.5.0 // indirect
85+
golang.org/x/time v0.7.0 // indirect
8686
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect
87-
google.golang.org/genproto v0.0.0-20240722135656-d784300faade // indirect
88-
google.golang.org/genproto/googleapis/rpc v0.0.0-20240722135656-d784300faade // indirect
89-
google.golang.org/grpc v1.64.1 // indirect
87+
google.golang.org/genproto v0.0.0-20241021214115-324edc3d5d38 // indirect
88+
google.golang.org/genproto/googleapis/rpc v0.0.0-20241021214115-324edc3d5d38 // indirect
89+
google.golang.org/grpc v1.67.1 // indirect
9090
)
9191

9292
replace (

0 commit comments

Comments
 (0)