Skip to content

Commit f911fb2

Browse files
committed
[image-builder] Use req.GetUseRetryClient() instead of feature flag resolved on startup
[image-builder] Drop superfluous imports [image-builder] Switch to github.com/hashicorp/go-retryablehttp, incl. better unit tests at orchestrator level!
1 parent 37a1cdb commit f911fb2

File tree

3 files changed

+292
-22
lines changed

3 files changed

+292
-22
lines changed

components/image-builder-mk3/pkg/orchestrator/orchestrator.go

Lines changed: 53 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"errors"
1212
"fmt"
1313
"io"
14+
"net/http"
1415
"os"
1516
"path/filepath"
1617
"sort"
@@ -21,6 +22,7 @@ import (
2122
"github.com/aws/aws-sdk-go-v2/service/ecr"
2223
"github.com/distribution/reference"
2324
"github.com/google/uuid"
25+
"github.com/hashicorp/go-retryablehttp"
2426
"github.com/opentracing/opentracing-go"
2527
"github.com/sirupsen/logrus"
2628
"golang.org/x/xerrors"
@@ -106,6 +108,8 @@ func NewOrchestratingBuilder(cfg config.Configuration) (res *Orchestrator, err e
106108
wsman = wsmanapi.NewWorkspaceManagerClient(conn)
107109
}
108110

111+
retryResolveClient := NewRetryTimeoutClient()
112+
109113
o := &Orchestrator{
110114
Config: cfg,
111115
Auth: authentication,
@@ -115,6 +119,8 @@ func NewOrchestratingBuilder(cfg config.Configuration) (res *Orchestrator, err e
115119
},
116120
RefResolver: &resolve.StandaloneRefResolver{},
117121

122+
retryResolveClient: retryResolveClient,
123+
118124
wsman: wsman,
119125
buildListener: make(map[string]map[buildListener]struct{}),
120126
logListener: make(map[string]map[logListener]struct{}),
@@ -133,6 +139,8 @@ type Orchestrator struct {
133139
AuthResolver auth.Resolver
134140
RefResolver resolve.DockerRefResolver
135141

142+
retryResolveClient *http.Client
143+
136144
wsman wsmanapi.WorkspaceManagerClient
137145

138146
buildListener map[string]map[buildListener]struct{}
@@ -161,7 +169,7 @@ func (o *Orchestrator) ResolveBaseImage(ctx context.Context, req *protocol.Resol
161169

162170
reqauth := o.AuthResolver.ResolveRequestAuth(ctx, req.Auth)
163171

164-
refstr, err := o.getAbsoluteImageRef(ctx, req.Ref, reqauth)
172+
refstr, err := o.getAbsoluteImageRef(ctx, req.Ref, reqauth, req.GetUseRetryClient())
165173
if err != nil {
166174
return nil, err
167175
}
@@ -178,7 +186,8 @@ func (o *Orchestrator) ResolveWorkspaceImage(ctx context.Context, req *protocol.
178186
tracing.LogRequestSafe(span, req)
179187

180188
reqauth := o.AuthResolver.ResolveRequestAuth(ctx, req.Auth)
181-
baseref, err := o.getBaseImageRef(ctx, req.Source, reqauth)
189+
useRetryClient := req.GetUseRetryClient()
190+
baseref, err := o.getBaseImageRef(ctx, req.Source, reqauth, useRetryClient)
182191
if _, ok := status.FromError(err); err != nil && ok {
183192
return nil, err
184193
}
@@ -197,7 +206,7 @@ func (o *Orchestrator) ResolveWorkspaceImage(ctx context.Context, req *protocol.
197206
if err != nil {
198207
return nil, status.Errorf(codes.Internal, "cannot get workspace image authentication: %v", err)
199208
}
200-
exists, err := o.checkImageExists(ctx, refstr, auth)
209+
exists, err := o.checkImageExists(ctx, refstr, auth, useRetryClient)
201210
if err != nil {
202211
return nil, status.Errorf(codes.Internal, "cannot resolve workspace image: %s", err.Error())
203212
}
@@ -228,8 +237,8 @@ func (o *Orchestrator) Build(req *protocol.BuildRequest, resp protocol.ImageBuil
228237

229238
// resolve build request authentication
230239
reqauth := o.AuthResolver.ResolveRequestAuth(ctx, req.Auth)
231-
232-
log.WithField("forceRebuild", req.GetForceRebuild()).WithField("baseImageNameResolved", req.BaseImageNameResolved).Info("build request")
240+
useRetryClient := req.GetUseRetryClient()
241+
log.WithField("forceRebuild", req.GetForceRebuild()).WithField("baseImageNameResolved", req.BaseImageNameResolved).WithField("useRetryClient", useRetryClient).Info("build request")
233242

234243
// resolve to ref to baseImageNameResolved (if it exists)
235244
if req.BaseImageNameResolved != "" && !req.GetForceRebuild() {
@@ -249,7 +258,7 @@ func (o *Orchestrator) Build(req *protocol.BuildRequest, resp protocol.ImageBuil
249258
}
250259

251260
// check if needs build -> early return
252-
exists, err := o.checkImageExists(ctx, wsrefstr, wsrefAuth)
261+
exists, err := o.checkImageExists(ctx, wsrefstr, wsrefAuth, useRetryClient)
253262
if err != nil {
254263
return status.Errorf(codes.Internal, "cannot check if image is already built: %q", err)
255264
}
@@ -264,7 +273,7 @@ func (o *Orchestrator) Build(req *protocol.BuildRequest, resp protocol.ImageBuil
264273
}
265274
return nil
266275
}
267-
baseref, err := o.getAbsoluteImageRef(ctx, req.BaseImageNameResolved, reqauth)
276+
baseref, err := o.getAbsoluteImageRef(ctx, req.BaseImageNameResolved, reqauth, useRetryClient)
268277
if err == nil {
269278
req.Source.From = &protocol.BuildSource_Ref{
270279
Ref: &protocol.BuildSourceReference{
@@ -275,7 +284,7 @@ func (o *Orchestrator) Build(req *protocol.BuildRequest, resp protocol.ImageBuil
275284
}
276285

277286
log.Info("falling through to old way of building")
278-
baseref, err := o.getBaseImageRef(ctx, req.Source, reqauth)
287+
baseref, err := o.getBaseImageRef(ctx, req.Source, reqauth, useRetryClient)
279288
if _, ok := status.FromError(err); err != nil && ok {
280289
log.WithError(err).Error("gRPC status error")
281290
return err
@@ -295,7 +304,7 @@ func (o *Orchestrator) Build(req *protocol.BuildRequest, resp protocol.ImageBuil
295304
}
296305

297306
// check if needs build -> early return
298-
exists, err := o.checkImageExists(ctx, wsrefstr, wsrefAuth)
307+
exists, err := o.checkImageExists(ctx, wsrefstr, wsrefAuth, req.GetUseRetryClient())
299308
if err != nil {
300309
return status.Errorf(codes.Internal, "cannot check if image is already built: %q", err)
301310
}
@@ -465,7 +474,7 @@ func (o *Orchestrator) Build(req *protocol.BuildRequest, resp protocol.ImageBuil
465474
// "cannot pull from reg.gitpod.io" error message. Instead the image-build should fail properly.
466475
// To do this, we resolve the built image afterwards to ensure it was actually built.
467476
if update.Status == protocol.BuildStatus_done_success {
468-
exists, err := o.checkImageExists(ctx, wsrefstr, wsrefAuth)
477+
exists, err := o.checkImageExists(ctx, wsrefstr, wsrefAuth, useRetryClient)
469478
if err != nil {
470479
update.Status = protocol.BuildStatus_done_failure
471480
update.Message = fmt.Sprintf("cannot check if workspace image exists after the build: %v", err)
@@ -582,12 +591,12 @@ func (o *Orchestrator) ListBuilds(ctx context.Context, req *protocol.ListBuildsR
582591
return &protocol.ListBuildsResponse{Builds: res}, nil
583592
}
584593

585-
func (o *Orchestrator) checkImageExists(ctx context.Context, ref string, authentication *auth.Authentication) (exists bool, err error) {
594+
func (o *Orchestrator) checkImageExists(ctx context.Context, ref string, authentication *auth.Authentication, useRetryClient bool) (exists bool, err error) {
586595
span, ctx := opentracing.StartSpanFromContext(ctx, "checkImageExists")
587596
defer tracing.FinishSpan(span, &err)
588597
span.SetTag("ref", ref)
589598

590-
_, err = o.RefResolver.Resolve(ctx, ref, resolve.WithAuthentication(authentication))
599+
_, err = o.RefResolver.Resolve(ctx, ref, resolve.WithAuthentication(authentication), o.withRetryIfEnabled(useRetryClient))
591600
if errors.Is(err, resolve.ErrNotFound) {
592601
return false, nil
593602
}
@@ -602,18 +611,19 @@ func (o *Orchestrator) checkImageExists(ctx context.Context, ref string, authent
602611
}
603612

604613
// getAbsoluteImageRef returns the "digest" form of an image, i.e. contains no mutable image tags
605-
func (o *Orchestrator) getAbsoluteImageRef(ctx context.Context, ref string, allowedAuth auth.AllowedAuthFor) (res string, err error) {
606-
span, ctx := opentracing.StartSpanFromContext(ctx, "getAbsoluteImageRef")
614+
func (o *Orchestrator) getAbsoluteImageRef(ctx context.Context, ref string, allowedAuth auth.AllowedAuthFor, useRetryClient bool) (res string, err error) {
615+
span, ctx := opentracing.StartSpanFromContext(ctx, "getAbsoluteImageRefWithResolver")
607616
defer tracing.FinishSpan(span, &err)
608617
span.LogKV("ref", ref)
618+
span.LogKV("useRetryClient", useRetryClient)
609619

610-
log.WithField("ref", ref).Debug("getAbsoluteImageRef")
620+
log.WithField("ref", ref).WithField("useRetryClient", useRetryClient).Debug("getAbsoluteImageRefWithResolver")
611621
auth, err := allowedAuth.GetAuthFor(ctx, o.Auth, ref)
612622
if err != nil {
613623
return "", status.Errorf(codes.InvalidArgument, "cannt resolve base image ref: %v", err)
614624
}
615625

616-
ref, err = o.RefResolver.Resolve(ctx, ref, resolve.WithAuthentication(auth))
626+
ref, err = o.RefResolver.Resolve(ctx, ref, resolve.WithAuthentication(auth), o.withRetryIfEnabled(useRetryClient))
617627
if errors.Is(err, resolve.ErrNotFound) {
618628
return "", status.Error(codes.NotFound, "cannot resolve image")
619629
}
@@ -634,13 +644,20 @@ func (o *Orchestrator) getAbsoluteImageRef(ctx context.Context, ref string, allo
634644
return ref, nil
635645
}
636646

637-
func (o *Orchestrator) getBaseImageRef(ctx context.Context, bs *protocol.BuildSource, allowedAuth auth.AllowedAuthFor) (res string, err error) {
647+
func (o *Orchestrator) withRetryIfEnabled(useRetryClient bool) resolve.DockerRefResolverOption {
648+
if useRetryClient {
649+
return resolve.WithHttpClient(o.retryResolveClient)
650+
}
651+
return resolve.WithHttpClient(nil)
652+
}
653+
654+
func (o *Orchestrator) getBaseImageRef(ctx context.Context, bs *protocol.BuildSource, allowedAuth auth.AllowedAuthFor, useRetryClient bool) (res string, err error) {
638655
span, ctx := opentracing.StartSpanFromContext(ctx, "getBaseImageRef")
639656
defer tracing.FinishSpan(span, &err)
640657

641658
switch src := bs.From.(type) {
642659
case *protocol.BuildSource_Ref:
643-
return o.getAbsoluteImageRef(ctx, src.Ref.Ref, allowedAuth)
660+
return o.getAbsoluteImageRef(ctx, src.Ref.Ref, allowedAuth, useRetryClient)
644661

645662
case *protocol.BuildSource_File:
646663
manifest := map[string]string{
@@ -884,3 +901,21 @@ func (o *Orchestrator) PublishLog(buildID string, message string) {
884901
}
885902
}
886903
}
904+
905+
func NewRetryTimeoutClient() *http.Client {
906+
retryClient := retryablehttp.NewClient()
907+
retryClient.Backoff = retryablehttp.LinearJitterBackoff
908+
retryClient.HTTPClient.Timeout = 15 * time.Second
909+
retryClient.RetryMax = 3
910+
retryClient.RetryWaitMin = 500 * time.Millisecond
911+
retryClient.RetryWaitMax = 2 * time.Microsecond
912+
retryClient.CheckRetry = retryablehttp.DefaultRetryPolicy
913+
retryClient.Logger = log.WithField("retry", "true")
914+
915+
// Use a custom transport to handle retries and timeouts
916+
retryClient.HTTPClient.Transport = &http.Transport{
917+
DisableKeepAlives: true, // Disable keep-alives to ensure fresh connections
918+
}
919+
920+
return retryClient.StandardClient()
921+
}

0 commit comments

Comments
 (0)