@@ -11,6 +11,7 @@ import (
1111 "errors"
1212 "fmt"
1313 "io"
14+ "net/http"
1415 "os"
1516 "path/filepath"
1617 "sort"
@@ -21,6 +22,7 @@ import (
2122 "github.com/aws/aws-sdk-go-v2/service/ecr"
2223 "github.com/distribution/reference"
2324 "github.com/google/uuid"
25+ "github.com/hashicorp/go-retryablehttp"
2426 "github.com/opentracing/opentracing-go"
2527 "github.com/sirupsen/logrus"
2628 "golang.org/x/xerrors"
@@ -106,6 +108,8 @@ func NewOrchestratingBuilder(cfg config.Configuration) (res *Orchestrator, err e
106108 wsman = wsmanapi .NewWorkspaceManagerClient (conn )
107109 }
108110
111+ retryResolveClient := NewRetryTimeoutClient ()
112+
109113 o := & Orchestrator {
110114 Config : cfg ,
111115 Auth : authentication ,
@@ -115,6 +119,8 @@ func NewOrchestratingBuilder(cfg config.Configuration) (res *Orchestrator, err e
115119 },
116120 RefResolver : & resolve.StandaloneRefResolver {},
117121
122+ retryResolveClient : retryResolveClient ,
123+
118124 wsman : wsman ,
119125 buildListener : make (map [string ]map [buildListener ]struct {}),
120126 logListener : make (map [string ]map [logListener ]struct {}),
@@ -133,6 +139,8 @@ type Orchestrator struct {
133139 AuthResolver auth.Resolver
134140 RefResolver resolve.DockerRefResolver
135141
142+ retryResolveClient * http.Client
143+
136144 wsman wsmanapi.WorkspaceManagerClient
137145
138146 buildListener map [string ]map [buildListener ]struct {}
@@ -161,7 +169,7 @@ func (o *Orchestrator) ResolveBaseImage(ctx context.Context, req *protocol.Resol
161169
162170 reqauth := o .AuthResolver .ResolveRequestAuth (ctx , req .Auth )
163171
164- refstr , err := o .getAbsoluteImageRef (ctx , req .Ref , reqauth )
172+ refstr , err := o .getAbsoluteImageRef (ctx , req .Ref , reqauth , req . GetUseRetryClient () )
165173 if err != nil {
166174 return nil , err
167175 }
@@ -178,7 +186,8 @@ func (o *Orchestrator) ResolveWorkspaceImage(ctx context.Context, req *protocol.
178186 tracing .LogRequestSafe (span , req )
179187
180188 reqauth := o .AuthResolver .ResolveRequestAuth (ctx , req .Auth )
181- baseref , err := o .getBaseImageRef (ctx , req .Source , reqauth )
189+ useRetryClient := req .GetUseRetryClient ()
190+ baseref , err := o .getBaseImageRef (ctx , req .Source , reqauth , useRetryClient )
182191 if _ , ok := status .FromError (err ); err != nil && ok {
183192 return nil , err
184193 }
@@ -197,7 +206,7 @@ func (o *Orchestrator) ResolveWorkspaceImage(ctx context.Context, req *protocol.
197206 if err != nil {
198207 return nil , status .Errorf (codes .Internal , "cannot get workspace image authentication: %v" , err )
199208 }
200- exists , err := o .checkImageExists (ctx , refstr , auth )
209+ exists , err := o .checkImageExists (ctx , refstr , auth , useRetryClient )
201210 if err != nil {
202211 return nil , status .Errorf (codes .Internal , "cannot resolve workspace image: %s" , err .Error ())
203212 }
@@ -228,8 +237,8 @@ func (o *Orchestrator) Build(req *protocol.BuildRequest, resp protocol.ImageBuil
228237
229238 // resolve build request authentication
230239 reqauth := o .AuthResolver .ResolveRequestAuth (ctx , req .Auth )
231-
232- log .WithField ("forceRebuild" , req .GetForceRebuild ()).WithField ("baseImageNameResolved" , req .BaseImageNameResolved ).Info ("build request" )
240+ useRetryClient := req . GetUseRetryClient ()
241+ log .WithField ("forceRebuild" , req .GetForceRebuild ()).WithField ("baseImageNameResolved" , req .BaseImageNameResolved ).WithField ( "useRetryClient" , useRetryClient ). Info ("build request" )
233242
234243 // resolve to ref to baseImageNameResolved (if it exists)
235244 if req .BaseImageNameResolved != "" && ! req .GetForceRebuild () {
@@ -249,7 +258,7 @@ func (o *Orchestrator) Build(req *protocol.BuildRequest, resp protocol.ImageBuil
249258 }
250259
251260 // check if needs build -> early return
252- exists , err := o .checkImageExists (ctx , wsrefstr , wsrefAuth )
261+ exists , err := o .checkImageExists (ctx , wsrefstr , wsrefAuth , useRetryClient )
253262 if err != nil {
254263 return status .Errorf (codes .Internal , "cannot check if image is already built: %q" , err )
255264 }
@@ -264,7 +273,7 @@ func (o *Orchestrator) Build(req *protocol.BuildRequest, resp protocol.ImageBuil
264273 }
265274 return nil
266275 }
267- baseref , err := o .getAbsoluteImageRef (ctx , req .BaseImageNameResolved , reqauth )
276+ baseref , err := o .getAbsoluteImageRef (ctx , req .BaseImageNameResolved , reqauth , useRetryClient )
268277 if err == nil {
269278 req .Source .From = & protocol.BuildSource_Ref {
270279 Ref : & protocol.BuildSourceReference {
@@ -275,7 +284,7 @@ func (o *Orchestrator) Build(req *protocol.BuildRequest, resp protocol.ImageBuil
275284 }
276285
277286 log .Info ("falling through to old way of building" )
278- baseref , err := o .getBaseImageRef (ctx , req .Source , reqauth )
287+ baseref , err := o .getBaseImageRef (ctx , req .Source , reqauth , useRetryClient )
279288 if _ , ok := status .FromError (err ); err != nil && ok {
280289 log .WithError (err ).Error ("gRPC status error" )
281290 return err
@@ -295,7 +304,7 @@ func (o *Orchestrator) Build(req *protocol.BuildRequest, resp protocol.ImageBuil
295304 }
296305
297306 // check if needs build -> early return
298- exists , err := o .checkImageExists (ctx , wsrefstr , wsrefAuth )
307+ exists , err := o .checkImageExists (ctx , wsrefstr , wsrefAuth , req . GetUseRetryClient () )
299308 if err != nil {
300309 return status .Errorf (codes .Internal , "cannot check if image is already built: %q" , err )
301310 }
@@ -465,7 +474,7 @@ func (o *Orchestrator) Build(req *protocol.BuildRequest, resp protocol.ImageBuil
465474 // "cannot pull from reg.gitpod.io" error message. Instead the image-build should fail properly.
466475 // To do this, we resolve the built image afterwards to ensure it was actually built.
467476 if update .Status == protocol .BuildStatus_done_success {
468- exists , err := o .checkImageExists (ctx , wsrefstr , wsrefAuth )
477+ exists , err := o .checkImageExists (ctx , wsrefstr , wsrefAuth , useRetryClient )
469478 if err != nil {
470479 update .Status = protocol .BuildStatus_done_failure
471480 update .Message = fmt .Sprintf ("cannot check if workspace image exists after the build: %v" , err )
@@ -582,12 +591,12 @@ func (o *Orchestrator) ListBuilds(ctx context.Context, req *protocol.ListBuildsR
582591 return & protocol.ListBuildsResponse {Builds : res }, nil
583592}
584593
585- func (o * Orchestrator ) checkImageExists (ctx context.Context , ref string , authentication * auth.Authentication ) (exists bool , err error ) {
594+ func (o * Orchestrator ) checkImageExists (ctx context.Context , ref string , authentication * auth.Authentication , useRetryClient bool ) (exists bool , err error ) {
586595 span , ctx := opentracing .StartSpanFromContext (ctx , "checkImageExists" )
587596 defer tracing .FinishSpan (span , & err )
588597 span .SetTag ("ref" , ref )
589598
590- _ , err = o .RefResolver .Resolve (ctx , ref , resolve .WithAuthentication (authentication ))
599+ _ , err = o .RefResolver .Resolve (ctx , ref , resolve .WithAuthentication (authentication ), o . withRetryIfEnabled ( useRetryClient ) )
591600 if errors .Is (err , resolve .ErrNotFound ) {
592601 return false , nil
593602 }
@@ -602,18 +611,19 @@ func (o *Orchestrator) checkImageExists(ctx context.Context, ref string, authent
602611}
603612
604613// getAbsoluteImageRef returns the "digest" form of an image, i.e. contains no mutable image tags
605- func (o * Orchestrator ) getAbsoluteImageRef (ctx context.Context , ref string , allowedAuth auth.AllowedAuthFor ) (res string , err error ) {
606- span , ctx := opentracing .StartSpanFromContext (ctx , "getAbsoluteImageRef " )
614+ func (o * Orchestrator ) getAbsoluteImageRef (ctx context.Context , ref string , allowedAuth auth.AllowedAuthFor , useRetryClient bool ) (res string , err error ) {
615+ span , ctx := opentracing .StartSpanFromContext (ctx , "getAbsoluteImageRefWithResolver " )
607616 defer tracing .FinishSpan (span , & err )
608617 span .LogKV ("ref" , ref )
618+ span .LogKV ("useRetryClient" , useRetryClient )
609619
610- log .WithField ("ref" , ref ).Debug ("getAbsoluteImageRef " )
620+ log .WithField ("ref" , ref ).WithField ( "useRetryClient" , useRetryClient ). Debug ("getAbsoluteImageRefWithResolver " )
611621 auth , err := allowedAuth .GetAuthFor (ctx , o .Auth , ref )
612622 if err != nil {
613623 return "" , status .Errorf (codes .InvalidArgument , "cannt resolve base image ref: %v" , err )
614624 }
615625
616- ref , err = o .RefResolver .Resolve (ctx , ref , resolve .WithAuthentication (auth ))
626+ ref , err = o .RefResolver .Resolve (ctx , ref , resolve .WithAuthentication (auth ), o . withRetryIfEnabled ( useRetryClient ) )
617627 if errors .Is (err , resolve .ErrNotFound ) {
618628 return "" , status .Error (codes .NotFound , "cannot resolve image" )
619629 }
@@ -634,13 +644,20 @@ func (o *Orchestrator) getAbsoluteImageRef(ctx context.Context, ref string, allo
634644 return ref , nil
635645}
636646
637- func (o * Orchestrator ) getBaseImageRef (ctx context.Context , bs * protocol.BuildSource , allowedAuth auth.AllowedAuthFor ) (res string , err error ) {
647+ func (o * Orchestrator ) withRetryIfEnabled (useRetryClient bool ) resolve.DockerRefResolverOption {
648+ if useRetryClient {
649+ return resolve .WithHttpClient (o .retryResolveClient )
650+ }
651+ return resolve .WithHttpClient (nil )
652+ }
653+
654+ func (o * Orchestrator ) getBaseImageRef (ctx context.Context , bs * protocol.BuildSource , allowedAuth auth.AllowedAuthFor , useRetryClient bool ) (res string , err error ) {
638655 span , ctx := opentracing .StartSpanFromContext (ctx , "getBaseImageRef" )
639656 defer tracing .FinishSpan (span , & err )
640657
641658 switch src := bs .From .(type ) {
642659 case * protocol.BuildSource_Ref :
643- return o .getAbsoluteImageRef (ctx , src .Ref .Ref , allowedAuth )
660+ return o .getAbsoluteImageRef (ctx , src .Ref .Ref , allowedAuth , useRetryClient )
644661
645662 case * protocol.BuildSource_File :
646663 manifest := map [string ]string {
@@ -884,3 +901,21 @@ func (o *Orchestrator) PublishLog(buildID string, message string) {
884901 }
885902 }
886903}
904+
905+ func NewRetryTimeoutClient () * http.Client {
906+ retryClient := retryablehttp .NewClient ()
907+ retryClient .Backoff = retryablehttp .LinearJitterBackoff
908+ retryClient .HTTPClient .Timeout = 15 * time .Second
909+ retryClient .RetryMax = 3
910+ retryClient .RetryWaitMin = 500 * time .Millisecond
911+ retryClient .RetryWaitMax = 2 * time .Microsecond
912+ retryClient .CheckRetry = retryablehttp .DefaultRetryPolicy
913+ retryClient .Logger = log .WithField ("retry" , "true" )
914+
915+ // Use a custom transport to handle retries and timeouts
916+ retryClient .HTTPClient .Transport = & http.Transport {
917+ DisableKeepAlives : true , // Disable keep-alives to ensure fresh connections
918+ }
919+
920+ return retryClient .StandardClient ()
921+ }
0 commit comments