88 "net"
99 "net/http"
1010 "net/http/httptrace"
11+ "net/url"
1112 "os"
13+ "regexp"
1214 "time"
1315
1416 "github.com/apache/thrift/lib/go/thrift"
@@ -175,7 +177,11 @@ func (tsc *ThriftServiceClient) CancelOperation(ctx context.Context, req *cli_se
175177// InitThriftClient is a wrapper of the http transport, so we can have access to response code and headers.
176178// It is important to know the code and headers to know if we need to retry or not
177179func InitThriftClient (cfg * config.Config , httpclient * http.Client ) (* ThriftServiceClient , error ) {
178- endpoint := cfg .ToEndpointURL ()
180+ var err error
181+ endpoint , err := cfg .ToEndpointURL ()
182+ if err != nil {
183+ return nil , err
184+ }
179185 tcfg := & thrift.TConfiguration {
180186 TLSConfig : cfg .TLSConfig ,
181187 }
@@ -200,7 +206,6 @@ func InitThriftClient(cfg *config.Config, httpclient *http.Client) (*ThriftServi
200206 }
201207
202208 var tTrans thrift.TTransport
203- var err error
204209
205210 switch cfg .ThriftTransport {
206211 case "http" :
@@ -270,6 +275,8 @@ func SprintGuid(bts []byte) string {
270275 return fmt .Sprintf ("%x" , bts )
271276}
272277
// retryableStatusCode lists the HTTP status codes treated as retryable:
// 429 Too Many Requests and 503 Service Unavailable. Transport.RoundTrip hands
// responses with these codes back to the caller instead of converting them to
// an error, and RetryPolicy reports them as worth retrying.
278+ var retryableStatusCode = []int {http .StatusTooManyRequests , http .StatusServiceUnavailable }
279+
273280type Transport struct {
274281 Base * http.Transport
275282 Authr auth.Authenticator
@@ -309,6 +316,27 @@ func (t *Transport) RoundTrip(req *http.Request) (*http.Response, error) {
309316 if err != nil {
310317 return nil , err
311318 }
319+ if resp .StatusCode != http .StatusOK {
320+ reason := resp .Header .Get ("X-Databricks-Reason-Phrase" )
321+ terrmsg := resp .Header .Get ("X-Thriftserver-Error-Message" )
322+ for _ , c := range retryableStatusCode {
323+ if c == resp .StatusCode {
324+ if terrmsg != "" {
325+ logger .Warn ().Msg (terrmsg )
326+ }
327+ return resp , nil
328+ }
329+ }
330+ if reason != "" {
331+ logger .Err (fmt .Errorf (reason )).Msg ("non retryable error" )
332+ return nil , errors .New (reason )
333+ }
334+ if terrmsg != "" {
335+ logger .Err (fmt .Errorf (terrmsg )).Msg ("non retryable error" )
336+ return nil , errors .New (terrmsg )
337+ }
338+ return nil , errors .New (resp .Status )
339+ }
312340
313341 return resp , nil
314342}
@@ -322,7 +350,7 @@ func RetryableClient(cfg *config.Config) *http.Client {
322350 RetryWaitMax : cfg .RetryWaitMax ,
323351 RetryMax : cfg .RetryMax ,
324352 ErrorHandler : errorHandler ,
325- CheckRetry : retryablehttp . DefaultRetryPolicy ,
353+ CheckRetry : RetryPolicy ,
326354 Backoff : retryablehttp .DefaultBackoff ,
327355 }
328356 return retryableClient .StandardClient ()
@@ -412,3 +440,34 @@ func errorHandler(resp *http.Response, err error, numTries int) (*http.Response,
412440
413441 return resp , werr
414442}
443+
444+ func RetryPolicy (ctx context.Context , resp * http.Response , err error ) (bool , error ) {
445+ var lostConn = regexp .MustCompile (`EOF` )
446+
447+ // do not retry on context.Canceled or context.DeadlineExceeded
448+ if ctx .Err () != nil {
449+ return false , ctx .Err ()
450+ }
451+
452+ if err != nil {
453+ if v , ok := err .(* url.Error ); ok {
454+ if lostConn .MatchString (v .Error ()) {
455+ return true , v
456+ }
457+ }
458+ return false , nil
459+ }
460+
461+ // 429 Too Many Requests or 503 service unavailable is recoverable. Sometimes the server puts
462+ // a Retry-After response header to indicate when the server is
463+ // available to start processing request from client.
464+
465+ for _ , c := range retryableStatusCode {
466+ if c == resp .StatusCode {
467+ return true , nil
468+ }
469+ }
470+
471+ return false , nil
472+
473+ }
0 commit comments