Skip to content

Commit 3b02433

Browse files
authored
Merge pull request kubernetes#90645 from neolit123/1.19-fix-retry-etcd-member-add
kubeadm: fix flakes when performing etcd MemberAdd on slower setups
2 parents 61f0036 + 1c430ff commit 3b02433

File tree

1 file changed

+21
-12
lines changed

1 file changed

+21
-12
lines changed

cmd/kubeadm/app/util/etcd/etcd.go

Lines changed: 21 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -351,23 +351,32 @@ func (c *Client) AddMember(name string, peerAddrs string) ([]Member, error) {
351351
return nil, errors.Wrapf(err, "error parsing peer address %s", peerAddrs)
352352
}
353353

354-
cli, err := clientv3.New(clientv3.Config{
355-
Endpoints: c.Endpoints,
356-
DialTimeout: dialTimeout,
357-
DialOptions: []grpc.DialOption{
358-
grpc.WithBlock(), // block until the underlying connection is up
359-
},
360-
TLS: c.TLS,
361-
})
362-
if err != nil {
363-
return nil, err
354+
// Exponential backoff for the MemberAdd operation (up to ~200 seconds)
355+
etcdBackoffAdd := wait.Backoff{
356+
Steps: 18,
357+
Duration: 100 * time.Millisecond,
358+
Factor: 1.5,
359+
Jitter: 0.1,
364360
}
365-
defer cli.Close()
366361

367362
// Adds a new member to the cluster
368363
var lastError error
369364
var resp *clientv3.MemberAddResponse
370-
err = wait.ExponentialBackoff(etcdBackoff, func() (bool, error) {
365+
err = wait.ExponentialBackoff(etcdBackoffAdd, func() (bool, error) {
366+
cli, err := clientv3.New(clientv3.Config{
367+
Endpoints: c.Endpoints,
368+
DialTimeout: etcdTimeout,
369+
DialOptions: []grpc.DialOption{
370+
grpc.WithBlock(), // block until the underlying connection is up
371+
},
372+
TLS: c.TLS,
373+
})
374+
if err != nil {
375+
lastError = err
376+
return false, nil
377+
}
378+
defer cli.Close()
379+
371380
ctx, cancel := context.WithTimeout(context.Background(), etcdTimeout)
372381
resp, err = cli.MemberAdd(ctx, []string{peerAddrs})
373382
cancel()

0 commit comments

Comments
 (0)