@@ -38,10 +38,11 @@ import (
38
38
"k8s.io/kubernetes/cmd/kubeadm/app/util/config"
39
39
)
40
40
41
- // Exponential backoff for MemberAdd/Remove (values exclude jitter):
42
- // 0, 50, 150, 350, 750, 1550, 3150, 6350, 12750 ms
43
- var addRemoveBackoff = wait.Backoff {
44
- Steps : 8 ,
41
+ const etcdTimeout = 2 * time .Second
42
+
43
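+ // Exponential backoff shared by all etcd operations: up to 9 steps, delay starting at 50ms and doubling each step (plus up to 10% jitter); each call is additionally bounded by etcdTimeout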
44
+ var etcdBackoff = wait.Backoff {
45
+ Steps : 9 ,
45
46
Duration : 50 * time .Millisecond ,
46
47
Factor : 2.0 ,
47
48
Jitter : 0.1 ,
@@ -146,11 +147,21 @@ func (c *Client) Sync() error {
146
147
}
147
148
defer cli .Close ()
148
149
149
- ctx , cancel := context .WithTimeout (context .Background (), 10 * time .Second )
150
- err = cli .Sync (ctx )
151
- cancel ()
150
+ // Syncs the list of endpoints
151
+ var lastError error
152
+ err = wait .ExponentialBackoff (etcdBackoff , func () (bool , error ) {
153
+ ctx , cancel := context .WithTimeout (context .Background (), etcdTimeout )
154
+ err = cli .Sync (ctx )
155
+ cancel ()
156
+ if err == nil {
157
+ return true , nil
158
+ }
159
+ klog .V (5 ).Infof ("Failed to sync etcd endpoints: %v" , err )
160
+ lastError = err
161
+ return false , nil
162
+ })
152
163
if err != nil {
153
- return err
164
+ return lastError
154
165
}
155
166
klog .V (1 ).Infof ("etcd endpoints read from etcd: %s" , strings .Join (cli .Endpoints (), "," ))
156
167
@@ -180,11 +191,22 @@ func (c *Client) GetMemberID(peerURL string) (uint64, error) {
180
191
}
181
192
defer cli .Close ()
182
193
183
- ctx , cancel := context .WithTimeout (context .Background (), 10 * time .Second )
184
- resp , err := cli .MemberList (ctx )
185
- cancel ()
194
+ // Gets the member list
195
+ var lastError error
196
+ var resp * clientv3.MemberListResponse
197
+ err = wait .ExponentialBackoff (etcdBackoff , func () (bool , error ) {
198
+ ctx , cancel := context .WithTimeout (context .Background (), etcdTimeout )
199
+ resp , err = cli .MemberList (ctx )
200
+ cancel ()
201
+ if err == nil {
202
+ return true , nil
203
+ }
204
+ klog .V (5 ).Infof ("Failed to get etcd member list: %v" , err )
205
+ lastError = err
206
+ return false , nil
207
+ })
186
208
if err != nil {
187
- return 0 , err
209
+ return 0 , lastError
188
210
}
189
211
190
212
for _ , member := range resp .Members {
@@ -213,11 +235,14 @@ func (c *Client) RemoveMember(id uint64) ([]Member, error) {
213
235
// Remove an existing member from the cluster
214
236
var lastError error
215
237
var resp * clientv3.MemberRemoveResponse
216
- err = wait .ExponentialBackoff (addRemoveBackoff , func () (bool , error ) {
217
- resp , err = cli .MemberRemove (context .Background (), id )
238
+ err = wait .ExponentialBackoff (etcdBackoff , func () (bool , error ) {
239
+ ctx , cancel := context .WithTimeout (context .Background (), etcdTimeout )
240
+ resp , err = cli .MemberRemove (ctx , id )
241
+ cancel ()
218
242
if err == nil {
219
243
return true , nil
220
244
}
245
+ klog .V (5 ).Infof ("Failed to remove etcd member: %v" , err )
221
246
lastError = err
222
247
return false , nil
223
248
})
@@ -260,11 +285,14 @@ func (c *Client) AddMember(name string, peerAddrs string) ([]Member, error) {
260
285
// Adds a new member to the cluster
261
286
var lastError error
262
287
var resp * clientv3.MemberAddResponse
263
- err = wait .ExponentialBackoff (addRemoveBackoff , func () (bool , error ) {
264
- resp , err = cli .MemberAdd (context .Background (), []string {peerAddrs })
288
+ err = wait .ExponentialBackoff (etcdBackoff , func () (bool , error ) {
289
+ ctx , cancel := context .WithTimeout (context .Background (), etcdTimeout )
290
+ resp , err = cli .MemberAdd (ctx , []string {peerAddrs })
291
+ cancel ()
265
292
if err == nil {
266
293
return true , nil
267
294
}
295
+ klog .V (5 ).Infof ("Failed to add etcd member: %v" , err )
268
296
lastError = err
269
297
return false , nil
270
298
})
@@ -347,12 +375,24 @@ func (c *Client) getClusterStatus() (map[string]*clientv3.StatusResponse, error)
347
375
348
376
clusterStatus := make (map [string ]* clientv3.StatusResponse )
349
377
for _ , ep := range c .Endpoints {
350
- ctx , cancel := context .WithTimeout (context .Background (), 5 * time .Second )
351
- resp , err := cli .Status (ctx , ep )
352
- cancel ()
378
+ // Gets the member status
379
+ var lastError error
380
+ var resp * clientv3.StatusResponse
381
+ err = wait .ExponentialBackoff (etcdBackoff , func () (bool , error ) {
382
+ ctx , cancel := context .WithTimeout (context .Background (), etcdTimeout )
383
+ resp , err = cli .Status (ctx , ep )
384
+ cancel ()
385
+ if err == nil {
386
+ return true , nil
387
+ }
388
+ klog .V (5 ).Infof ("Failed to get etcd status for %s: %v" , ep , err )
389
+ lastError = err
390
+ return false , nil
391
+ })
353
392
if err != nil {
354
- return nil , err
393
+ return nil , lastError
355
394
}
395
+
356
396
clusterStatus [ep ] = resp
357
397
}
358
398
return clusterStatus , nil
0 commit comments