@@ -22,7 +22,7 @@ import (
22
22
"fmt"
23
23
"time"
24
24
25
- v1 "k8s.io/api/core/v1"
25
+ "k8s.io/api/core/v1"
26
26
apierrors "k8s.io/apimachinery/pkg/api/errors"
27
27
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
28
28
"k8s.io/apimachinery/pkg/types"
@@ -287,6 +287,10 @@ func (cnc *CloudNodeController) updateNodeAddress(ctx context.Context, node *v1.
287
287
}
288
288
}
289
289
290
+ // nodeModifier is used to carry changes to node objects across multiple attempts to update them
291
+ // in a retry-if-conflict loop.
292
+ type nodeModifier func (* v1.Node )
293
+
290
294
func (cnc * CloudNodeController ) UpdateCloudNode (ctx context.Context , _ , newObj interface {}) {
291
295
node , ok := newObj .(* v1.Node )
292
296
if ! ok {
@@ -318,6 +322,7 @@ func (cnc *CloudNodeController) AddCloudNode(ctx context.Context, obj interface{
318
322
319
323
// This processes nodes that were added into the cluster, and cloud initialize them if appropriate
320
324
func (cnc * CloudNodeController ) initializeNode (ctx context.Context , node * v1.Node ) {
325
+ klog .Infof ("Initializing node %s with cloud provider" , node .Name )
321
326
322
327
instances , ok := cnc .cloud .Instances ()
323
328
if ! ok {
@@ -340,78 +345,51 @@ func (cnc *CloudNodeController) initializeNode(ctx context.Context, node *v1.Nod
340
345
return err
341
346
}
342
347
}
348
+ return nil
349
+ })
350
+ if err != nil {
351
+ utilruntime .HandleError (err )
352
+ return
353
+ }
343
354
344
- curNode , err := cnc .kubeClient .CoreV1 ().Nodes ().Get (node .Name , metav1.GetOptions {})
345
- if err != nil {
346
- return err
347
- }
348
-
349
- cloudTaint := getCloudTaint (curNode .Spec .Taints )
350
- if cloudTaint == nil {
351
- // Node object received from event had the cloud taint but was outdated,
352
- // the node has actually already been initialized.
353
- return nil
354
- }
355
+ curNode , err := cnc .kubeClient .CoreV1 ().Nodes ().Get (node .Name , metav1.GetOptions {})
356
+ if err != nil {
357
+ utilruntime .HandleError (fmt .Errorf ("failed to get node %s: %v" , node .Name , err ))
358
+ return
359
+ }
355
360
356
- if curNode .Spec .ProviderID == "" {
357
- providerID , err := cloudprovider .GetInstanceProviderID (ctx , cnc .cloud , types .NodeName (curNode .Name ))
358
- if err == nil {
359
- curNode .Spec .ProviderID = providerID
360
- } else {
361
- // we should attempt to set providerID on curNode, but
362
- // we can continue if we fail since we will attempt to set
363
- // node addresses given the node name in getNodeAddressesByProviderIDOrName
364
- klog .Errorf ("failed to set node provider id: %v" , err )
365
- }
366
- }
361
+ cloudTaint := getCloudTaint (curNode .Spec .Taints )
362
+ if cloudTaint == nil {
363
+ // Node object received from event had the cloud taint but was outdated,
364
+ // the node has actually already been initialized.
365
+ return
366
+ }
367
367
368
- nodeAddresses , err := getNodeAddressesByProviderIDOrName (ctx , instances , curNode )
369
- if err != nil {
370
- return err
371
- }
368
+ nodeModifiers , err := cnc .getNodeModifiersFromCloudProvider (ctx , curNode , instances )
369
+ if err != nil {
370
+ utilruntime .HandleError (fmt .Errorf ("failed to initialize node %s at cloudprovider: %v" , node .Name , err ))
371
+ return
372
+ }
372
373
373
- // If user provided an IP address, ensure that IP address is found
374
- // in the cloud provider before removing the taint on the node
375
- if nodeIP , ok := ensureNodeProvidedIPExists (curNode , nodeAddresses ); ok {
376
- if nodeIP == nil {
377
- return errors .New ("failed to find kubelet node IP from cloud provider" )
378
- }
379
- }
374
+ nodeModifiers = append (nodeModifiers , func (n * v1.Node ) {
375
+ n .Spec .Taints = excludeCloudTaint (n .Spec .Taints )
376
+ })
380
377
381
- if instanceType , err := getInstanceTypeByProviderIDOrName (ctx , instances , curNode ); err != nil {
378
+ err = clientretry .RetryOnConflict (UpdateNodeSpecBackoff , func () error {
379
+ curNode , err := cnc .kubeClient .CoreV1 ().Nodes ().Get (node .Name , metav1.GetOptions {})
380
+ if err != nil {
382
381
return err
383
- } else if instanceType != "" {
384
- klog .V (2 ).Infof ("Adding node label from cloud provider: %s=%s" , v1 .LabelInstanceType , instanceType )
385
- curNode .ObjectMeta .Labels [v1 .LabelInstanceType ] = instanceType
386
- klog .V (2 ).Infof ("Adding node label from cloud provider: %s=%s" , v1 .LabelInstanceTypeStable , instanceType )
387
- curNode .ObjectMeta .Labels [v1 .LabelInstanceTypeStable ] = instanceType
388
382
}
389
383
390
- if zones , ok := cnc .cloud .Zones (); ok {
391
- zone , err := getZoneByProviderIDOrName (ctx , zones , curNode )
392
- if err != nil {
393
- return fmt .Errorf ("failed to get zone from cloud provider: %v" , err )
394
- }
395
- if zone .FailureDomain != "" {
396
- klog .V (2 ).Infof ("Adding node label from cloud provider: %s=%s" , v1 .LabelZoneFailureDomain , zone .FailureDomain )
397
- curNode .ObjectMeta .Labels [v1 .LabelZoneFailureDomain ] = zone .FailureDomain
398
- klog .V (2 ).Infof ("Adding node label from cloud provider: %s=%s" , v1 .LabelZoneFailureDomainStable , zone .FailureDomain )
399
- curNode .ObjectMeta .Labels [v1 .LabelZoneFailureDomainStable ] = zone .FailureDomain
400
- }
401
- if zone .Region != "" {
402
- klog .V (2 ).Infof ("Adding node label from cloud provider: %s=%s" , v1 .LabelZoneRegion , zone .Region )
403
- curNode .ObjectMeta .Labels [v1 .LabelZoneRegion ] = zone .Region
404
- klog .V (2 ).Infof ("Adding node label from cloud provider: %s=%s" , v1 .LabelZoneRegionStable , zone .Region )
405
- curNode .ObjectMeta .Labels [v1 .LabelZoneRegionStable ] = zone .Region
406
- }
384
+ for _ , modify := range nodeModifiers {
385
+ modify (curNode )
407
386
}
408
387
409
- curNode .Spec .Taints = excludeCloudTaint (curNode .Spec .Taints )
410
-
411
388
_ , err = cnc .kubeClient .CoreV1 ().Nodes ().Update (curNode )
412
389
if err != nil {
413
390
return err
414
391
}
392
+
415
393
// After adding, call UpdateNodeAddress to set the CloudProvider provided IPAddresses
416
394
// So that users do not see any significant delay in IP addresses being filled into the node
417
395
cnc .updateNodeAddress (ctx , curNode , instances )
@@ -425,6 +403,87 @@ func (cnc *CloudNodeController) initializeNode(ctx context.Context, node *v1.Nod
425
403
}
426
404
}
427
405
406
+ // getNodeModifiersFromCloudProvider returns a slice of nodeModifiers that update
407
+ // a node object with provider-specific information.
408
+ // All of the returned functions are idempotent, because they are used in a retry-if-conflict
409
+ // loop, meaning they could get called multiple times.
410
+ func (cnc * CloudNodeController ) getNodeModifiersFromCloudProvider (ctx context.Context , node * v1.Node , instances cloudprovider.Instances ) ([]nodeModifier , error ) {
411
+ var nodeModifiers []nodeModifier
412
+
413
+ if node .Spec .ProviderID == "" {
414
+ providerID , err := cloudprovider .GetInstanceProviderID (ctx , cnc .cloud , types .NodeName (node .Name ))
415
+ if err == nil {
416
+ nodeModifiers = append (nodeModifiers , func (n * v1.Node ) {
417
+ if n .Spec .ProviderID == "" {
418
+ n .Spec .ProviderID = providerID
419
+ }
420
+ })
421
+ } else {
422
+ // we should attempt to set providerID on node, but
423
+ // we can continue if we fail since we will attempt to set
424
+ // node addresses given the node name in getNodeAddressesByProviderIDOrName
425
+ klog .Errorf ("failed to set node provider id: %v" , err )
426
+ }
427
+ }
428
+
429
+ nodeAddresses , err := getNodeAddressesByProviderIDOrName (ctx , instances , node )
430
+ if err != nil {
431
+ return nil , err
432
+ }
433
+
434
+ // If user provided an IP address, ensure that IP address is found
435
+ // in the cloud provider before removing the taint on the node
436
+ if nodeIP , ok := ensureNodeProvidedIPExists (node , nodeAddresses ); ok {
437
+ if nodeIP == nil {
438
+ return nil , errors .New ("failed to find kubelet node IP from cloud provider" )
439
+ }
440
+ }
441
+
442
+ if instanceType , err := getInstanceTypeByProviderIDOrName (ctx , instances , node ); err != nil {
443
+ return nil , err
444
+ } else if instanceType != "" {
445
+ klog .V (2 ).Infof ("Adding node label from cloud provider: %s=%s" , v1 .LabelInstanceType , instanceType )
446
+ klog .V (2 ).Infof ("Adding node label from cloud provider: %s=%s" , v1 .LabelInstanceTypeStable , instanceType )
447
+ nodeModifiers = append (nodeModifiers , func (n * v1.Node ) {
448
+ if n .Labels == nil {
449
+ n .Labels = map [string ]string {}
450
+ }
451
+ n .Labels [v1 .LabelInstanceType ] = instanceType
452
+ n .Labels [v1 .LabelInstanceTypeStable ] = instanceType
453
+ })
454
+ }
455
+
456
+ if zones , ok := cnc .cloud .Zones (); ok {
457
+ zone , err := getZoneByProviderIDOrName (ctx , zones , node )
458
+ if err != nil {
459
+ return nil , fmt .Errorf ("failed to get zone from cloud provider: %v" , err )
460
+ }
461
+ if zone .FailureDomain != "" {
462
+ klog .V (2 ).Infof ("Adding node label from cloud provider: %s=%s" , v1 .LabelZoneFailureDomain , zone .FailureDomain )
463
+ klog .V (2 ).Infof ("Adding node label from cloud provider: %s=%s" , v1 .LabelZoneFailureDomainStable , zone .FailureDomain )
464
+ nodeModifiers = append (nodeModifiers , func (n * v1.Node ) {
465
+ if n .Labels == nil {
466
+ n .Labels = map [string ]string {}
467
+ }
468
+ n .Labels [v1 .LabelZoneFailureDomain ] = zone .FailureDomain
469
+ n .Labels [v1 .LabelZoneFailureDomainStable ] = zone .FailureDomain
470
+ })
471
+ }
472
+ if zone .Region != "" {
473
+ klog .V (2 ).Infof ("Adding node label from cloud provider: %s=%s" , v1 .LabelZoneRegion , zone .Region )
474
+ klog .V (2 ).Infof ("Adding node label from cloud provider: %s=%s" , v1 .LabelZoneRegionStable , zone .Region )
475
+ nodeModifiers = append (nodeModifiers , func (n * v1.Node ) {
476
+ if n .Labels == nil {
477
+ n .Labels = map [string ]string {}
478
+ }
479
+ n .Labels [v1 .LabelZoneRegion ] = zone .Region
480
+ n .Labels [v1 .LabelZoneRegionStable ] = zone .Region
481
+ })
482
+ }
483
+ }
484
+ return nodeModifiers , nil
485
+ }
486
+
428
487
func getCloudTaint (taints []v1.Taint ) * v1.Taint {
429
488
for _ , taint := range taints {
430
489
if taint .Key == schedulerapi .TaintExternalCloudProvider {
0 commit comments