Skip to content

Commit 18fa7bd

Browse files
committed
Cloud node controller: Only call once into cloud provider
1 parent 623b697 commit 18fa7bd

File tree

1 file changed

+118
-59
lines changed

1 file changed

+118
-59
lines changed

pkg/controller/cloud/node_controller.go

Lines changed: 118 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ import (
2222
"fmt"
2323
"time"
2424

25-
v1 "k8s.io/api/core/v1"
25+
"k8s.io/api/core/v1"
2626
apierrors "k8s.io/apimachinery/pkg/api/errors"
2727
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2828
"k8s.io/apimachinery/pkg/types"
@@ -287,6 +287,10 @@ func (cnc *CloudNodeController) updateNodeAddress(ctx context.Context, node *v1.
287287
}
288288
}
289289

290+
// nodeModifier is used to carry changes to node objects across multiple attempts to update them
291+
// in a retry-if-conflict loop.
292+
type nodeModifier func(*v1.Node)
293+
290294
func (cnc *CloudNodeController) UpdateCloudNode(ctx context.Context, _, newObj interface{}) {
291295
node, ok := newObj.(*v1.Node)
292296
if !ok {
@@ -318,6 +322,7 @@ func (cnc *CloudNodeController) AddCloudNode(ctx context.Context, obj interface{
318322

319323
// This processes nodes that were added into the cluster, and cloud initialize them if appropriate
320324
func (cnc *CloudNodeController) initializeNode(ctx context.Context, node *v1.Node) {
325+
klog.Infof("Initializing node %s with cloud provider", node.Name)
321326

322327
instances, ok := cnc.cloud.Instances()
323328
if !ok {
@@ -340,78 +345,51 @@ func (cnc *CloudNodeController) initializeNode(ctx context.Context, node *v1.Nod
340345
return err
341346
}
342347
}
348+
return nil
349+
})
350+
if err != nil {
351+
utilruntime.HandleError(err)
352+
return
353+
}
343354

344-
curNode, err := cnc.kubeClient.CoreV1().Nodes().Get(node.Name, metav1.GetOptions{})
345-
if err != nil {
346-
return err
347-
}
348-
349-
cloudTaint := getCloudTaint(curNode.Spec.Taints)
350-
if cloudTaint == nil {
351-
// Node object received from event had the cloud taint but was outdated,
352-
// the node has actually already been initialized.
353-
return nil
354-
}
355+
curNode, err := cnc.kubeClient.CoreV1().Nodes().Get(node.Name, metav1.GetOptions{})
356+
if err != nil {
357+
utilruntime.HandleError(fmt.Errorf("failed to get node %s: %v", node.Name, err))
358+
return
359+
}
355360

356-
if curNode.Spec.ProviderID == "" {
357-
providerID, err := cloudprovider.GetInstanceProviderID(ctx, cnc.cloud, types.NodeName(curNode.Name))
358-
if err == nil {
359-
curNode.Spec.ProviderID = providerID
360-
} else {
361-
// we should attempt to set providerID on curNode, but
362-
// we can continue if we fail since we will attempt to set
363-
// node addresses given the node name in getNodeAddressesByProviderIDOrName
364-
klog.Errorf("failed to set node provider id: %v", err)
365-
}
366-
}
361+
cloudTaint := getCloudTaint(curNode.Spec.Taints)
362+
if cloudTaint == nil {
363+
// Node object received from event had the cloud taint but was outdated,
364+
// the node has actually already been initialized.
365+
return
366+
}
367367

368-
nodeAddresses, err := getNodeAddressesByProviderIDOrName(ctx, instances, curNode)
369-
if err != nil {
370-
return err
371-
}
368+
nodeModifiers, err := cnc.getNodeModifiersFromCloudProvider(ctx, curNode, instances)
369+
if err != nil {
370+
utilruntime.HandleError(fmt.Errorf("failed to initialize node %s at cloudprovider: %v", node.Name, err))
371+
return
372+
}
372373

373-
// If user provided an IP address, ensure that IP address is found
374-
// in the cloud provider before removing the taint on the node
375-
if nodeIP, ok := ensureNodeProvidedIPExists(curNode, nodeAddresses); ok {
376-
if nodeIP == nil {
377-
return errors.New("failed to find kubelet node IP from cloud provider")
378-
}
379-
}
374+
nodeModifiers = append(nodeModifiers, func(n *v1.Node) {
375+
n.Spec.Taints = excludeCloudTaint(n.Spec.Taints)
376+
})
380377

381-
if instanceType, err := getInstanceTypeByProviderIDOrName(ctx, instances, curNode); err != nil {
378+
err = clientretry.RetryOnConflict(UpdateNodeSpecBackoff, func() error {
379+
curNode, err := cnc.kubeClient.CoreV1().Nodes().Get(node.Name, metav1.GetOptions{})
380+
if err != nil {
382381
return err
383-
} else if instanceType != "" {
384-
klog.V(2).Infof("Adding node label from cloud provider: %s=%s", v1.LabelInstanceType, instanceType)
385-
curNode.ObjectMeta.Labels[v1.LabelInstanceType] = instanceType
386-
klog.V(2).Infof("Adding node label from cloud provider: %s=%s", v1.LabelInstanceTypeStable, instanceType)
387-
curNode.ObjectMeta.Labels[v1.LabelInstanceTypeStable] = instanceType
388382
}
389383

390-
if zones, ok := cnc.cloud.Zones(); ok {
391-
zone, err := getZoneByProviderIDOrName(ctx, zones, curNode)
392-
if err != nil {
393-
return fmt.Errorf("failed to get zone from cloud provider: %v", err)
394-
}
395-
if zone.FailureDomain != "" {
396-
klog.V(2).Infof("Adding node label from cloud provider: %s=%s", v1.LabelZoneFailureDomain, zone.FailureDomain)
397-
curNode.ObjectMeta.Labels[v1.LabelZoneFailureDomain] = zone.FailureDomain
398-
klog.V(2).Infof("Adding node label from cloud provider: %s=%s", v1.LabelZoneFailureDomainStable, zone.FailureDomain)
399-
curNode.ObjectMeta.Labels[v1.LabelZoneFailureDomainStable] = zone.FailureDomain
400-
}
401-
if zone.Region != "" {
402-
klog.V(2).Infof("Adding node label from cloud provider: %s=%s", v1.LabelZoneRegion, zone.Region)
403-
curNode.ObjectMeta.Labels[v1.LabelZoneRegion] = zone.Region
404-
klog.V(2).Infof("Adding node label from cloud provider: %s=%s", v1.LabelZoneRegionStable, zone.Region)
405-
curNode.ObjectMeta.Labels[v1.LabelZoneRegionStable] = zone.Region
406-
}
384+
for _, modify := range nodeModifiers {
385+
modify(curNode)
407386
}
408387

409-
curNode.Spec.Taints = excludeCloudTaint(curNode.Spec.Taints)
410-
411388
_, err = cnc.kubeClient.CoreV1().Nodes().Update(curNode)
412389
if err != nil {
413390
return err
414391
}
392+
415393
// After adding, call UpdateNodeAddress to set the CloudProvider provided IPAddresses
416394
// So that users do not see any significant delay in IP addresses being filled into the node
417395
cnc.updateNodeAddress(ctx, curNode, instances)
@@ -425,6 +403,87 @@ func (cnc *CloudNodeController) initializeNode(ctx context.Context, node *v1.Nod
425403
}
426404
}
427405

406+
// getNodeModifiersFromCloudProvider returns a slice of nodeModifiers that update
407+
// a node object with provider-specific information.
408+
// All of the returned functions are idempotent, because they are used in a retry-if-conflict
409+
// loop, meaning they could get called multiple times.
410+
func (cnc *CloudNodeController) getNodeModifiersFromCloudProvider(ctx context.Context, node *v1.Node, instances cloudprovider.Instances) ([]nodeModifier, error) {
411+
var nodeModifiers []nodeModifier
412+
413+
if node.Spec.ProviderID == "" {
414+
providerID, err := cloudprovider.GetInstanceProviderID(ctx, cnc.cloud, types.NodeName(node.Name))
415+
if err == nil {
416+
nodeModifiers = append(nodeModifiers, func(n *v1.Node) {
417+
if n.Spec.ProviderID == "" {
418+
n.Spec.ProviderID = providerID
419+
}
420+
})
421+
} else {
422+
// we should attempt to set providerID on node, but
423+
// we can continue if we fail since we will attempt to set
424+
// node addresses given the node name in getNodeAddressesByProviderIDOrName
425+
klog.Errorf("failed to set node provider id: %v", err)
426+
}
427+
}
428+
429+
nodeAddresses, err := getNodeAddressesByProviderIDOrName(ctx, instances, node)
430+
if err != nil {
431+
return nil, err
432+
}
433+
434+
// If user provided an IP address, ensure that IP address is found
435+
// in the cloud provider before removing the taint on the node
436+
if nodeIP, ok := ensureNodeProvidedIPExists(node, nodeAddresses); ok {
437+
if nodeIP == nil {
438+
return nil, errors.New("failed to find kubelet node IP from cloud provider")
439+
}
440+
}
441+
442+
if instanceType, err := getInstanceTypeByProviderIDOrName(ctx, instances, node); err != nil {
443+
return nil, err
444+
} else if instanceType != "" {
445+
klog.V(2).Infof("Adding node label from cloud provider: %s=%s", v1.LabelInstanceType, instanceType)
446+
klog.V(2).Infof("Adding node label from cloud provider: %s=%s", v1.LabelInstanceTypeStable, instanceType)
447+
nodeModifiers = append(nodeModifiers, func(n *v1.Node) {
448+
if n.Labels == nil {
449+
n.Labels = map[string]string{}
450+
}
451+
n.Labels[v1.LabelInstanceType] = instanceType
452+
n.Labels[v1.LabelInstanceTypeStable] = instanceType
453+
})
454+
}
455+
456+
if zones, ok := cnc.cloud.Zones(); ok {
457+
zone, err := getZoneByProviderIDOrName(ctx, zones, node)
458+
if err != nil {
459+
return nil, fmt.Errorf("failed to get zone from cloud provider: %v", err)
460+
}
461+
if zone.FailureDomain != "" {
462+
klog.V(2).Infof("Adding node label from cloud provider: %s=%s", v1.LabelZoneFailureDomain, zone.FailureDomain)
463+
klog.V(2).Infof("Adding node label from cloud provider: %s=%s", v1.LabelZoneFailureDomainStable, zone.FailureDomain)
464+
nodeModifiers = append(nodeModifiers, func(n *v1.Node) {
465+
if n.Labels == nil {
466+
n.Labels = map[string]string{}
467+
}
468+
n.Labels[v1.LabelZoneFailureDomain] = zone.FailureDomain
469+
n.Labels[v1.LabelZoneFailureDomainStable] = zone.FailureDomain
470+
})
471+
}
472+
if zone.Region != "" {
473+
klog.V(2).Infof("Adding node label from cloud provider: %s=%s", v1.LabelZoneRegion, zone.Region)
474+
klog.V(2).Infof("Adding node label from cloud provider: %s=%s", v1.LabelZoneRegionStable, zone.Region)
475+
nodeModifiers = append(nodeModifiers, func(n *v1.Node) {
476+
if n.Labels == nil {
477+
n.Labels = map[string]string{}
478+
}
479+
n.Labels[v1.LabelZoneRegion] = zone.Region
480+
n.Labels[v1.LabelZoneRegionStable] = zone.Region
481+
})
482+
}
483+
}
484+
return nodeModifiers, nil
485+
}
486+
428487
func getCloudTaint(taints []v1.Taint) *v1.Taint {
429488
for _, taint := range taints {
430489
if taint.Key == schedulerapi.TaintExternalCloudProvider {

0 commit comments

Comments
 (0)