Skip to content

Commit 46d522d

Browse files
committed
cloud-node-lifecycle controller: add missing instancev2 calls for node exists and node shutdown
A cloud provider should have the option to only implement the new InstanceV2 interface. Currently the cloud nodelifecycle controller only makes instancev1 calls when it should prefer instancev2 if supported and fallback to the existing instancev1 otherwise. Signed-off-by: Andrew Sy Kim <[email protected]>
1 parent 82bdba9 commit 46d522d

File tree

3 files changed

+252
-10
lines changed

3 files changed

+252
-10
lines changed

staging/src/k8s.io/cloud-provider/controllers/node/node_controller.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,9 @@ func NewCloudNodeController(
108108
klog.Infof("Sending events to api server.")
109109
eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")})
110110

111-
if _, ok := cloud.Instances(); !ok {
111+
_, instancesSupported := cloud.Instances()
112+
_, instancesV2Supported := cloud.InstancesV2()
113+
if !instancesSupported && !instancesV2Supported {
112114
return nil, errors.New("cloud provider does not support instances")
113115
}
114116

staging/src/k8s.io/cloud-provider/controllers/nodelifecycle/node_lifecycle_controller.go

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,9 @@ func NewCloudNodeLifecycleController(
8585
return nil, errors.New("no cloud provider provided")
8686
}
8787

88-
if _, ok := cloud.Instances(); !ok {
88+
_, instancesSupported := cloud.Instances()
89+
_, instancesV2Supported := cloud.InstancesV2()
90+
if !instancesSupported && !instancesV2Supported {
8991
return nil, errors.New("cloud provider does not support instances")
9092
}
9193

@@ -118,12 +120,6 @@ func (c *CloudNodeLifecycleController) Run(stopCh <-chan struct{}) {
118120
// or shutdown. If deleted, it deletes the node resource. If shutdown it
119121
// applies a shutdown taint to the node
120122
func (c *CloudNodeLifecycleController) MonitorNodes() {
121-
instances, ok := c.cloud.Instances()
122-
if !ok {
123-
utilruntime.HandleError(fmt.Errorf("failed to get instances from cloud provider"))
124-
return
125-
}
126-
127123
nodes, err := c.nodeLister.List(labels.Everything())
128124
if err != nil {
129125
klog.Errorf("error listing nodes from cache: %s", err)
@@ -148,7 +144,7 @@ func (c *CloudNodeLifecycleController) MonitorNodes() {
148144

149145
// At this point the node has NotReady status, we need to check if the node has been removed
150146
// from the cloud provider. If node cannot be found in cloudprovider, then delete the node
151-
exists, err := ensureNodeExistsByProviderID(context.TODO(), instances, node)
147+
exists, err := ensureNodeExistsByProviderID(context.TODO(), c.cloud, node)
152148
if err != nil {
153149
klog.Errorf("error checking if node %s exists: %v", node.Name, err)
154150
continue
@@ -195,6 +191,10 @@ func (c *CloudNodeLifecycleController) MonitorNodes() {
195191

196192
// shutdownInCloudProvider returns true if the node is shutdown on the cloud provider
197193
func shutdownInCloudProvider(ctx context.Context, cloud cloudprovider.Interface, node *v1.Node) (bool, error) {
194+
if instanceV2, ok := cloud.InstancesV2(); ok {
195+
return instanceV2.InstanceShutdown(ctx, node)
196+
}
197+
198198
instances, ok := cloud.Instances()
199199
if !ok {
200200
return false, errors.New("cloud provider does not support instances")
@@ -210,7 +210,16 @@ func shutdownInCloudProvider(ctx context.Context, cloud cloudprovider.Interface,
210210

211211
// ensureNodeExistsByProviderID checks if the instance exists by the provider id,
212212
// If provider id in spec is empty it calls instanceId with node name to get provider id
213-
func ensureNodeExistsByProviderID(ctx context.Context, instances cloudprovider.Instances, node *v1.Node) (bool, error) {
213+
func ensureNodeExistsByProviderID(ctx context.Context, cloud cloudprovider.Interface, node *v1.Node) (bool, error) {
214+
if instanceV2, ok := cloud.InstancesV2(); ok {
215+
return instanceV2.InstanceExists(ctx, node)
216+
}
217+
218+
instances, ok := cloud.Instances()
219+
if !ok {
220+
return false, errors.New("instances interface not supported in the cloud provider")
221+
}
222+
214223
providerID := node.Spec.ProviderID
215224
if providerID == "" {
216225
var err error

staging/src/k8s.io/cloud-provider/controllers/nodelifecycle/node_lifecycle_controller_test.go

Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,237 @@ func Test_NodesDeleted(t *testing.T) {
269269
ExistsByProviderID: false,
270270
},
271271
},
272+
{
273+
name: "[instance2] node is not ready and does not exist",
274+
existingNode: &v1.Node{
275+
ObjectMeta: metav1.ObjectMeta{
276+
Name: "node0",
277+
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
278+
},
279+
Status: v1.NodeStatus{
280+
Conditions: []v1.NodeCondition{
281+
{
282+
Type: v1.NodeReady,
283+
Status: v1.ConditionFalse,
284+
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
285+
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
286+
},
287+
},
288+
},
289+
},
290+
expectedDeleted: true,
291+
fakeCloud: &fakecloud.Cloud{
292+
EnableInstancesV2: true,
293+
ExistsByProviderID: false,
294+
},
295+
},
296+
{
297+
name: "[instancev2] node is not ready and provider returns err",
298+
existingNode: &v1.Node{
299+
ObjectMeta: metav1.ObjectMeta{
300+
Name: "node0",
301+
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
302+
},
303+
Spec: v1.NodeSpec{
304+
ProviderID: "node0",
305+
},
306+
Status: v1.NodeStatus{
307+
Conditions: []v1.NodeCondition{
308+
{
309+
Type: v1.NodeReady,
310+
Status: v1.ConditionFalse,
311+
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
312+
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
313+
},
314+
},
315+
},
316+
},
317+
expectedNode: &v1.Node{
318+
ObjectMeta: metav1.ObjectMeta{
319+
Name: "node0",
320+
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
321+
},
322+
Spec: v1.NodeSpec{
323+
ProviderID: "node0",
324+
},
325+
Status: v1.NodeStatus{
326+
Conditions: []v1.NodeCondition{
327+
{
328+
Type: v1.NodeReady,
329+
Status: v1.ConditionFalse,
330+
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
331+
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
332+
},
333+
},
334+
},
335+
},
336+
expectedDeleted: false,
337+
fakeCloud: &fakecloud.Cloud{
338+
EnableInstancesV2: true,
339+
ExistsByProviderID: false,
340+
ErrByProviderID: errors.New("err!"),
341+
},
342+
},
343+
{
344+
name: "[instancev2] node is not ready but still exists",
345+
existingNode: &v1.Node{
346+
ObjectMeta: metav1.ObjectMeta{
347+
Name: "node0",
348+
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
349+
},
350+
Spec: v1.NodeSpec{
351+
ProviderID: "node0",
352+
},
353+
Status: v1.NodeStatus{
354+
Conditions: []v1.NodeCondition{
355+
{
356+
Type: v1.NodeReady,
357+
Status: v1.ConditionFalse,
358+
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
359+
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
360+
},
361+
},
362+
},
363+
},
364+
expectedNode: &v1.Node{
365+
ObjectMeta: metav1.ObjectMeta{
366+
Name: "node0",
367+
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
368+
},
369+
Spec: v1.NodeSpec{
370+
ProviderID: "node0",
371+
},
372+
Status: v1.NodeStatus{
373+
Conditions: []v1.NodeCondition{
374+
{
375+
Type: v1.NodeReady,
376+
Status: v1.ConditionFalse,
377+
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
378+
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
379+
},
380+
},
381+
},
382+
},
383+
expectedDeleted: false,
384+
fakeCloud: &fakecloud.Cloud{
385+
EnableInstancesV2: true,
386+
ExistsByProviderID: true,
387+
},
388+
},
389+
{
390+
name: "[instancev2] node ready condition is unknown, node doesn't exist",
391+
existingNode: &v1.Node{
392+
ObjectMeta: metav1.ObjectMeta{
393+
Name: "node0",
394+
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
395+
},
396+
Status: v1.NodeStatus{
397+
Conditions: []v1.NodeCondition{
398+
{
399+
Type: v1.NodeReady,
400+
Status: v1.ConditionUnknown,
401+
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
402+
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
403+
},
404+
},
405+
},
406+
},
407+
expectedDeleted: true,
408+
fakeCloud: &fakecloud.Cloud{
409+
EnableInstancesV2: true,
410+
ExistsByProviderID: false,
411+
},
412+
},
413+
{
414+
name: "[instancev2] node ready condition is unknown, node exists",
415+
existingNode: &v1.Node{
416+
ObjectMeta: metav1.ObjectMeta{
417+
Name: "node0",
418+
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
419+
},
420+
Status: v1.NodeStatus{
421+
Conditions: []v1.NodeCondition{
422+
{
423+
Type: v1.NodeReady,
424+
Status: v1.ConditionUnknown,
425+
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
426+
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
427+
},
428+
},
429+
},
430+
},
431+
expectedNode: &v1.Node{
432+
ObjectMeta: metav1.ObjectMeta{
433+
Name: "node0",
434+
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
435+
},
436+
Status: v1.NodeStatus{
437+
Conditions: []v1.NodeCondition{
438+
{
439+
Type: v1.NodeReady,
440+
Status: v1.ConditionUnknown,
441+
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
442+
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
443+
},
444+
},
445+
},
446+
},
447+
expectedDeleted: false,
448+
fakeCloud: &fakecloud.Cloud{
449+
EnableInstancesV2: true,
450+
NodeShutdown: false,
451+
ExistsByProviderID: true,
452+
ExtID: map[types.NodeName]string{
453+
types.NodeName("node0"): "foo://12345",
454+
},
455+
},
456+
},
457+
{
458+
name: "[instancev2] node is ready, but provider said it is deleted (maybe a bug in provider)",
459+
existingNode: &v1.Node{
460+
ObjectMeta: metav1.ObjectMeta{
461+
Name: "node0",
462+
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
463+
},
464+
Spec: v1.NodeSpec{
465+
ProviderID: "node0",
466+
},
467+
Status: v1.NodeStatus{
468+
Conditions: []v1.NodeCondition{
469+
{
470+
Type: v1.NodeReady,
471+
Status: v1.ConditionTrue,
472+
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
473+
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
474+
},
475+
},
476+
},
477+
},
478+
expectedNode: &v1.Node{
479+
ObjectMeta: metav1.ObjectMeta{
480+
Name: "node0",
481+
CreationTimestamp: metav1.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
482+
},
483+
Spec: v1.NodeSpec{
484+
ProviderID: "node0",
485+
},
486+
Status: v1.NodeStatus{
487+
Conditions: []v1.NodeCondition{
488+
{
489+
Type: v1.NodeReady,
490+
Status: v1.ConditionTrue,
491+
LastHeartbeatTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
492+
LastTransitionTime: metav1.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
493+
},
494+
},
495+
},
496+
},
497+
expectedDeleted: false,
498+
fakeCloud: &fakecloud.Cloud{
499+
EnableInstancesV2: true,
500+
ExistsByProviderID: false,
501+
},
502+
},
272503
}
273504

274505
for _, testcase := range testcases {

0 commit comments

Comments
 (0)