Skip to content

Commit 8881b03

Browse files
authored
Bugfix: Drain machines with only a valid NodeName (#480)
* Bugfix: Drain machines with only a valid NodeName * Extend fix to OOT provider * Fixed unit test * Made suggested change
1 parent fba8a1b commit 8881b03

File tree

5 files changed

+186
-12
lines changed

5 files changed

+186
-12
lines changed

pkg/controller/machine.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -599,7 +599,9 @@ func (c *controller) machineDelete(machine *v1alpha1.Machine, driver driver.Driv
599599
}
600600
}
601601

602-
if machineID != "" {
602+
if machineID != "" && nodeName != "" {
603+
// Begin drain logic only when both the nodeName and the providerID exist for the machine
604+
603605
var (
604606
forceDeletePods = false
605607
forceDeleteMachine = false

pkg/controller/machine_test.go

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -822,6 +822,7 @@ var _ = Describe("machine", func() {
822822
machine *machinev1.Machine
823823
errOccurred bool
824824
machineDeleted bool
825+
nodeDeleted bool
825826
}
826827
type data struct {
827828
setup setup
@@ -948,11 +949,14 @@ var _ = Describe("machine", func() {
948949

949950
if data.expect.machineDeleted {
950951
Expect(machineErr).To(HaveOccurred())
951-
Expect(nodeErr).To(HaveOccurred())
952952
} else {
953953
Expect(machineErr).ToNot(HaveOccurred())
954954
Expect(machine).ToNot(BeNil())
955+
}
955956

957+
if data.expect.nodeDeleted {
958+
Expect(nodeErr).To(HaveOccurred())
959+
} else {
956960
Expect(nodeErr).ToNot(HaveOccurred())
957961
Expect(node).ToNot(BeNil())
958962
}
@@ -991,6 +995,7 @@ var _ = Describe("machine", func() {
991995
expect: expect{
992996
errOccurred: false,
993997
machineDeleted: true,
998+
nodeDeleted: true,
994999
},
9951000
}),
9961001
Entry("Allow proper deletion of the machine object when providerID is missing but actual VM still exists in cloud", &data{
@@ -1030,6 +1035,7 @@ var _ = Describe("machine", func() {
10301035
expect: expect{
10311036
errOccurred: false,
10321037
machineDeleted: true,
1038+
nodeDeleted: true,
10331039
},
10341040
}),
10351041
Entry("Machine deletion when drain fails", &data{
@@ -1071,6 +1077,7 @@ var _ = Describe("machine", func() {
10711077
expect: expect{
10721078
errOccurred: true,
10731079
machineDeleted: false,
1080+
nodeDeleted: false,
10741081
},
10751082
}),
10761083
Entry("Machine force deletion label is present", &data{
@@ -1108,6 +1115,7 @@ var _ = Describe("machine", func() {
11081115
expect: expect{
11091116
errOccurred: false,
11101117
machineDeleted: true,
1118+
nodeDeleted: true,
11111119
},
11121120
}),
11131121
Entry("Machine force deletion label is present and when drain call fails (APIServer call fails)", &data{
@@ -1150,6 +1158,7 @@ var _ = Describe("machine", func() {
11501158
expect: expect{
11511159
errOccurred: false,
11521160
machineDeleted: true,
1161+
nodeDeleted: true,
11531162
},
11541163
}),
11551164
Entry("Machine deletion when timeout occurred", &data{
@@ -1201,6 +1210,7 @@ var _ = Describe("machine", func() {
12011210
expect: expect{
12021211
errOccurred: false,
12031212
machineDeleted: true,
1213+
nodeDeleted: true,
12041214
},
12051215
}),
12061216
Entry("Machine deletion when last drain failed", &data{
@@ -1251,6 +1261,7 @@ var _ = Describe("machine", func() {
12511261
expect: expect{
12521262
errOccurred: false,
12531263
machineDeleted: true,
1264+
nodeDeleted: true,
12541265
},
12551266
}),
12561267
Entry("Machine deletion when last drain failed and current drain call also fails (APIServer call fails)", &data{
@@ -1306,6 +1317,7 @@ var _ = Describe("machine", func() {
13061317
expect: expect{
13071318
errOccurred: true,
13081319
machineDeleted: false,
1320+
nodeDeleted: false,
13091321
},
13101322
}),
13111323
Entry("Machine force deletion if underlying Node is NotReady for a long time", &data{
@@ -1343,6 +1355,7 @@ var _ = Describe("machine", func() {
13431355
expect: expect{
13441356
errOccurred: false,
13451357
machineDeleted: true,
1358+
nodeDeleted: true,
13461359
},
13471360
}),
13481361
Entry("Machine do not force deletion if underlying Node is NotReady for a small period of time, a Machine deletion fails, since kubelet fails to evict Pods", &data{
@@ -1385,6 +1398,44 @@ var _ = Describe("machine", func() {
13851398
expect: expect{
13861399
errOccurred: true,
13871400
machineDeleted: false,
1401+
nodeDeleted: false,
1402+
},
1403+
}),
1404+
Entry("Allow machine object deletion where nodeName doesn't exist", &data{
1405+
setup: setup{
1406+
secrets: []*corev1.Secret{
1407+
{
1408+
ObjectMeta: *newObjectMeta(objMeta, 0),
1409+
},
1410+
},
1411+
aws: []*machinev1.AWSMachineClass{
1412+
{
1413+
ObjectMeta: *newObjectMeta(objMeta, 0),
1414+
Spec: machinev1.AWSMachineClassSpec{
1415+
SecretRef: newSecretReference(objMeta, 0),
1416+
},
1417+
},
1418+
},
1419+
machines: newMachines(1, &machinev1.MachineTemplateSpec{
1420+
ObjectMeta: *newObjectMeta(objMeta, 0),
1421+
Spec: machinev1.MachineSpec{
1422+
Class: machinev1.ClassSpec{
1423+
Kind: "AWSMachineClass",
1424+
Name: "machine-0",
1425+
},
1426+
},
1427+
}, nil, nil, nil, nil),
1428+
},
1429+
action: action{
1430+
machine: "machine-0",
1431+
fakeProviderID: "fakeID-0",
1432+
fakeNodeName: "", //NodeName is set to emptyString
1433+
fakeError: nil,
1434+
},
1435+
expect: expect{
1436+
errOccurred: false,
1437+
machineDeleted: true,
1438+
nodeDeleted: false,
13881439
},
13891440
}),
13901441
)

pkg/util/provider/machinecontroller/controller_suite_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,9 @@ import (
4545
"k8s.io/utils/pointer"
4646
)
4747

48-
func TestMachineControllerManagerSuite(t *testing.T) {
48+
func TestMachineControllerSuite(t *testing.T) {
4949
RegisterFailHandler(Fail)
50-
RunSpecs(t, "Machine Controller Manager Suite")
50+
RunSpecs(t, "Machine Controller Suite")
5151
}
5252

5353
var (

pkg/util/provider/machinecontroller/machine_test.go

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1226,6 +1226,108 @@ var _ = Describe("machine", func() {
12261226
),
12271227
},
12281228
}),
1229+
Entry("Drain skipping as nodeName is not valid", &data{
1230+
setup: setup{
1231+
secrets: []*corev1.Secret{
1232+
{
1233+
ObjectMeta: *newObjectMeta(objMeta, 0),
1234+
},
1235+
},
1236+
machineClasses: []*v1alpha1.MachineClass{
1237+
{
1238+
ObjectMeta: *newObjectMeta(objMeta, 0),
1239+
SecretRef: newSecretReference(objMeta, 0),
1240+
},
1241+
},
1242+
machines: newMachines(
1243+
1,
1244+
&v1alpha1.MachineTemplateSpec{
1245+
ObjectMeta: *newObjectMeta(objMeta, 0),
1246+
Spec: v1alpha1.MachineSpec{
1247+
Class: v1alpha1.ClassSpec{
1248+
Kind: "MachineClass",
1249+
Name: "machine-0",
1250+
},
1251+
ProviderID: "fakeID",
1252+
},
1253+
},
1254+
&v1alpha1.MachineStatus{
1255+
Node: "fakeNode",
1256+
CurrentStatus: v1alpha1.CurrentStatus{
1257+
Phase: v1alpha1.MachineTerminating,
1258+
LastUpdateTime: metav1.Now(),
1259+
},
1260+
LastOperation: v1alpha1.LastOperation{
1261+
Description: machineutils.InitiateDrain,
1262+
State: v1alpha1.MachineStateProcessing,
1263+
Type: v1alpha1.MachineOperationDelete,
1264+
LastUpdateTime: metav1.Now(),
1265+
},
1266+
Conditions: []corev1.NodeCondition{
1267+
{
1268+
Type: corev1.NodeReady,
1269+
Status: corev1.ConditionUnknown,
1270+
LastTransitionTime: metav1.NewTime(time.Now().Add(-6 * time.Minute)),
1271+
},
1272+
},
1273+
},
1274+
nil,
1275+
map[string]string{
1276+
machineutils.MachinePriority: "3",
1277+
},
1278+
map[string]string{
1279+
"node": "",
1280+
},
1281+
true,
1282+
),
1283+
},
1284+
action: action{
1285+
machine: "machine-0",
1286+
fakeDriver: &driver.FakeDriver{
1287+
VMExists: true,
1288+
ProviderID: "fakeID-0",
1289+
NodeName: "",
1290+
Err: nil,
1291+
},
1292+
},
1293+
expect: expect{
1294+
err: fmt.Errorf("Skipping drain as nodeName is not a valid one for machine. Initiate VM deletion"),
1295+
retry: machineutils.RetryOp,
1296+
machine: newMachine(
1297+
&v1alpha1.MachineTemplateSpec{
1298+
ObjectMeta: *newObjectMeta(objMeta, 0),
1299+
Spec: v1alpha1.MachineSpec{
1300+
Class: v1alpha1.ClassSpec{
1301+
Kind: "MachineClass",
1302+
Name: "machine-0",
1303+
},
1304+
ProviderID: "fakeID",
1305+
},
1306+
},
1307+
&v1alpha1.MachineStatus{
1308+
Node: "fakeNode",
1309+
CurrentStatus: v1alpha1.CurrentStatus{
1310+
Phase: v1alpha1.MachineTerminating,
1311+
LastUpdateTime: metav1.Now(),
1312+
},
1313+
LastOperation: v1alpha1.LastOperation{
1314+
Description: fmt.Sprintf("Skipping drain as nodeName is not a valid one for machine. Initiate VM deletion"),
1315+
State: v1alpha1.MachineStateProcessing,
1316+
Type: v1alpha1.MachineOperationDelete,
1317+
LastUpdateTime: metav1.Now(),
1318+
},
1319+
},
1320+
nil,
1321+
map[string]string{
1322+
machineutils.MachinePriority: "3",
1323+
},
1324+
map[string]string{
1325+
"node": "",
1326+
},
1327+
true,
1328+
),
1329+
},
1330+
}),
12291331
Entry("Drain skipping as machine is NotReady for a long time (5 minutes)", &data{
12301332
setup: setup{
12311333
secrets: []*corev1.Secret{

pkg/util/provider/machinecontroller/machine_util.go

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -803,6 +803,16 @@ func (c *controller) getVMStatus(getMachineStatusRequest *driver.GetMachineStatu
803803
return retry, err
804804
}
805805

806+
// isValidNodeName checks if the nodeName is valid
807+
func isValidNodeName(nodeName string) bool {
808+
if nodeName == "" {
809+
// if nodeName is empty
810+
return false
811+
}
812+
813+
return true
814+
}
815+
806816
// drainNode attempts to drain the node backed by the machine object
807817
func (c *controller) drainNode(deleteMachineRequest *driver.DeleteMachineRequest) (machineutils.Retry, error) {
808818
var (
@@ -826,20 +836,29 @@ func (c *controller) drainNode(deleteMachineRequest *driver.DeleteMachineRequest
826836
nodeNotReadyDuration = 5 * time.Minute
827837
)
828838

829-
for _, condition := range machine.Status.Conditions {
830-
if condition.Type == v1.NodeReady && condition.Status != corev1.ConditionTrue && (time.Since(condition.LastTransitionTime.Time) > nodeNotReadyDuration) {
831-
klog.Warningf("Skipping drain for NotReady machine %q", machine.Name)
832-
err = fmt.Errorf("Skipping drain as machine is NotReady for over 5minutes. %s", machineutils.InitiateVMDeletion)
833-
skipDrain = true
839+
if !isValidNodeName(nodeName) {
840+
klog.Warningf("Skipping drain as nodeName is not a valid one for machine %q", machine.Name)
841+
err = fmt.Errorf("Skipping drain as nodeName is not a valid one for machine. %s", machineutils.InitiateVMDeletion)
842+
description = fmt.Sprintf("Skipping drain as nodeName is not a valid one for machine. %s", machineutils.InitiateVMDeletion)
843+
skipDrain = true
844+
} else {
845+
for _, condition := range machine.Status.Conditions {
846+
if condition.Type == v1.NodeReady {
847+
if condition.Status != corev1.ConditionTrue && (time.Since(condition.LastTransitionTime.Time) > nodeNotReadyDuration) {
848+
klog.Warningf("Skipping drain for NotReady machine %q", machine.Name)
849+
err = fmt.Errorf("Skipping drain as machine is NotReady for over 5minutes. %s", machineutils.InitiateVMDeletion)
850+
description = fmt.Sprintf("Skipping drain as machine is NotReady for over 5minutes. %s", machineutils.InitiateVMDeletion)
851+
skipDrain = true
852+
}
853+
// break once the condition is found
854+
break
855+
}
834856
}
835857
}
836858

837859
if skipDrain {
838-
// If node is not ready for over 5 minutes, skip draining this machine
839-
description = fmt.Sprintf("Skipping drain as machine is NotReady for over 5minutes. %s", machineutils.InitiateVMDeletion)
840860
state = v1alpha1.MachineStateProcessing
841861
phase = v1alpha1.MachineTerminating
842-
843862
} else {
844863
// Timeout value obtained by subtracting last operation with expected time out period
845864
timeOut := metav1.Now().Add(-timeOutDuration).Sub(machine.Status.CurrentStatus.LastUpdateTime.Time)

0 commit comments

Comments
 (0)