Skip to content

Commit 457025e

Browse files
Improve machine creation and deletion flow by listing the machines before actual creation/deletion (#432)
* Improve machine creation and deletion flow by verifying the VMs before actually creating or deleting the machine. * Added unit-tests * Address review comments * Update pkg/controller/machine.go Co-authored-by: Prashanth <prashanth@sap.com>
1 parent 68b97d2 commit 457025e

File tree

5 files changed

+235
-15
lines changed

5 files changed

+235
-15
lines changed

pkg/controller/machine.go

Lines changed: 76 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -266,14 +266,39 @@ func (c *controller) getMachineFromNode(nodeName string) (*v1alpha1.Machine, err
266266
}
267267

268268
func (c *controller) updateMachineState(machine *v1alpha1.Machine) (*v1alpha1.Machine, error) {
269+
nodeName := machine.Status.Node
269270

270-
if machine.Status.Node == "" {
271-
// There are no objects mapped to this machine object
272-
// Hence node status need not be propogated to machine object
273-
return machine, nil
271+
if nodeName == "" {
272+
// Check if any existing node-object can be adopted.
273+
nodeList, err := c.nodeLister.List(labels.Everything())
274+
if err != nil {
275+
klog.Errorf("Could not list the nodes due to error: %v", err)
276+
return machine, err
277+
}
278+
for _, node := range nodeList {
279+
nID, mID := decodeMachineID(node.Spec.ProviderID), decodeMachineID(machine.Spec.ProviderID)
280+
if nID == mID {
281+
klog.V(2).Infof("Adopting the node object %s for machine %s", node.Name, machine.Name)
282+
nodeName = node.Name
283+
clone := machine.DeepCopy()
284+
clone.Status.Node = nodeName
285+
clone, err = c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(clone)
286+
if err != nil {
287+
klog.Errorf("Could not update status of the machine-object %s due to error %v", machine.Name, err)
288+
return machine, err
289+
}
290+
break
291+
}
292+
}
293+
// Couldnt adopt any node-object.
294+
if nodeName == "" {
295+
// There are no objects mapped to this machine object
296+
// Hence node status need not be propogated to machine object
297+
return machine, nil
298+
}
274299
}
275300

276-
node, err := c.nodeLister.Get(machine.Status.Node)
301+
node, err := c.nodeLister.Get(nodeName)
277302
if err != nil && apierrors.IsNotFound(err) {
278303
// Node object is not found
279304

@@ -325,7 +350,7 @@ func (c *controller) updateMachineState(machine *v1alpha1.Machine) (*v1alpha1.Ma
325350
clone.Labels = make(map[string]string)
326351
}
327352

328-
if _, ok := clone.Labels["node"]; !ok {
353+
if n := clone.Labels["node"]; n == "" {
329354
clone.Labels["node"] = machine.Status.Node
330355
machine, err = c.controlMachineClient.Machines(clone.Namespace).Update(clone)
331356
if err != nil {
@@ -344,6 +369,7 @@ func (c *controller) updateMachineState(machine *v1alpha1.Machine) (*v1alpha1.Ma
344369

345370
func (c *controller) machineCreate(machine *v1alpha1.Machine, driver driver.Driver) error {
346371
klog.V(2).Infof("Creating machine %q, please wait!", machine.Name)
372+
var actualProviderID, nodeName string
347373

348374
err := c.addBootstrapTokenToUserData(machine.Name, driver)
349375
if err != nil {
@@ -362,7 +388,22 @@ func (c *controller) machineCreate(machine *v1alpha1.Machine, driver driver.Driv
362388
c.updateMachineStatus(machine, lastOperation, currentStatus)
363389
return err
364390
}
365-
actualProviderID, nodeName, err := driver.Create()
391+
// Before actually creating the machine, we should once check and adopt if the virtual machine already exists.
392+
VMList, err := driver.GetVMs("")
393+
if err != nil {
394+
klog.Errorf("Failed to list VMs before creating machine %q %+v", machine.Name, err)
395+
return err
396+
}
397+
for providerID, machineName := range VMList {
398+
if machineName == machine.Name {
399+
klog.V(2).Infof("Adopted an existing VM %s for machine object %s.", providerID, machineName)
400+
actualProviderID = providerID
401+
}
402+
}
403+
if actualProviderID == "" {
404+
actualProviderID, nodeName, err = driver.Create()
405+
}
406+
366407
if err != nil {
367408
klog.Errorf("Error while creating machine %s: %s", machine.Name, err.Error())
368409
lastOperation := v1alpha1.LastOperation{
@@ -385,7 +426,7 @@ func (c *controller) machineCreate(machine *v1alpha1.Machine, driver driver.Driv
385426

386427
return err
387428
}
388-
klog.V(2).Infof("Created machine: %q, MachineID: %s", machine.Name, actualProviderID)
429+
klog.V(2).Infof("Created/Adopted machine: %q, MachineID: %s", machine.Name, actualProviderID)
389430

390431
for {
391432
machineName := machine.Name
@@ -614,6 +655,27 @@ func (c *controller) machineDelete(machine *v1alpha1.Machine, driver driver.Driv
614655
}
615656

616657
err = driver.Delete(machineID)
658+
} else {
659+
klog.V(2).Infof("Machine %q on deletion doesn't have a providerID attached to it. Checking for any VM linked to this machine object.", machine.Name)
660+
// As MachineID is missing, we should check once if actual VM was created but MachineID was not updated on machine-object.
661+
// We list VMs and check if any one them map with the given machine-object.
662+
VMList, err := driver.GetVMs("")
663+
if err != nil {
664+
klog.Errorf("Failed to list VMs while deleting the machine %q %v", machine.Name, err)
665+
return err
666+
}
667+
for providerID, machineName := range VMList {
668+
if machineName == machine.Name {
669+
klog.V(2).Infof("Deleting the VM %s backing the machine-object %s.", providerID, machine.Name)
670+
err = driver.Delete(providerID)
671+
if err != nil {
672+
klog.Errorf("Error deleting the VM %s backing the machine-object %s due to error %v", providerID, machine.Name, err)
673+
// Not returning error so that status on the machine object can be updated in the next step if errored.
674+
} else {
675+
klog.V(2).Infof("VM %s backing the machine-object %s is deleted successfully", providerID, machine.Name)
676+
}
677+
}
678+
}
617679
}
618680

619681
if err != nil {
@@ -952,3 +1014,9 @@ func shouldReconcileMachine(machine *v1alpha1.Machine, now time.Time) bool {
9521014

9531015
return true
9541016
}
1017+
1018+
// decodeMachineID is a generic way of decoding the Spec.ProviderID field of node-objects.
1019+
func decodeMachineID(id string) string {
1020+
splitProviderID := strings.Split(id, "/")
1021+
return splitProviderID[len(splitProviderID)-1]
1022+
}

pkg/controller/machine_test.go

Lines changed: 150 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -511,7 +511,11 @@ var _ = Describe("machine", func() {
511511
},
512512
func(string) error {
513513
return action.fakeError
514-
}, nil))
514+
}, nil,
515+
func() (driver.VMs, error) {
516+
return map[string]string{}, nil
517+
},
518+
))
515519

516520
if data.expect.err {
517521
Expect(err).To(HaveOccurred())
@@ -733,6 +737,67 @@ var _ = Describe("machine", func() {
733737
err: true,
734738
},
735739
}),
740+
Entry("If ProviderID is available and node-name missing, ProviderID should be set back on machine object", &data{
741+
setup: setup{
742+
secrets: []*corev1.Secret{
743+
{
744+
ObjectMeta: *newObjectMeta(objMeta, 0),
745+
},
746+
},
747+
aws: []*machinev1.AWSMachineClass{
748+
{
749+
ObjectMeta: *newObjectMeta(objMeta, 0),
750+
Spec: machinev1.AWSMachineClassSpec{
751+
SecretRef: newSecretReference(objMeta, 0),
752+
},
753+
},
754+
},
755+
machines: newMachines(1, &machinev1.MachineTemplateSpec{
756+
ObjectMeta: *newObjectMeta(objMeta, 0),
757+
Spec: machinev1.MachineSpec{
758+
Class: machinev1.ClassSpec{
759+
Kind: "AWSMachineClass",
760+
Name: "machine-0",
761+
},
762+
},
763+
}, nil, nil, nil, nil),
764+
fakeResourceActions: &customfake.ResourceActions{
765+
Machine: customfake.Actions{
766+
Get: apierrors.NewGenericServerResponse(
767+
http.StatusBadRequest,
768+
"dummy method",
769+
schema.GroupResource{},
770+
"dummy name",
771+
"Failed to GET machine",
772+
30,
773+
true,
774+
),
775+
},
776+
},
777+
},
778+
action: action{
779+
machine: "machine-0",
780+
fakeProviderID: "providerid-0",
781+
fakeNodeName: "",
782+
fakeError: nil,
783+
},
784+
expect: expect{
785+
machine: newMachine(&machinev1.MachineTemplateSpec{
786+
ObjectMeta: *newObjectMeta(objMeta, 0),
787+
Spec: machinev1.MachineSpec{
788+
Class: machinev1.ClassSpec{
789+
Kind: "AWSMachineClass",
790+
Name: "machine-0",
791+
},
792+
ProviderID: "providerid",
793+
},
794+
}, &machinev1.MachineStatus{
795+
Node: "",
796+
//TODO conditions
797+
}, nil, nil, nil),
798+
err: false,
799+
},
800+
}),
736801
)
737802
})
738803

@@ -747,6 +812,7 @@ var _ = Describe("machine", func() {
747812
machine string
748813
fakeProviderID string
749814
fakeNodeName string
815+
fakeDriverGetVMs func() (driver.VMs, error)
750816
fakeError error
751817
forceDeleteLabelPresent bool
752818
fakeMachineStatus *machinev1.MachineStatus
@@ -791,6 +857,12 @@ var _ = Describe("machine", func() {
791857
machine, err := controller.controlMachineClient.Machines(objMeta.Namespace).Get(action.machine, metav1.GetOptions{})
792858
Expect(err).ToNot(HaveOccurred())
793859

860+
fakeDriverGetVMsTemp := func() (driver.VMs, error) { return nil, nil }
861+
862+
if action.fakeDriverGetVMs != nil {
863+
fakeDriverGetVMsTemp = action.fakeDriverGetVMs
864+
}
865+
794866
fakeDriver := driver.NewFakeDriver(
795867
func() (string, string, error) {
796868
_, err := controller.targetCoreClient.CoreV1().Nodes().Create(&v1.Node{
@@ -809,6 +881,7 @@ var _ = Describe("machine", func() {
809881
func() (string, error) {
810882
return action.fakeProviderID, action.fakeError
811883
},
884+
fakeDriverGetVMsTemp,
812885
)
813886

814887
// Create a machine that is to be deleted later
@@ -899,6 +972,45 @@ var _ = Describe("machine", func() {
899972
machineDeleted: true,
900973
},
901974
}),
975+
Entry("Allow proper deletion of the machine object when providerID is missing but actual VM still exists in cloud", &data{
976+
setup: setup{
977+
secrets: []*corev1.Secret{
978+
{
979+
ObjectMeta: *newObjectMeta(objMeta, 0),
980+
},
981+
},
982+
aws: []*machinev1.AWSMachineClass{
983+
{
984+
ObjectMeta: *newObjectMeta(objMeta, 0),
985+
Spec: machinev1.AWSMachineClassSpec{
986+
SecretRef: newSecretReference(objMeta, 0),
987+
},
988+
},
989+
},
990+
machines: newMachines(1, &machinev1.MachineTemplateSpec{
991+
ObjectMeta: *newObjectMeta(objMeta, 0),
992+
Spec: machinev1.MachineSpec{
993+
Class: machinev1.ClassSpec{
994+
Kind: "AWSMachineClass",
995+
Name: "machine-0",
996+
},
997+
},
998+
}, nil, nil, nil, nil),
999+
},
1000+
action: action{
1001+
machine: "machine-0",
1002+
//fakeProviderID: "fakeID-0",
1003+
fakeNodeName: "fakeNode-0",
1004+
fakeError: nil,
1005+
fakeDriverGetVMs: func() (driver.VMs, error) {
1006+
return map[string]string{"fakeID-0": "machine-0"}, nil
1007+
},
1008+
},
1009+
expect: expect{
1010+
errOccurred: false,
1011+
machineDeleted: true,
1012+
},
1013+
}),
9021014
Entry("Machine deletion when drain fails", &data{
9031015
setup: setup{
9041016
secrets: []*corev1.Secret{
@@ -1422,7 +1534,7 @@ var _ = Describe("machine", func() {
14221534

14231535
machineName := "machine-0"
14241536

1425-
DescribeTable("##Different machine state update scenrios",
1537+
DescribeTable("##Different machine state update scenarios",
14261538
func(data *data) {
14271539
stop := make(chan struct{})
14281540
defer close(stop)
@@ -1668,6 +1780,42 @@ var _ = Describe("machine", func() {
16681780
),
16691781
},
16701782
}),
1783+
Entry("Machine object does not have status.node set and node exists then it should adopt node using providerID", &data{
1784+
setup: setup{
1785+
machines: newMachines(1, &machinev1.MachineTemplateSpec{
1786+
ObjectMeta: *newObjectMeta(objMeta, 0),
1787+
Spec: machinev1.MachineSpec{
1788+
ProviderID: "aws//fakeID",
1789+
},
1790+
}, &machinev1.MachineStatus{}, nil, nil, nil),
1791+
nodes: []*corev1.Node{
1792+
{
1793+
ObjectMeta: metav1.ObjectMeta{
1794+
Name: "node-0",
1795+
},
1796+
Spec: corev1.NodeSpec{
1797+
ProviderID: "aws//fakeID-0",
1798+
},
1799+
},
1800+
},
1801+
},
1802+
action: action{
1803+
machine: machineName,
1804+
},
1805+
expect: expect{
1806+
machine: newMachine(&machinev1.MachineTemplateSpec{
1807+
ObjectMeta: metav1.ObjectMeta{
1808+
Name: "machine-0",
1809+
},
1810+
}, &machinev1.MachineStatus{
1811+
Node: "node",
1812+
}, nil, nil,
1813+
map[string]string{
1814+
"node": "node-0",
1815+
},
1816+
),
1817+
},
1818+
}),
16711819
)
16721820
})
16731821

pkg/driver/driver.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,5 +106,8 @@ func NewDriver(machineID string, secretRef *corev1.Secret, classKind string, mac
106106
func() (string, error) {
107107
return "fake", nil
108108
},
109+
func() (VMs, error) {
110+
return nil, nil
111+
},
109112
)
110113
}

pkg/driver/driver_azure.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ func (d *AzureDriver) getNICParameters(vmName string, subnet *network.Subnet) ne
7474
Name: &nicName,
7575
InterfaceIPConfigurationPropertiesFormat: &network.InterfaceIPConfigurationPropertiesFormat{
7676
PrivateIPAllocationMethod: network.Dynamic,
77-
Subnet: subnet,
77+
Subnet: subnet,
7878
},
7979
},
8080
},
@@ -292,7 +292,7 @@ func (d *AzureDriver) GetVMs(machineID string) (result VMs, err error) {
292292
return
293293
}
294294

295-
//GetUserData return the used data whit which the VM will be booted
295+
//GetUserData return the user data with which the VM will be booted
296296
func (d *AzureDriver) GetUserData() string {
297297
return d.UserData
298298
}

pkg/driver/driver_fake.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,15 +25,17 @@ type FakeDriver struct {
2525
delete func(string) error
2626
//existing func() (string, v1alpha1.MachinePhase, error)
2727
existing func() (string, error)
28+
getVMs func() (VMs, error)
2829
getVolNames func([]corev1.PersistentVolumeSpec) ([]string, error)
2930
}
3031

3132
// NewFakeDriver returns a new fakedriver object
32-
func NewFakeDriver(create func() (string, string, error), delete func(string) error, existing func() (string, error)) Driver {
33+
func NewFakeDriver(create func() (string, string, error), delete func(string) error, existing func() (string, error), getVMs func() (VMs, error)) Driver {
3334
return &FakeDriver{
3435
create: create,
3536
delete: delete,
3637
existing: existing,
38+
getVMs: getVMs,
3739
}
3840
}
3941

@@ -54,8 +56,7 @@ func (d *FakeDriver) GetExisting() (string, error) {
5456

5557
// GetVMs returns a list of VMs
5658
func (d *FakeDriver) GetVMs(name string) (VMs, error) {
57-
listOfVMs := make(map[string]string)
58-
return listOfVMs, nil
59+
return d.getVMs()
5960
}
6061

6162
// GetVolNames parses volume names from pv specs

0 commit comments

Comments
 (0)