Skip to content

Commit 5202ff6

Browse files
RadekManakJoelSpeed
andcommitted
VSphere: Handle cloned instance with lost taskID
Co-authored-by: Joel Speed <[email protected]>
1 parent 148e5dc commit 5202ff6

File tree

2 files changed

+83
-27
lines changed

2 files changed

+83
-27
lines changed

pkg/controller/vsphere/reconciler.go

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -115,9 +115,37 @@ func (r *Reconciler) create() error {
115115

116116
// We only clone the VM template if we have no taskRef.
117117
if r.providerStatus.TaskRef == "" {
118+
klog.V(4).Infof("%v: ProviderStatus does not have TaskRef", r.machine.GetName())
118119
if !r.machineScope.session.IsVC() {
119120
return fmt.Errorf("%v: not connected to a vCenter", r.machine.GetName())
120121
}
122+
123+
// Attempt to power on instance in situation where we alredy cloned the instance and lost taskRef.
124+
klog.V(4).Infof("%v: InstanceState is: %q", r.machine.GetName(), ptr.Deref(r.machineScope.providerStatus.InstanceState, ""))
125+
if types.VirtualMachinePowerState(ptr.Deref(r.machineScope.providerStatus.InstanceState, "")) == types.VirtualMachinePowerStatePoweredOff {
126+
klog.Infof("Powering on cloned machine without taskID: %v", r.machine.Name)
127+
128+
task, err := powerOn(r.machineScope)
129+
if err != nil {
130+
metrics.RegisterFailedInstanceCreate(&metrics.MachineLabels{
131+
Name: r.machine.Name,
132+
Namespace: r.machine.Namespace,
133+
Reason: "PowerOn task finished with error",
134+
})
135+
136+
conditionFailed := conditionFailed()
137+
conditionFailed.Message = err.Error()
138+
statusError := setProviderStatus(task, conditionFailed, r.machineScope, nil)
139+
if statusError != nil {
140+
return fmt.Errorf("failed to set provider status: %w", err)
141+
}
142+
143+
return fmt.Errorf("%v: failed to power on machine: %w", r.machine.GetName(), err)
144+
}
145+
146+
return setProviderStatus(task, conditionSuccess(), r.machineScope, nil)
147+
}
148+
121149
klog.Infof("%v: cloning", r.machine.GetName())
122150
task, err := clone(r.machineScope)
123151
if err != nil {
@@ -170,8 +198,12 @@ func (r *Reconciler) create() error {
170198
} else {
171199
return fmt.Errorf("failed to check task status: %w", err)
172200
}
173-
} else if !taskIsFinished {
174-
return fmt.Errorf("%v task %v has not finished", moTask.Info.DescriptionId, moTask.Reference().Value)
201+
} else {
202+
if taskIsFinished {
203+
klog.V(4).Infof("%v task %v has completed", moTask.Info.DescriptionId, moTask.Reference().Value)
204+
} else {
205+
return fmt.Errorf("%v task %v has not finished", moTask.Info.DescriptionId, moTask.Reference().Value)
206+
}
175207
}
176208

177209
// if clone task finished successfully, power on the vm
@@ -287,10 +319,9 @@ func (r *Reconciler) exists() (bool, error) {
287319
}
288320

289321
// Check if machine was powered on after clone.
290-
// If it is powered off and in "Provisioning" phase, treat machine as non-existed yet and requeue for proceed
291-
// with creation procedure.
322+
// If it is powered off and in "Provisioning" phase, treat machine as non-existed yet and proceed with creation procedure.
292323
powerState := types.VirtualMachinePowerState(ptr.Deref(r.machineScope.providerStatus.InstanceState, ""))
293-
if powerState == "" {
324+
if powerState == "" || ptr.Deref(r.machine.Status.Phase, "") == machinev1.PhaseProvisioning {
294325
vm := &virtualMachine{
295326
Context: r.machineScope.Context,
296327
Obj: object.NewVirtualMachine(r.machineScope.session.Client.Client, vmRef),
@@ -303,7 +334,11 @@ func (r *Reconciler) exists() (bool, error) {
303334
}
304335

305336
if ptr.Deref(r.machine.Status.Phase, "") == machinev1.PhaseProvisioning && powerState == types.VirtualMachinePowerStatePoweredOff {
306-
klog.Infof("%v: already exists, but was not powered on after clone, requeue ", r.machine.GetName())
337+
klog.Infof("%v: already exists, but was not powered on after clone", r.machine.GetName())
338+
r.machineScope.providerStatus.InstanceState = ptr.To(string(powerState))
339+
if err := r.machineScope.PatchMachine(); err != nil {
340+
return false, fmt.Errorf("%v: failed to patch machine: %w", r.machine.GetName(), err)
341+
}
307342
return false, nil
308343
}
309344

pkg/controller/vsphere/reconciler_test.go

Lines changed: 42 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2843,17 +2843,24 @@ func TestUpdate(t *testing.T) {
28432843
}
28442844

28452845
func TestExists(t *testing.T) {
2846-
model, session, server := initSimulator(t)
2846+
withPoweredOffVMS := func() simulatorModelOption {
2847+
return func(m *simulator.Model) {
2848+
m.Autostart = false
2849+
}
2850+
}
2851+
2852+
model, server := initSimulatorCustom(t, withPoweredOffVMS())
2853+
session := getSimulatorSession(t, server)
28472854
defer model.Remove()
28482855
defer server.Close()
28492856
credentialsSecretUsername := fmt.Sprintf("%s.username", server.URL.Host)
28502857
credentialsSecretPassword := fmt.Sprintf("%s.password", server.URL.Host)
28512858

28522859
password, _ := server.URL.User.Password()
28532860
namespace := "test"
2854-
vm := simulator.Map.Any("VirtualMachine").(*simulator.VirtualMachine)
2855-
instanceUUID := "a5764857-ae35-34dc-8f25-a9c9e73aa898"
2856-
vm.Config.InstanceUuid = instanceUUID
2861+
VMs := simulator.Map.All("VirtualMachine")
2862+
poweredOffVM := VMs[0].(*simulator.VirtualMachine)
2863+
poweredOnVM := VMs[1].(*simulator.VirtualMachine)
28572864

28582865
credentialsSecret := corev1.Secret{
28592866
ObjectMeta: metav1.ObjectMeta{
@@ -2866,7 +2873,7 @@ func TestExists(t *testing.T) {
28662873
},
28672874
}
28682875

2869-
vmObj := object.NewVirtualMachine(session.Client.Client, vm.Reference())
2876+
vmObj := object.NewVirtualMachine(session.Client.Client, poweredOnVM.Reference())
28702877
task, err := vmObj.PowerOn(context.TODO())
28712878
if err != nil {
28722879
t.Fatal(err)
@@ -2878,11 +2885,12 @@ func TestExists(t *testing.T) {
28782885
instanceState string
28792886
exists bool
28802887
vmExists bool
2888+
vm *simulator.VirtualMachine
28812889
}{
28822890
{
28832891
name: "VM doesn't exist",
28842892
machinePhase: "Provisioning",
2885-
instanceState: string(types.VirtualMachinePowerStatePoweredOn),
2893+
instanceState: "",
28862894
exists: false,
28872895
vmExists: false,
28882896
},
@@ -2892,54 +2900,67 @@ func TestExists(t *testing.T) {
28922900
instanceState: string(types.VirtualMachinePowerStatePoweredOn),
28932901
exists: true,
28942902
vmExists: true,
2903+
vm: poweredOnVM,
28952904
},
28962905
{
28972906
name: "VM exists but didnt powered on after clone",
28982907
machinePhase: "Provisioning",
28992908
instanceState: string(types.VirtualMachinePowerStatePoweredOff),
29002909
exists: false,
29012910
vmExists: true,
2911+
vm: poweredOffVM,
29022912
},
29032913
{
29042914
name: "VM exists, but powered off",
29052915
machinePhase: "Provisioned",
29062916
instanceState: string(types.VirtualMachinePowerStatePoweredOff),
29072917
exists: true,
29082918
vmExists: true,
2919+
vm: poweredOffVM,
29092920
},
29102921
}
29112922

29122923
for _, tc := range cases {
29132924
t.Run(tc.name, func(t *testing.T) {
2914-
machineScope := machineScope{
2915-
Context: context.TODO(),
2916-
machine: &machinev1.Machine{
2917-
ObjectMeta: metav1.ObjectMeta{
2918-
Name: "test",
2919-
Namespace: "test",
2920-
Labels: map[string]string{
2921-
machinev1.MachineClusterIDLabel: "CLUSTERID",
2922-
},
2923-
},
2924-
Status: machinev1.MachineStatus{
2925-
Phase: &tc.machinePhase,
2925+
2926+
var name, uuid string
2927+
if tc.vm != nil {
2928+
name = tc.vm.Name
2929+
uuid = tc.vm.Config.InstanceUuid
2930+
}
2931+
2932+
machineObj := &machinev1.Machine{
2933+
ObjectMeta: metav1.ObjectMeta{
2934+
Name: "test",
2935+
Namespace: "test",
2936+
Labels: map[string]string{
2937+
machinev1.MachineClusterIDLabel: "CLUSTERID",
29262938
},
29272939
},
2940+
Status: machinev1.MachineStatus{
2941+
Phase: &tc.machinePhase,
2942+
},
2943+
}
2944+
2945+
machineScope := machineScope{
2946+
Context: context.TODO(),
2947+
machine: machineObj,
2948+
machineToBePatched: runtimeclient.MergeFrom(machineObj.DeepCopy()),
29282949
providerSpec: &machinev1.VSphereMachineProviderSpec{
2929-
Template: vm.Name,
2950+
Template: name,
29302951
},
29312952
session: session,
29322953
providerStatus: &machinev1.VSphereMachineProviderStatus{
29332954
TaskRef: task.Reference().Value,
29342955
InstanceState: &tc.instanceState,
29352956
},
2936-
client: fake.NewClientBuilder().WithScheme(scheme.Scheme).WithRuntimeObjects(&credentialsSecret).Build(),
2957+
client: fake.NewClientBuilder().WithScheme(scheme.Scheme).WithRuntimeObjects(&credentialsSecret, machineObj).WithStatusSubresource(machineObj).Build(),
29372958
}
29382959

29392960
reconciler := newReconciler(&machineScope)
29402961

29412962
if tc.vmExists {
2942-
reconciler.machine.UID = apimachinerytypes.UID(instanceUUID)
2963+
reconciler.machine.UID = apimachinerytypes.UID(uuid)
29432964
}
29442965

29452966
exists, err := reconciler.exists()

0 commit comments

Comments
 (0)