Skip to content

Commit 7d6867c

Browse files
Merge pull request #1298 from openshift-cherrypick-robot/cherry-pick-1223-to-release-4.16
[release-4.16] OCPBUGS-43433: Vsphere: Handle cloned instance with lost taskID
2 parents 9f9c6ba + 57e0ab4 commit 7d6867c

File tree

2 files changed

+83
-27
lines changed

2 files changed

+83
-27
lines changed

pkg/controller/vsphere/reconciler.go

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -115,9 +115,37 @@ func (r *Reconciler) create() error {
115115

116116
// We only clone the VM template if we have no taskRef.
117117
if r.providerStatus.TaskRef == "" {
118+
klog.V(4).Infof("%v: ProviderStatus does not have TaskRef", r.machine.GetName())
118119
if !r.machineScope.session.IsVC() {
119120
return fmt.Errorf("%v: not connected to a vCenter", r.machine.GetName())
120121
}
122+
123+
// Attempt to power on instance in situation where we alredy cloned the instance and lost taskRef.
124+
klog.V(4).Infof("%v: InstanceState is: %q", r.machine.GetName(), ptr.Deref(r.machineScope.providerStatus.InstanceState, ""))
125+
if types.VirtualMachinePowerState(ptr.Deref(r.machineScope.providerStatus.InstanceState, "")) == types.VirtualMachinePowerStatePoweredOff {
126+
klog.Infof("Powering on cloned machine without taskID: %v", r.machine.Name)
127+
128+
task, err := powerOn(r.machineScope)
129+
if err != nil {
130+
metrics.RegisterFailedInstanceCreate(&metrics.MachineLabels{
131+
Name: r.machine.Name,
132+
Namespace: r.machine.Namespace,
133+
Reason: "PowerOn task finished with error",
134+
})
135+
136+
conditionFailed := conditionFailed()
137+
conditionFailed.Message = err.Error()
138+
statusError := setProviderStatus(task, conditionFailed, r.machineScope, nil)
139+
if statusError != nil {
140+
return fmt.Errorf("failed to set provider status: %w", err)
141+
}
142+
143+
return fmt.Errorf("%v: failed to power on machine: %w", r.machine.GetName(), err)
144+
}
145+
146+
return setProviderStatus(task, conditionSuccess(), r.machineScope, nil)
147+
}
148+
121149
klog.Infof("%v: cloning", r.machine.GetName())
122150
task, err := clone(r.machineScope)
123151
if err != nil {
@@ -170,8 +198,12 @@ func (r *Reconciler) create() error {
170198
} else {
171199
return fmt.Errorf("failed to check task status: %w", err)
172200
}
173-
} else if !taskIsFinished {
174-
return fmt.Errorf("%v task %v has not finished", moTask.Info.DescriptionId, moTask.Reference().Value)
201+
} else {
202+
if taskIsFinished {
203+
klog.V(4).Infof("%v task %v has completed", moTask.Info.DescriptionId, moTask.Reference().Value)
204+
} else {
205+
return fmt.Errorf("%v task %v has not finished", moTask.Info.DescriptionId, moTask.Reference().Value)
206+
}
175207
}
176208

177209
// if clone task finished successfully, power on the vm
@@ -287,10 +319,9 @@ func (r *Reconciler) exists() (bool, error) {
287319
}
288320

289321
// Check if machine was powered on after clone.
290-
// If it is powered off and in "Provisioning" phase, treat machine as non-existed yet and requeue for proceed
291-
// with creation procedure.
322+
// If it is powered off and in "Provisioning" phase, treat machine as non-existed yet and proceed with creation procedure.
292323
powerState := types.VirtualMachinePowerState(ptr.Deref(r.machineScope.providerStatus.InstanceState, ""))
293-
if powerState == "" {
324+
if powerState == "" || ptr.Deref(r.machine.Status.Phase, "") == machinev1.PhaseProvisioning {
294325
vm := &virtualMachine{
295326
Context: r.machineScope.Context,
296327
Obj: object.NewVirtualMachine(r.machineScope.session.Client.Client, vmRef),
@@ -303,7 +334,11 @@ func (r *Reconciler) exists() (bool, error) {
303334
}
304335

305336
if ptr.Deref(r.machine.Status.Phase, "") == machinev1.PhaseProvisioning && powerState == types.VirtualMachinePowerStatePoweredOff {
306-
klog.Infof("%v: already exists, but was not powered on after clone, requeue ", r.machine.GetName())
337+
klog.Infof("%v: already exists, but was not powered on after clone", r.machine.GetName())
338+
r.machineScope.providerStatus.InstanceState = ptr.To(string(powerState))
339+
if err := r.machineScope.PatchMachine(); err != nil {
340+
return false, fmt.Errorf("%v: failed to patch machine: %w", r.machine.GetName(), err)
341+
}
307342
return false, nil
308343
}
309344

pkg/controller/vsphere/reconciler_test.go

Lines changed: 42 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2839,17 +2839,24 @@ func TestUpdate(t *testing.T) {
28392839
}
28402840

28412841
func TestExists(t *testing.T) {
2842-
model, session, server := initSimulator(t)
2842+
withPoweredOffVMS := func() simulatorModelOption {
2843+
return func(m *simulator.Model) {
2844+
m.Autostart = false
2845+
}
2846+
}
2847+
2848+
model, server := initSimulatorCustom(t, withPoweredOffVMS())
2849+
session := getSimulatorSession(t, server)
28432850
defer model.Remove()
28442851
defer server.Close()
28452852
credentialsSecretUsername := fmt.Sprintf("%s.username", server.URL.Host)
28462853
credentialsSecretPassword := fmt.Sprintf("%s.password", server.URL.Host)
28472854

28482855
password, _ := server.URL.User.Password()
28492856
namespace := "test"
2850-
vm := simulator.Map.Any("VirtualMachine").(*simulator.VirtualMachine)
2851-
instanceUUID := "a5764857-ae35-34dc-8f25-a9c9e73aa898"
2852-
vm.Config.InstanceUuid = instanceUUID
2857+
VMs := simulator.Map.All("VirtualMachine")
2858+
poweredOffVM := VMs[0].(*simulator.VirtualMachine)
2859+
poweredOnVM := VMs[1].(*simulator.VirtualMachine)
28532860

28542861
credentialsSecret := corev1.Secret{
28552862
ObjectMeta: metav1.ObjectMeta{
@@ -2862,7 +2869,7 @@ func TestExists(t *testing.T) {
28622869
},
28632870
}
28642871

2865-
vmObj := object.NewVirtualMachine(session.Client.Client, vm.Reference())
2872+
vmObj := object.NewVirtualMachine(session.Client.Client, poweredOnVM.Reference())
28662873
task, err := vmObj.PowerOn(context.TODO())
28672874
if err != nil {
28682875
t.Fatal(err)
@@ -2874,11 +2881,12 @@ func TestExists(t *testing.T) {
28742881
instanceState string
28752882
exists bool
28762883
vmExists bool
2884+
vm *simulator.VirtualMachine
28772885
}{
28782886
{
28792887
name: "VM doesn't exist",
28802888
machinePhase: "Provisioning",
2881-
instanceState: string(types.VirtualMachinePowerStatePoweredOn),
2889+
instanceState: "",
28822890
exists: false,
28832891
vmExists: false,
28842892
},
@@ -2888,54 +2896,67 @@ func TestExists(t *testing.T) {
28882896
instanceState: string(types.VirtualMachinePowerStatePoweredOn),
28892897
exists: true,
28902898
vmExists: true,
2899+
vm: poweredOnVM,
28912900
},
28922901
{
28932902
name: "VM exists but didnt powered on after clone",
28942903
machinePhase: "Provisioning",
28952904
instanceState: string(types.VirtualMachinePowerStatePoweredOff),
28962905
exists: false,
28972906
vmExists: true,
2907+
vm: poweredOffVM,
28982908
},
28992909
{
29002910
name: "VM exists, but powered off",
29012911
machinePhase: "Provisioned",
29022912
instanceState: string(types.VirtualMachinePowerStatePoweredOff),
29032913
exists: true,
29042914
vmExists: true,
2915+
vm: poweredOffVM,
29052916
},
29062917
}
29072918

29082919
for _, tc := range cases {
29092920
t.Run(tc.name, func(t *testing.T) {
2910-
machineScope := machineScope{
2911-
Context: context.TODO(),
2912-
machine: &machinev1.Machine{
2913-
ObjectMeta: metav1.ObjectMeta{
2914-
Name: "test",
2915-
Namespace: "test",
2916-
Labels: map[string]string{
2917-
machinev1.MachineClusterIDLabel: "CLUSTERID",
2918-
},
2919-
},
2920-
Status: machinev1.MachineStatus{
2921-
Phase: &tc.machinePhase,
2921+
2922+
var name, uuid string
2923+
if tc.vm != nil {
2924+
name = tc.vm.Name
2925+
uuid = tc.vm.Config.InstanceUuid
2926+
}
2927+
2928+
machineObj := &machinev1.Machine{
2929+
ObjectMeta: metav1.ObjectMeta{
2930+
Name: "test",
2931+
Namespace: "test",
2932+
Labels: map[string]string{
2933+
machinev1.MachineClusterIDLabel: "CLUSTERID",
29222934
},
29232935
},
2936+
Status: machinev1.MachineStatus{
2937+
Phase: &tc.machinePhase,
2938+
},
2939+
}
2940+
2941+
machineScope := machineScope{
2942+
Context: context.TODO(),
2943+
machine: machineObj,
2944+
machineToBePatched: runtimeclient.MergeFrom(machineObj.DeepCopy()),
29242945
providerSpec: &machinev1.VSphereMachineProviderSpec{
2925-
Template: vm.Name,
2946+
Template: name,
29262947
},
29272948
session: session,
29282949
providerStatus: &machinev1.VSphereMachineProviderStatus{
29292950
TaskRef: task.Reference().Value,
29302951
InstanceState: &tc.instanceState,
29312952
},
2932-
client: fake.NewClientBuilder().WithScheme(scheme.Scheme).WithRuntimeObjects(&credentialsSecret).Build(),
2953+
client: fake.NewClientBuilder().WithScheme(scheme.Scheme).WithRuntimeObjects(&credentialsSecret, machineObj).WithStatusSubresource(machineObj).Build(),
29332954
}
29342955

29352956
reconciler := newReconciler(&machineScope)
29362957

29372958
if tc.vmExists {
2938-
reconciler.machine.UID = apimachinerytypes.UID(instanceUUID)
2959+
reconciler.machine.UID = apimachinerytypes.UID(uuid)
29392960
}
29402961

29412962
exists, err := reconciler.exists()

0 commit comments

Comments
 (0)