Skip to content

Commit 189f6d2

Browse files
committed
Surface error when VM is waiting for VMG
Signed-off-by: Gong Zhang <[email protected]>
1 parent 36c3550 commit 189f6d2

File tree

6 files changed

+77
-82
lines changed

6 files changed

+77
-82
lines changed

apis/v1beta1/vspheremachine_types.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,10 @@ const (
8181
// Note: This reason is used only in supervisor mode.
8282
VSphereMachineVirtualMachinePoweringOnV1Beta2Reason = "PoweringOn"
8383

84+
// VSphereMachineVirtualMachineWaitingForVirtualMachineGroupV1Beta2Reason surfaces that the VirtualMachine
85+
// is waiting for its corresponding VirtualMachineGroup to be created and to include this VM as a member.
86+
VSphereMachineVirtualMachineWaitingForVirtualMachineGroupV1Beta2Reason = "WaitingForVirtualMachineGroup"
87+
8488
// VSphereMachineVirtualMachineWaitingForNetworkAddressV1Beta2Reason surfaces when the VirtualMachine that is controlled
8589
// by the VSphereMachine waiting for the machine network settings to be reported after machine being powered on.
8690
VSphereMachineVirtualMachineWaitingForNetworkAddressV1Beta2Reason = "WaitingForNetworkAddress"

controllers/vmware/virtualmachinegroup_controller.go

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ func AddVirtualMachineGroupControllerToManager(ctx context.Context, controllerMa
8989
}
9090

9191
// ClusterToVirtualMachineGroup maps Cluster events to VirtualMachineGroup reconcile requests.
92-
func (r VirtualMachineGroupReconciler) ClusterToVirtualMachineGroup(ctx context.Context, a ctrlclient.Object) []reconcile.Request {
92+
func (r *VirtualMachineGroupReconciler) ClusterToVirtualMachineGroup(ctx context.Context, a ctrlclient.Object) []reconcile.Request {
9393
cluster, ok := a.(*clusterv1.Cluster)
9494
if !ok {
9595
return nil
@@ -104,11 +104,10 @@ func (r VirtualMachineGroupReconciler) ClusterToVirtualMachineGroup(ctx context.
104104
}}
105105
}
106106

107-
// VsphereMachineToVirtualMachineGroup maps VSphereMachine events to VirtualMachineGroup reconcile requests.
108-
// This handler only processes VSphereMachine objects for Day-2 operations, ensuring VSphereMachine state stays
109-
// in sync with its owning VMG. If no corresponding VMG is found, this is a no-op.
110-
111-
func (r VirtualMachineGroupReconciler) VSphereMachineToVirtualMachineGroup(ctx context.Context, a ctrlclient.Object) []reconcile.Request {
107+
// VSphereMachineToVirtualMachineGroup maps VSphereMachine events to VirtualMachineGroup reconcile requests.
108+
// This handler only processes VSphereMachine objects for Day-2 operations when VMG could be found, ensuring
109+
// VMG member list in sync with VSphereMachines. If no corresponding VMG is found, this is a no-op.
110+
func (r *VirtualMachineGroupReconciler) VSphereMachineToVirtualMachineGroup(ctx context.Context, a ctrlclient.Object) []reconcile.Request {
112111
vSphereMachine, ok := a.(*vmwarev1.VSphereMachine)
113112
if !ok {
114113
return nil

controllers/vmware/virtualmachinegroup_reconciler.go

Lines changed: 33 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,14 @@ type VirtualMachineGroupReconciler struct {
5151
Recorder record.EventRecorder
5252
}
5353

54+
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters,verbs=get;list;watch
55+
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters/status,verbs=get
56+
// +kubebuilder:rbac:groups=vmoperator.vmware.com,resources=virtualmachinegroups,verbs=get;list;watch;create;update;patch;delete
57+
// +kubebuilder:rbac:groups=vmoperator.vmware.com,resources=virtualmachinegroups/status,verbs=get
58+
// +kubebuilder:rbac:groups=vmware.infrastructure.cluster.x-k8s.io,resources=vspheremachines,verbs=get;list;watch
59+
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinedeployments,verbs=get;list;watch
60+
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines,verbs=get;list;watch
61+
5462
func (r *VirtualMachineGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
5563
log := ctrl.LoggerFrom(ctx)
5664

@@ -105,7 +113,12 @@ func (r *VirtualMachineGroupReconciler) createOrUpdateVMG(ctx context.Context, c
105113
}
106114

107115
// Calculate expected Machines of all MachineDeployments.
108-
expected := getExpectedVSphereMachines(cluster)
116+
expected, err := getExpectedVSphereMachines(ctx, r.Client, cluster)
117+
if err != nil {
118+
log.Error(err, "failed to get expected Machines of all MachineDeployment")
119+
return ctrl.Result{}, err
120+
}
121+
109122
if expected == 0 {
110123
log.Info("none of MachineDeployments specifies replica and node auto replacement doesn't support this scenario")
111124
return reconcile.Result{}, nil
@@ -215,60 +228,31 @@ func (r *VirtualMachineGroupReconciler) createOrUpdateVMG(ctx context.Context, c
215228
return reconcile.Result{}, err
216229
}
217230

218-
// isMDDefined checks if there are any MachineDeployments for the given cluster
219-
// by listing objects with the cluster.x-k8s.io/cluster-name label.
220-
func (r *VirtualMachineGroupReconciler) isMDDefined(ctx context.Context, cluster *clusterv1.Cluster) (bool, error) {
221-
mdList := &clusterv1.MachineDeploymentList{}
222-
if err := r.Client.List(ctx, mdList, client.InNamespace(cluster.Namespace), client.MatchingLabels{clusterv1.ClusterNameLabel: cluster.Name}); err != nil {
223-
return false, errors.Wrapf(err, "failed to list MachineDeployments for cluster %s/%s",
224-
cluster.Namespace, cluster.Name)
225-
}
226-
227-
if len(mdList.Items) == 0 {
228-
return false, errors.Errorf("no MachineDeployments found for cluster %s/%s",
229-
cluster.Namespace, cluster.Name)
230-
}
231-
232-
return true, nil
233-
}
234-
235-
// isExplicitPlacement checks if any MachineDeployment has an explicit failure domain set.
236-
func (r *VirtualMachineGroupReconciler) isExplicitPlacement(cluster *clusterv1.Cluster) (bool, error) {
237-
// First, ensure MachineDeployments are defined
238-
mdDefined, err := r.isMDDefined(context.Background(), cluster)
239-
if !mdDefined {
240-
return false, err
241-
}
242-
243-
// Iterate through MachineDeployments to find if an explicit failure domain is set.
244-
mds := cluster.Spec.Topology.Workers.MachineDeployments
245-
for _, md := range mds {
246-
// If a failure domain is specified for any MachineDeployment, it indicates
247-
// explicit placement is configured, so return true.
248-
if md.FailureDomain != "" {
249-
return true, nil
250-
}
251-
}
252-
253-
return false, nil
254-
}
255-
256231
// getExpectedVSphereMachines returns the total number of replicas across all
257-
// MachineDeployments in the Cluster's Topology.Workers.
258-
func getExpectedVSphereMachines(cluster *clusterv1.Cluster) int32 {
259-
if !cluster.Spec.Topology.IsDefined() {
260-
return 0
232+
// MachineDeployments belonging to the Cluster
233+
func getExpectedVSphereMachines(ctx context.Context, kubeClient client.Client, cluster *clusterv1.Cluster) (int32, error) {
234+
var mdList clusterv1.MachineDeploymentList
235+
if err := kubeClient.List(
236+
ctx,
237+
&mdList,
238+
client.InNamespace(cluster.Namespace),
239+
client.MatchingLabels{clusterv1.ClusterNameLabel: cluster.Name},
240+
); err != nil {
241+
return 0, errors.Wrap(err, "failed to list MachineDeployments")
261242
}
262243

263244
var total int32
264-
for _, md := range cluster.Spec.Topology.Workers.MachineDeployments {
265-
if md.Replicas != nil {
266-
total += *md.Replicas
245+
for _, md := range mdList.Items {
246+
if md.Spec.Replicas != nil {
247+
total += *md.Spec.Replicas
267248
}
268249
}
269-
return total
250+
251+
return total, nil
270252
}
271253

254+
// getCurrentVSphereMachines returns the list of VSphereMachines belonging to the Cluster’s MachineDeployments.
255+
// VSphereMachines marked for removal are excluded from the result.
272256
func getCurrentVSphereMachines(ctx context.Context, kubeClient client.Client, clusterNamespace, clusterName string) ([]vmwarev1.VSphereMachine, error) {
273257
log := ctrl.LoggerFrom(ctx)
274258

@@ -321,7 +305,7 @@ func GenerateVMGPlacementAnnotations(ctx context.Context, vmg *vmoprv1.VirtualMa
321305

322306
// Check if VM belongs to a Machine Deployment by name (e.g. cluster-1-np-1-vm-xxx contains np-1)
323307
// TODO: Establish membership via the machine deployment name label
324-
if strings.Contains(member.Name, md) {
308+
if strings.Contains(member.Name, "-"+md+"-") {
325309
// Get the VM placement information by member status.
326310
// VMs that have undergone placement do not have Placement info set, skip.
327311
if member.Placement == nil {
@@ -345,7 +329,7 @@ func GenerateVMGPlacementAnnotations(ctx context.Context, vmg *vmoprv1.VirtualMa
345329
return annotations, nil
346330
}
347331

348-
// TODO: de-dup this logic with vmopmachine.go
332+
// Duplicated this logic from pkg/services/vmoperator/vmopmachine.go
349333
// GenerateVirtualMachineName generates the name of a VirtualMachine based on the naming strategy.
350334
func GenerateVirtualMachineName(machineName string, namingStrategy *vmwarev1.VirtualMachineNamingStrategy) (string, error) {
351335
// Per default the name of the VirtualMachine should be equal to the Machine name (this is the same as "{{ .machine.name }}")

controllers/vmware/virtualmachinegroup_reconciler_test.go

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ const (
3636

3737
func TestGetExpectedVSphereMachines(t *testing.T) {
3838
g := NewWithT(t)
39+
ctx := context.Background()
3940

4041
tests := []struct {
4142
name string
@@ -64,8 +65,9 @@ func TestGetExpectedVSphereMachines(t *testing.T) {
6465
}
6566

6667
for _, tt := range tests {
68+
fakeClient := fake.NewClientBuilder().WithScheme(s).WithObjects(tt.cluster).Build()
6769
t.Run(tt.name, func(t *testing.T) {
68-
g.Expect(getExpectedVSphereMachines(tt.cluster)).To(Equal(tt.expected))
70+
g.Expect(getExpectedVSphereMachines(ctx, fakeClient, tt.cluster)).To(Equal(tt.expected))
6971
})
7072
}
7173
}
@@ -74,12 +76,12 @@ func TestGetCurrentVSphereMachines(t *testing.T) {
7476
g := NewWithT(t)
7577
ctx := context.Background()
7678

77-
// VM names are based on CAPI Machine names, not VSphereMachine names, but we use VSM objects here.
79+
// VM names are based on CAPI Machine names, not VSphereMachine names, but we use VSphereMachine here.
7880
vsm1 := newVSphereMachine("vsm-1", mdName1, false, nil)
7981
vsm2 := newVSphereMachine("vsm-2", mdName2, false, nil)
8082
vsmDeleting := newVSphereMachine("vsm-3", mdName1, true, nil) // Deleting
8183
vsmControlPlane := newVSphereMachine("vsm-cp", "cp-md", false, nil)
82-
vsmControlPlane.Labels[clusterv1.MachineControlPlaneLabel] = "true" // Should be filtered by label in production, but here filtered implicitly by only listing MD-labelled objects
84+
vsmControlPlane.Labels[clusterv1.MachineControlPlaneLabel] = "true"
8385

8486
tests := []struct {
8587
name string
@@ -94,7 +96,7 @@ func TestGetCurrentVSphereMachines(t *testing.T) {
9496
vsmDeleting,
9597
vsmControlPlane,
9698
},
97-
want: 2, // Should exclude vsm-3 (deleting) and vsm-cp (no MD label used in the actual listing logic)
99+
want: 2, // Should exclude vsm-3 (deleting) and vsm-cp (control plane VSphereMachine)
98100
},
99101
{
100102
name: "No VSphereMachines found",
@@ -110,7 +112,7 @@ func TestGetCurrentVSphereMachines(t *testing.T) {
110112
g.Expect(err).NotTo(HaveOccurred())
111113
g.Expect(len(got)).To(Equal(tt.want))
112114

113-
// Check that the correct machines are present (e.g., vsm1 and vsm2)
115+
// Check that the correct Machines are present
114116
if tt.want > 0 {
115117
names := make([]string, len(got))
116118
for i, vsm := range got {
@@ -191,7 +193,7 @@ func TestGenerateVMGPlacementAnnotations(t *testing.T) {
191193
Members: []vmoprv1.VirtualMachineGroupMemberStatus{
192194
// First VM sets the placement
193195
newVMGMemberStatus(vmName1, "VirtualMachine", true, zoneA),
194-
// Second VM is ignored (logic skips finding placement twice)
196+
// Second VM is ignored
195197
newVMGMemberStatus(vmNameUnplaced, "VirtualMachine", true, zoneB),
196198
},
197199
},
@@ -208,7 +210,7 @@ func TestGenerateVMGPlacementAnnotations(t *testing.T) {
208210
ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: clusterNamespace},
209211
Status: vmoprv1.VirtualMachineGroupStatus{
210212
Members: []vmoprv1.VirtualMachineGroupMemberStatus{
211-
newVMGMemberStatus(vmNameWrongKind, "Pod", true, zoneA),
213+
newVMGMemberStatus(vmNameWrongKind, "VirtualMachineGroup", true, zoneA),
212214
},
213215
},
214216
},
@@ -220,7 +222,6 @@ func TestGenerateVMGPlacementAnnotations(t *testing.T) {
220222

221223
for _, tt := range tests {
222224
t.Run(tt.name, func(t *testing.T) {
223-
// Mock client is needed for the logging in the function, but not used for API calls
224225
ctx := ctrl.LoggerInto(context.Background(), ctrl.LoggerFrom(context.Background()))
225226

226227
got, err := GenerateVMGPlacementAnnotations(ctx, tt.vmg, tt.machineDeployments)
@@ -240,7 +241,7 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) {
240241
ctx := context.Background()
241242

242243
// Initial objects for the successful VMG creation path (Expected: 1, Current: 1)
243-
cluster := newCluster(clusterName, clusterNamespace, true, 1, 0) // Expect 1 machine
244+
cluster := newCluster(clusterName, clusterNamespace, true, 1, 0)
244245
vsm1 := newVSphereMachine("vsm-1", mdName1, false, nil)
245246
md1 := newMachineDeployment(mdName1)
246247

@@ -271,13 +272,13 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) {
271272
{
272273
name: "Requeue: ControlPlane Not Initialized",
273274
initialObjects: []client.Object{
274-
newCluster(clusterName, clusterNamespace, false, 1, 0), // Not Initialized
275+
newCluster(clusterName, clusterNamespace, false, 1, 0),
275276
},
276277
expectedResult: reconcile.Result{RequeueAfter: reconciliationDelay},
277278
checkVMGExists: false,
278279
},
279280
{
280-
name: "Requeue: VMG Not Found, Machines Missing (0/1)",
281+
name: "Requeue: VMG Not Found",
281282
initialObjects: []client.Object{
282283
cluster.DeepCopy(),
283284
md1.DeepCopy(),
@@ -286,7 +287,7 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) {
286287
checkVMGExists: false,
287288
},
288289
{
289-
name: "Success: VMG Created (1/1)",
290+
name: "Success: VMG Created",
290291
initialObjects: []client.Object{
291292
cluster.DeepCopy(),
292293
md1.DeepCopy(),
@@ -301,7 +302,7 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) {
301302
cluster.DeepCopy(),
302303
md1.DeepCopy(),
303304
vsm1.DeepCopy(),
304-
&vmoprv1.VirtualMachineGroup{ // Pre-existing VMG
305+
&vmoprv1.VirtualMachineGroup{
305306
ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: clusterNamespace},
306307
},
307308
},
@@ -333,7 +334,9 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) {
333334
// Check that the core fields were set by the MutateFn
334335
g.Expect(vmg.Labels).To(HaveKeyWithValue(clusterv1.ClusterNameLabel, clusterName))
335336
g.Expect(vmg.Spec.BootOrder).To(HaveLen(1))
336-
g.Expect(vmg.Spec.BootOrder[0].Members).To(HaveLen(int(getExpectedVSphereMachines(cluster))))
337+
expected, err := getExpectedVSphereMachines(ctx, fakeClient, tt.initialObjects[0].(*clusterv1.Cluster))
338+
g.Expect(err).NotTo(HaveOccurred(), "Should get expected Machines")
339+
g.Expect(vmg.Spec.BootOrder[0].Members).To(HaveLen(int(expected)))
337340

338341
// VMG members should match the VSphereMachine (name: vsm-1)
339342
g.Expect(vmg.Spec.BootOrder[0].Members[0].Name).To(ContainElement("vsm-1"))
@@ -344,9 +347,6 @@ func TestVirtualMachineGroupReconciler_ReconcileFlow(t *testing.T) {
344347
}
345348
}
346349

347-
// Helper function to create a *string
348-
func stringPtr(s string) *string { return &s }
349-
350350
// Helper function to create a basic Cluster object
351351
func newCluster(name, namespace string, initialized bool, replicasMD1, replicasMD2 int32) *clusterv1.Cluster {
352352
cluster := &clusterv1.Cluster{

pkg/services/vmoperator/vmopmachine.go

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -207,22 +207,27 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap
207207
return false, err
208208
}
209209
if apierrors.IsNotFound(err) {
210-
log.V(4).Info("VirtualMachineGroup not found, requeueing")
210+
log.V(4).Info("VirtualMachineGroup not found, requeueing", "Name", key.Name, "Namespace", key.Namespace)
211211
return true, nil
212212
}
213213
}
214214

215215
// Proceed only if the machine is a member of the VirtualMachineGroup.
216216
if !v.checkVirtualMachineGroupMembership(vmOperatorVMGroup, supervisorMachineCtx) {
217-
log.V(4).Info("Waiting for VirtualMachineGroup membership, requeueing")
217+
v1beta2conditions.Set(supervisorMachineCtx.VSphereMachine, metav1.Condition{
218+
Type: infrav1.VSphereMachineVirtualMachineProvisionedV1Beta2Condition,
219+
Status: metav1.ConditionFalse,
220+
Reason: infrav1.VSphereMachineVirtualMachineWaitingForVirtualMachineGroupV1Beta2Reason,
221+
})
222+
log.V(4).Info("Waiting for VirtualMachineGroup membership, requeueing", "VM Name", supervisorMachineCtx.Machine.Name)
218223
return true, nil
219224
}
220225

221226
affInfo = affinityInfo{
222227
vmGroupName: vmOperatorVMGroup.Name,
223228
}
224229

225-
// Set the zone label using the annotation of the machine deployment:zone mapping from VMG.
230+
// Set the zone label using the annotation of the per-md zone mapping from VMG.
226231
// This is for new VMs created during day-2 operations in VC 9.1.
227232
nodePool := supervisorMachineCtx.Machine.Labels[clusterv1.MachineDeploymentNameLabel]
228233
if zone, ok := vmOperatorVMGroup.Annotations[fmt.Sprintf("zone.cluster.x-k8s.io/%s", nodePool)]; ok && zone != "" {
@@ -842,10 +847,13 @@ func (v *VmopMachineService) addVolumes(ctx context.Context, supervisorMachineCt
842847
},
843848
}
844849

850+
// Before VC 9.1:
845851
// The CSI zone annotation must be set when using a zonal storage class,
846852
// which is required when the cluster has multiple (3) zones.
847853
// Single zone clusters (legacy/default) do not support zonal storage and must not
848854
// have the zone annotation set.
855+
// Since VC 9.1: With Node Auto Placement enabled, failureDomain is optional and CAPV no longer
856+
// sets PVC annotations. PVC placement now follows the StorageClass behavior (Immediate or WaitForFirstConsumer).
849857
zonal := len(supervisorMachineCtx.VSphereCluster.Status.FailureDomains) > 1
850858

851859
if zone := supervisorMachineCtx.VSphereMachine.Spec.FailureDomain; zonal && zone != nil {
@@ -904,8 +912,7 @@ func getVMLabels(supervisorMachineCtx *vmware.SupervisorMachineContext, vmLabels
904912
vmLabels[k] = v
905913
}
906914

907-
// Get the labels that determine the VM's placement inside of a stretched
908-
// cluster.
915+
// Get the labels that determine the VM's placement
909916
var failureDomain *string
910917
if affinityInfo != nil && affinityInfo.failureDomain != nil {
911918
failureDomain = affinityInfo.failureDomain
@@ -934,12 +941,13 @@ func getVMLabels(supervisorMachineCtx *vmware.SupervisorMachineContext, vmLabels
934941
// and thus the code is optimized as such. However, in the future
935942
// this function may return a more diverse topology.
936943
func getTopologyLabels(supervisorMachineCtx *vmware.SupervisorMachineContext, failureDomain *string) map[string]string {
937-
// TODO: Make it so that we always set the zone label, might require enquiring the zones present (when unset)
944+
// This is for explicit placement.
938945
if fd := supervisorMachineCtx.VSphereMachine.Spec.FailureDomain; fd != nil && *fd != "" {
939946
return map[string]string{
940947
corev1.LabelTopologyZone: *fd,
941948
}
942949
}
950+
// This is for automatic placement.
943951
if failureDomain != nil && *failureDomain != "" {
944952
return map[string]string{
945953
corev1.LabelTopologyZone: *failureDomain,

0 commit comments

Comments
 (0)