Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ spec:
- "--diagnostics-address=${CAPI_DIAGNOSTICS_ADDRESS:=:8443}"
- "--insecure-diagnostics=${CAPI_INSECURE_DIAGNOSTICS:=false}"
- --v=4
- "--feature-gates=MultiNetworks=${EXP_MULTI_NETWORKS:=false},NodeAntiAffinity=${EXP_NODE_ANTI_AFFINITY:=false},NamespaceScopedZones=${EXP_NAMESPACE_SCOPED_ZONES:=false},PriorityQueue=${EXP_PRIORITY_QUEUE:=false}"
- "--feature-gates=MultiNetworks=${EXP_MULTI_NETWORKS:=false},NodeAntiAffinity=${EXP_NODE_ANTI_AFFINITY:=false},NamespaceScopedZones=${EXP_NAMESPACE_SCOPED_ZONES:=false},NodeAutoPlacement=${EXP_NODE_AUTO_PLACEMENT:=false},PriorityQueue=${EXP_PRIORITY_QUEUE:=false}"
image: controller:latest
imagePullPolicy: IfNotPresent
name: manager
Expand Down
6 changes: 6 additions & 0 deletions feature/feature.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ const (
// alpha: v1.11
NamespaceScopedZones featuregate.Feature = "NamespaceScopedZones"

// NodeAutoPlacement is a feature gate for the NodeAutoPlacement functionality for supervisor.
//
// alpha: v1.15
NodeAutoPlacement featuregate.Feature = "NodeAutoPlacement"

// PriorityQueue is a feature gate that controls if the controller uses the controller-runtime PriorityQueue
// instead of the default queue implementation.
//
Expand All @@ -61,6 +66,7 @@ var defaultCAPVFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{
// Every feature should be initiated here:
NodeAntiAffinity: {Default: false, PreRelease: featuregate.Alpha},
NamespaceScopedZones: {Default: false, PreRelease: featuregate.Alpha},
NodeAutoPlacement: {Default: false, PreRelease: featuregate.Alpha},
PriorityQueue: {Default: false, PreRelease: featuregate.Alpha},
MultiNetworks: {Default: false, PreRelease: featuregate.Alpha},
}
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@ go 1.24.0

replace sigs.k8s.io/cluster-api => sigs.k8s.io/cluster-api v1.11.1

replace github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels => github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels v0.0.0-20240404200847-de75746a9505
replace github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels => github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels v1.9.1-0.20250908141901-a9e1dfbc0045

require (
github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d
github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d
// The version of vm-operator should be kept in sync with the manifests at: config/deployments/integration-tests
github.com/vmware-tanzu/vm-operator/api v1.8.6
github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20250908141901-a9e1dfbc0045
github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505
github.com/vmware/govmomi v0.51.0
)
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -221,8 +221,8 @@ github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d h1:c
github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d/go.mod h1:JbFOh22iDsT5BowJe0GgpMI5e2/S7cWaJlv9LdURVQM=
github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d h1:z9lrzKVtNlujduv9BilzPxuge/LE2F0N1ms3TP4JZvw=
github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d/go.mod h1:Q4JzNkNMvjo7pXtlB5/R3oME4Nhah7fAObWgghVmtxk=
github.com/vmware-tanzu/vm-operator/api v1.8.6 h1:NIndORjcnSmIlQsCMIewpIwg/ocRVDh2lYjOroTVLrU=
github.com/vmware-tanzu/vm-operator/api v1.8.6/go.mod h1:HHA2SNI9B5Yqtyp5t+Gt9WTWBi/fIkM6+MukDDSf11A=
github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20250908141901-a9e1dfbc0045 h1:zME8crazIAWVJGboJpSLl+qcRYQ8yA6hPQojz28gY5M=
github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20250908141901-a9e1dfbc0045/go.mod h1:hkc/QZCSHcosWWMPS6VWWR12WenZcNE3BaTJ/8A8sNE=
github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505 h1:y4wXx1FUFqqSgJ/xUOEM1DLS2Uu0KaeLADWpzpioGTU=
github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505/go.mod h1:5rqRJ9zGR+KnKbkGx373WgN8xJpvAj99kHnfoDYRO5I=
github.com/vmware/govmomi v0.51.0 h1:n3RLS9aw/irTOKbiIyJzAb6rOat4YOVv/uDoRsNTSQI=
Expand Down
2 changes: 2 additions & 0 deletions pkg/services/vmoperator/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ package vmoperator

const (
kubeTopologyZoneLabelKey = "topology.kubernetes.io/zone"
kubeHostNameLabelKey = "kubernetes.io/hostname"
nodePoolLabelKey = "node-pool"

// ControlPlaneVMClusterModuleGroupName is the name used for the control plane Cluster Module.
ControlPlaneVMClusterModuleGroupName = "control-plane-group"
Expand Down
112 changes: 106 additions & 6 deletions pkg/services/vmoperator/vmopmachine.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import (

infrav1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/v1beta1"
vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1"
"sigs.k8s.io/cluster-api-provider-vsphere/feature"
capvcontext "sigs.k8s.io/cluster-api-provider-vsphere/pkg/context"
"sigs.k8s.io/cluster-api-provider-vsphere/pkg/context/vmware"
infrautilv1 "sigs.k8s.io/cluster-api-provider-vsphere/pkg/util"
Expand Down Expand Up @@ -171,10 +172,6 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap
return false, errors.New("received unexpected SupervisorMachineContext type")
}

if supervisorMachineCtx.Machine.Spec.FailureDomain != "" {
supervisorMachineCtx.VSphereMachine.Spec.FailureDomain = ptr.To(supervisorMachineCtx.Machine.Spec.FailureDomain)
}

// If debug logging is enabled, report the number of vms in the cluster before and after the reconcile
if log.V(5).Enabled() {
vms, err := v.getVirtualMachinesInCluster(ctx, supervisorMachineCtx)
Expand All @@ -188,6 +185,96 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap
// Set the VM state. Will get reset throughout the reconcile
supervisorMachineCtx.VSphereMachine.Status.VMStatus = vmwarev1.VirtualMachineStatePending

// TODO: add check for control plane machine
var vmAffinitySpec *vmoprv1.VirtualMachineAffinitySpec
if feature.Gates.Enabled(feature.NodeAutoPlacement) &&
supervisorMachineCtx.Machine.Spec.FailureDomain == "" &&
len(supervisorMachineCtx.VSphereCluster.Status.FailureDomains) > 1 {
// Check for the presence of a VirtualMachineGroup with the name and namespace same as the name of the Cluster
vmOperatorVMGroup := &vmoprv1.VirtualMachineGroup{}
key := client.ObjectKey{
Namespace: supervisorMachineCtx.Cluster.Namespace,
Name: supervisorMachineCtx.Cluster.Name,
}
err := v.Client.Get(ctx, key, vmOperatorVMGroup)
if err != nil {
if !apierrors.IsNotFound(err) {
return false, err
}
if apierrors.IsNotFound(err) {
log.V(4).Info("VirtualMachineGroup not found, requeueing")
return true, nil
}
}

// Check the presence of the node-pool label on the VirtualMachineGroup
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: let's use Machine Deployment in capv instead of node pool.

nodePool := supervisorMachineCtx.Machine.Labels[clusterv1.MachineDeploymentNameLabel]
if zone, ok := vmOperatorVMGroup.Labels[fmt.Sprintf("capv/%s", nodePool)]; ok && zone != "" {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As discussed in the CAPV design doc, adding a prefix may fail badly when using long node pool names. I've dropped the prefix in the VMG controller.

supervisorMachineCtx.VSphereMachine.Spec.FailureDomain = ptr.To(zone)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For day-2 operations, we only need to create the VM with the label "topology.kubernetes.io/zone: Zone-x"; there is no need to set VSphereMachine.Spec.FailureDomain or Machine.Spec.FailureDomain. We should keep them consistent, since both are empty in the cluster specification.

After the VM is created with the label, VM Service will place it into that zone.

}

// Fetch the MachineDeployment objects for the Cluster and generate the list of names
// to define the anti-affinity for the VM object.
mdList := &clusterv1.MachineDeploymentList{}
if err := v.Client.List(ctx, mdList,
client.InNamespace(supervisorMachineCtx.Cluster.Namespace),
client.MatchingLabels{
clusterv1.ClusterNameLabel: supervisorMachineCtx.Cluster.Name,
}); err != nil {
return false, err
}

antiAffineMDNames := []string{}
for _, md := range mdList.Items {
if md.Spec.Template.Spec.FailureDomain == "" && md.Name != nodePool {
antiAffineMDNames = append(antiAffineMDNames, md.Name)
}
}

vmAffinitySpec = &vmoprv1.VirtualMachineAffinitySpec{
VMAffinity: &vmoprv1.VirtualMachineAffinityVMAffinitySpec{
RequiredDuringSchedulingIgnoredDuringExecution: []vmoprv1.VMAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchLabels: map[string]string{
nodePoolLabelKey: nodePool,
},
},
TopologyKey: kubeTopologyZoneLabelKey,
},
},
},
VMAntiAffinity: &vmoprv1.VirtualMachineAntiAffinityVMAffinitySpec{
PreferredDuringSchedulingIgnoredDuringExecution: []vmoprv1.VMAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchLabels: map[string]string{
nodePoolLabelKey: nodePool,
},
},
TopologyKey: kubeHostNameLabelKey,
},
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: nodePoolLabelKey,
Operator: metav1.LabelSelectorOpIn,
Values: antiAffineMDNames,
},
},
},
TopologyKey: kubeTopologyZoneLabelKey,
},
},
},
}
}

if supervisorMachineCtx.Machine.Spec.FailureDomain != "" {
supervisorMachineCtx.VSphereMachine.Spec.FailureDomain = ptr.To(supervisorMachineCtx.Machine.Spec.FailureDomain)
}

// Check for the presence of an existing object
vmOperatorVM := &vmoprv1.VirtualMachine{}
key, err := virtualMachineObjectKey(supervisorMachineCtx.Machine.Name, supervisorMachineCtx.Machine.Namespace, supervisorMachineCtx.VSphereMachine.Spec.NamingStrategy)
Expand All @@ -208,7 +295,7 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap
}

// Reconcile the VM Operator VirtualMachine.
if err := v.reconcileVMOperatorVM(ctx, supervisorMachineCtx, vmOperatorVM); err != nil {
if err := v.reconcileVMOperatorVM(ctx, supervisorMachineCtx, vmOperatorVM, vmAffinitySpec); err != nil {
v1beta1conditions.MarkFalse(supervisorMachineCtx.VSphereMachine, infrav1.VMProvisionedCondition, vmwarev1.VMCreationFailedReason, clusterv1beta1.ConditionSeverityWarning,
"failed to create or update VirtualMachine: %v", err)
v1beta2conditions.Set(supervisorMachineCtx.VSphereMachine, metav1.Condition{
Expand Down Expand Up @@ -378,7 +465,8 @@ func (v *VmopMachineService) GetHostInfo(ctx context.Context, machineCtx capvcon
return vmOperatorVM.Status.Host, nil
}

func (v *VmopMachineService) reconcileVMOperatorVM(ctx context.Context, supervisorMachineCtx *vmware.SupervisorMachineContext, vmOperatorVM *vmoprv1.VirtualMachine) error {
// update the method to accept the vmAffinitySpec
func (v *VmopMachineService) reconcileVMOperatorVM(ctx context.Context, supervisorMachineCtx *vmware.SupervisorMachineContext, vmOperatorVM *vmoprv1.VirtualMachine, vmAffinitySpec *vmoprv1.VirtualMachineAffinitySpec) error {
// All Machine resources should define the version of Kubernetes to use.
if supervisorMachineCtx.Machine.Spec.Version == "" {
return errors.Errorf(
Expand Down Expand Up @@ -494,6 +582,15 @@ func (v *VmopMachineService) reconcileVMOperatorVM(ctx context.Context, supervis
vmOperatorVM = typedModified
}

if vmAffinitySpec != nil {
if vmOperatorVM.Spec.Affinity == nil {
vmOperatorVM.Spec.Affinity = vmAffinitySpec
}
if vmOperatorVM.Spec.GroupName == "" {
vmOperatorVM.Spec.GroupName = supervisorMachineCtx.GetCluster().Name
}
}

// Make sure the VSphereMachine owns the VM Operator VirtualMachine.
if err := ctrlutil.SetControllerReference(supervisorMachineCtx.VSphereMachine, vmOperatorVM, v.Client.Scheme()); err != nil {
return errors.Wrapf(err, "failed to mark %s %s/%s as owner of %s %s/%s",
Expand Down Expand Up @@ -800,6 +897,9 @@ func getVMLabels(supervisorMachineCtx *vmware.SupervisorMachineContext, vmLabels
// resources associated with the target cluster.
vmLabels[clusterv1.ClusterNameLabel] = supervisorMachineCtx.GetClusterContext().Cluster.Name

// Ensure the VM has the machine deployment name label
vmLabels[nodePoolLabelKey] = supervisorMachineCtx.Machine.Labels[clusterv1.MachineDeploymentNameLabel]

return vmLabels
}

Expand Down
4 changes: 2 additions & 2 deletions test/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ replace sigs.k8s.io/cluster-api/test => sigs.k8s.io/cluster-api/test v1.11.1

replace sigs.k8s.io/cluster-api-provider-vsphere => ../

replace github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels => github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels v0.0.0-20240404200847-de75746a9505
replace github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels => github.com/vmware-tanzu/vm-operator/pkg/constants/testlabels v1.9.1-0.20250908141901-a9e1dfbc0045

require (
github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d
// The version of vm-operator should be kept in sync with the manifests at: config/deployments/integration-tests
github.com/vmware-tanzu/vm-operator/api v1.8.6
github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20250908141901-a9e1dfbc0045
github.com/vmware/govmomi v0.51.0
)

Expand Down
4 changes: 2 additions & 2 deletions test/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -338,8 +338,8 @@ github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d h1:c
github.com/vmware-tanzu/net-operator-api v0.0.0-20240326163340-1f32d6bf7f9d/go.mod h1:JbFOh22iDsT5BowJe0GgpMI5e2/S7cWaJlv9LdURVQM=
github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d h1:z9lrzKVtNlujduv9BilzPxuge/LE2F0N1ms3TP4JZvw=
github.com/vmware-tanzu/nsx-operator/pkg/apis v0.0.0-20241112044858-9da8637c1b0d/go.mod h1:Q4JzNkNMvjo7pXtlB5/R3oME4Nhah7fAObWgghVmtxk=
github.com/vmware-tanzu/vm-operator/api v1.8.6 h1:NIndORjcnSmIlQsCMIewpIwg/ocRVDh2lYjOroTVLrU=
github.com/vmware-tanzu/vm-operator/api v1.8.6/go.mod h1:HHA2SNI9B5Yqtyp5t+Gt9WTWBi/fIkM6+MukDDSf11A=
github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20250908141901-a9e1dfbc0045 h1:zME8crazIAWVJGboJpSLl+qcRYQ8yA6hPQojz28gY5M=
github.com/vmware-tanzu/vm-operator/api v1.9.1-0.20250908141901-a9e1dfbc0045/go.mod h1:hkc/QZCSHcosWWMPS6VWWR12WenZcNE3BaTJ/8A8sNE=
github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505 h1:y4wXx1FUFqqSgJ/xUOEM1DLS2Uu0KaeLADWpzpioGTU=
github.com/vmware-tanzu/vm-operator/external/ncp v0.0.0-20240404200847-de75746a9505/go.mod h1:5rqRJ9zGR+KnKbkGx373WgN8xJpvAj99kHnfoDYRO5I=
github.com/vmware/govmomi v0.51.0 h1:n3RLS9aw/irTOKbiIyJzAb6rOat4YOVv/uDoRsNTSQI=
Expand Down
Loading