✨ WIP: Add affinity/anti-affinity to VirtualMachine spec #3645
base: release-1.14
@@ -41,6 +41,7 @@ import (
	infrav1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/v1beta1"
	vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1"
	"sigs.k8s.io/cluster-api-provider-vsphere/feature"
	capvcontext "sigs.k8s.io/cluster-api-provider-vsphere/pkg/context"
	"sigs.k8s.io/cluster-api-provider-vsphere/pkg/context/vmware"
	infrautilv1 "sigs.k8s.io/cluster-api-provider-vsphere/pkg/util"
@@ -171,10 +172,6 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap
		return false, errors.New("received unexpected SupervisorMachineContext type")
	}

	if supervisorMachineCtx.Machine.Spec.FailureDomain != "" {
		supervisorMachineCtx.VSphereMachine.Spec.FailureDomain = ptr.To(supervisorMachineCtx.Machine.Spec.FailureDomain)
	}

	// If debug logging is enabled, report the number of vms in the cluster before and after the reconcile
	if log.V(5).Enabled() {
		vms, err := v.getVirtualMachinesInCluster(ctx, supervisorMachineCtx)
@@ -188,6 +185,96 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap
	// Set the VM state. Will get reset throughout the reconcile
	supervisorMachineCtx.VSphereMachine.Status.VMStatus = vmwarev1.VirtualMachineStatePending

	// TODO: add check for control plane machine
	var vmAffinitySpec *vmoprv1.VirtualMachineAffinitySpec
	if feature.Gates.Enabled(feature.NodeAutoPlacement) &&
		supervisorMachineCtx.Machine.Spec.FailureDomain == "" &&
		len(supervisorMachineCtx.VSphereCluster.Status.FailureDomains) > 1 {
		// Check for the presence of a VirtualMachineGroup with the same name and namespace as the Cluster
		vmOperatorVMGroup := &vmoprv1.VirtualMachineGroup{}
		key := client.ObjectKey{
			Namespace: supervisorMachineCtx.Cluster.Namespace,
			Name:      supervisorMachineCtx.Cluster.Name,
		}
		err := v.Client.Get(ctx, key, vmOperatorVMGroup)
		if err != nil {
			if !apierrors.IsNotFound(err) {
				return false, err
			}
			if apierrors.IsNotFound(err) {
				log.V(4).Info("VirtualMachineGroup not found, requeueing")
				return true, nil
			}
		}

		// Check for the presence of the node-pool label on the VirtualMachineGroup
		nodePool := supervisorMachineCtx.Machine.Labels[clusterv1.MachineDeploymentNameLabel]
		if zone, ok := vmOperatorVMGroup.Labels[fmt.Sprintf("capv/%s", nodePool)]; ok && zone != "" {
Review comment: As discussed in the capv design doc, adding a prefix may fail badly with long node pool names. I've dropped the prefix in the VMG controller.
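A minimal sketch of the unprefixed lookup the comment describes, assuming the VMG controller publishes one label per MachineDeployment keyed directly by its name (the exact label scheme is an assumption, not part of this diff):

```go
// Hypothetical: the VirtualMachineGroup carries labels like "md-a: zone-1",
// one per MachineDeployment, written by the VMG controller (not shown here).
nodePool := supervisorMachineCtx.Machine.Labels[clusterv1.MachineDeploymentNameLabel]
if zone, ok := vmOperatorVMGroup.Labels[nodePool]; ok && zone != "" {
	// use zone for placement of this Machine's VM
}
```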
			supervisorMachineCtx.VSphereMachine.Spec.FailureDomain = ptr.To(zone)
Review comment: For day-2 operations we only need to create the VM with the label "topology.kubernetes.io/zone: Zone-x"; there is no need to set VSphereMachine.Spec.FailureDomain or Machine.Spec.FailureDomain. We should keep them consistent, since they are empty in the cluster specification. After the VM is created with that label, VM Service will place it into that zone.
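A rough sketch of the alternative the comment suggests, assuming the zone is propagated as a VM label rather than via FailureDomain; the plumbing through getVMLabels is hypothetical and not part of this diff:

```go
// Sketch only: instead of setting VSphereMachine.Spec.FailureDomain, pass the
// zone down as a label on the VirtualMachine and let VM Service place it.
// kubeTopologyZoneLabelKey is the "topology.kubernetes.io/zone" key already
// used by the affinity terms below.
if zone, ok := vmOperatorVMGroup.Labels[nodePool]; ok && zone != "" {
	vmLabels[kubeTopologyZoneLabelKey] = zone // hypothetical plumbing via getVMLabels
}
```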
		}

		// Fetch the MachineDeployment objects for the Cluster and generate the list of names
		// to define the anti-affinity for the VM object.
		mdList := &clusterv1.MachineDeploymentList{}
		if err := v.Client.List(ctx, mdList,
			client.InNamespace(supervisorMachineCtx.Cluster.Namespace),
			client.MatchingLabels{
				clusterv1.ClusterNameLabel: supervisorMachineCtx.Cluster.Name,
			}); err != nil {
			return false, err
		}

		antiAffineMDNames := []string{}
		for _, md := range mdList.Items {
			if md.Spec.Template.Spec.FailureDomain == "" && md.Name != nodePool {
				antiAffineMDNames = append(antiAffineMDNames, md.Name)
			}
		}

		vmAffinitySpec = &vmoprv1.VirtualMachineAffinitySpec{
			VMAffinity: &vmoprv1.VirtualMachineAffinityVMAffinitySpec{
				RequiredDuringSchedulingIgnoredDuringExecution: []vmoprv1.VMAffinityTerm{
					{
						LabelSelector: &metav1.LabelSelector{
							MatchLabels: map[string]string{
								nodePoolLabelKey: nodePool,
							},
						},
						TopologyKey: kubeTopologyZoneLabelKey,
					},
				},
			},
			VMAntiAffinity: &vmoprv1.VirtualMachineAntiAffinityVMAffinitySpec{
				PreferredDuringSchedulingIgnoredDuringExecution: []vmoprv1.VMAffinityTerm{
					{
						LabelSelector: &metav1.LabelSelector{
							MatchLabels: map[string]string{
								nodePoolLabelKey: nodePool,
							},
						},
						TopologyKey: kubeHostNameLabelKey,
					},
					{
						LabelSelector: &metav1.LabelSelector{
							MatchExpressions: []metav1.LabelSelectorRequirement{
								{
									Key:      nodePoolLabelKey,
									Operator: metav1.LabelSelectorOpIn,
									Values:   antiAffineMDNames,
								},
							},
						},
						TopologyKey: kubeTopologyZoneLabelKey,
					},
				},
			},
		}
	}
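To make the generated terms concrete, here is a small hypothetical walkthrough (the names md-a, md-b, md-c and zone-3 are made up, not from this PR) of which MachineDeployments end up in antiAffineMDNames and what each term requests:

```go
// Hypothetical input: the reconciled Machine belongs to MachineDeployment "md-a".
mds := map[string]string{ // name -> spec.template.spec.failureDomain
	"md-a": "",       // current MD: excluded from the anti-affinity list
	"md-b": "",       // auto-placed MD: goes into the zone anti-affinity term
	"md-c": "zone-3", // explicitly pinned MD: ignored
}
antiAffineMDNames := []string{}
for name, fd := range mds {
	if fd == "" && name != "md-a" {
		antiAffineMDNames = append(antiAffineMDNames, name)
	}
}
// antiAffineMDNames == ["md-b"], so the spec above asks the placement engine to:
//   1. require all "md-a" VMs to land in the same zone (VM affinity, zone key),
//   2. prefer spreading "md-a" VMs across hosts (anti-affinity, hostname key),
//   3. prefer keeping "md-a" VMs out of zones used by "md-b" (anti-affinity, zone key).
```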
	if supervisorMachineCtx.Machine.Spec.FailureDomain != "" {
		supervisorMachineCtx.VSphereMachine.Spec.FailureDomain = ptr.To(supervisorMachineCtx.Machine.Spec.FailureDomain)
	}

	// Check for the presence of an existing object
	vmOperatorVM := &vmoprv1.VirtualMachine{}
	key, err := virtualMachineObjectKey(supervisorMachineCtx.Machine.Name, supervisorMachineCtx.Machine.Namespace, supervisorMachineCtx.VSphereMachine.Spec.NamingStrategy)
@@ -208,7 +295,7 @@ func (v *VmopMachineService) ReconcileNormal(ctx context.Context, machineCtx cap
	}

	// Reconcile the VM Operator VirtualMachine.
	if err := v.reconcileVMOperatorVM(ctx, supervisorMachineCtx, vmOperatorVM); err != nil {
	if err := v.reconcileVMOperatorVM(ctx, supervisorMachineCtx, vmOperatorVM, vmAffinitySpec); err != nil {
		v1beta1conditions.MarkFalse(supervisorMachineCtx.VSphereMachine, infrav1.VMProvisionedCondition, vmwarev1.VMCreationFailedReason, clusterv1beta1.ConditionSeverityWarning,
			"failed to create or update VirtualMachine: %v", err)
		v1beta2conditions.Set(supervisorMachineCtx.VSphereMachine, metav1.Condition{
@@ -378,7 +465,8 @@ func (v *VmopMachineService) GetHostInfo(ctx context.Context, machineCtx capvcon
	return vmOperatorVM.Status.Host, nil
}

func (v *VmopMachineService) reconcileVMOperatorVM(ctx context.Context, supervisorMachineCtx *vmware.SupervisorMachineContext, vmOperatorVM *vmoprv1.VirtualMachine) error {
// update the method to accept the vmAffinitySpec
func (v *VmopMachineService) reconcileVMOperatorVM(ctx context.Context, supervisorMachineCtx *vmware.SupervisorMachineContext, vmOperatorVM *vmoprv1.VirtualMachine, vmAffinitySpec *vmoprv1.VirtualMachineAffinitySpec) error {
	// All Machine resources should define the version of Kubernetes to use.
	if supervisorMachineCtx.Machine.Spec.Version == "" {
		return errors.Errorf(
@@ -494,6 +582,15 @@ func (v *VmopMachineService) reconcileVMOperatorVM(ctx context.Context, supervis
		vmOperatorVM = typedModified
	}

	if vmAffinitySpec != nil {
		if vmOperatorVM.Spec.Affinity == nil {
			vmOperatorVM.Spec.Affinity = vmAffinitySpec
		}
		if vmOperatorVM.Spec.GroupName == "" {
			vmOperatorVM.Spec.GroupName = supervisorMachineCtx.GetCluster().Name
		}
	}
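Setting Spec.GroupName assumes a per-cluster VirtualMachineGroup already exists (it is fetched earlier in ReconcileNormal). A rough sketch of the object this name is expected to resolve to, purely illustrative since the group is created outside this diff:

```go
// Illustrative only: the VirtualMachineGroup is named after the Cluster and
// lives in the same namespace, so vmOperatorVM.Spec.GroupName == Cluster.Name
// resolves to it. Any additional fields on the group are managed by the
// (out-of-tree) VMG controller and are not part of this change.
vmGroup := &vmoprv1.VirtualMachineGroup{
	ObjectMeta: metav1.ObjectMeta{
		Name:      supervisorMachineCtx.Cluster.Name,
		Namespace: supervisorMachineCtx.Cluster.Namespace,
	},
}
_ = vmGroup
```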
	// Make sure the VSphereMachine owns the VM Operator VirtualMachine.
	if err := ctrlutil.SetControllerReference(supervisorMachineCtx.VSphereMachine, vmOperatorVM, v.Client.Scheme()); err != nil {
		return errors.Wrapf(err, "failed to mark %s %s/%s as owner of %s %s/%s",
@@ -800,6 +897,9 @@ func getVMLabels(supervisorMachineCtx *vmware.SupervisorMachineContext, vmLabels
	// resources associated with the target cluster.
	vmLabels[clusterv1.ClusterNameLabel] = supervisorMachineCtx.GetClusterContext().Cluster.Name

	// Ensure the VM has the machine deployment name label
	vmLabels[nodePoolLabelKey] = supervisorMachineCtx.Machine.Labels[clusterv1.MachineDeploymentNameLabel]

	return vmLabels
}
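This label is what the affinity and anti-affinity LabelSelectors above match on. A small illustrative example of the labels a worker VM would end up with (the cluster and MachineDeployment names are made up):

```go
// Illustrative labels on a VirtualMachine for MachineDeployment "md-a" in
// cluster "c1"; nodePoolLabelKey is the key the (anti-)affinity terms select on.
vmLabels := map[string]string{
	clusterv1.ClusterNameLabel: "c1",   // "cluster.x-k8s.io/cluster-name"
	nodePoolLabelKey:           "md-a", // matched by the VM affinity/anti-affinity terms
}
_ = vmLabels
```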
Review comment (nit): let's use "Machine Deployment" in capv instead of "node pool".