@@ -20,64 +20,75 @@ import (
2020 "context"
2121 "errors"
2222 "fmt"
23- "reflect"
2423 "strings"
25- "time "
24+ "sync "
2625
27- "github.com/aws/smithy-go/ptr"
2826 "github.com/vmware/govmomi/find"
2927 "github.com/vmware/govmomi/object"
3028 "github.com/vmware/govmomi/vim25/mo"
3129 "github.com/vmware/govmomi/vim25/types"
30+ "go.uber.org/zap"
31+
32+ clusterv1alpha1 "github.com/kubermatic/machine-controller/pkg/apis/cluster/v1alpha1"
33+
34+ "k8s.io/utils/ptr"
3235)
3336
37+ var lock sync.Mutex
38+
3439// createOrUpdateVMAntiAffinityRule creates or updates an anti affinity rule with the name in the given cluster.
3540// VMs are attached to the rule based on their folder path and name prefix in vsphere.
3641// A minimum of two VMs is required.
37- func (p * provider ) createOrUpdateVMAntiAffinityRule (ctx context.Context , session * Session , name string , config * Config ) error {
38- p .mutex .Lock ()
39- defer p .mutex .Unlock ()
40-
42+ func (p * provider ) createOrUpdateVMAntiAffinityRule (ctx context.Context , log * zap.SugaredLogger , session * Session , machine * clusterv1alpha1.Machine , config * Config ) error {
43+ lock .Lock ()
44+ defer lock .Unlock ()
4145 cluster , err := session .Finder .ClusterComputeResource (ctx , config .Cluster )
4246 if err != nil {
4347 return err
4448 }
4549
50+ machineSetName := machine .Name [:strings .LastIndex (machine .Name , "-" )]
4651 vmsInFolder , err := session .Finder .VirtualMachineList (ctx , strings .Join ([]string {config .Folder , "*" }, "/" ))
4752 if err != nil {
4853 if errors .Is (err , & find.NotFoundError {}) {
49- return removeVMAntiAffinityRule (ctx , session , config .Cluster , name )
54+ return removeVMAntiAffinityRule (ctx , session , config .Cluster , machineSetName )
5055 }
5156 return err
5257 }
5358
5459 var ruleVMRef []types.ManagedObjectReference
5560 for _ , vm := range vmsInFolder {
56- if strings .HasPrefix (vm .Name (), name ) {
61+ // Only add VMs with the same machineSetName to the rule and exclude the machine itself if it is being deleted
62+ if strings .HasPrefix (vm .Name (), machineSetName ) && ! (vm .Name () == machine .Name && machine .DeletionTimestamp != nil ) {
5763 ruleVMRef = append (ruleVMRef , vm .Reference ())
5864 }
5965 }
6066
61- // minimum of two vms required
62- if len (ruleVMRef ) < 2 {
63- return removeVMAntiAffinityRule (ctx , session , config .Cluster , name )
67+ if len (ruleVMRef ) == 0 {
68+ log .Debugf ("No VMs in folder %s with name prefix %s found" , config .Folder , machineSetName )
69+ return removeVMAntiAffinityRule (ctx , session , config .Cluster , machineSetName )
70+ } else if len (ruleVMRef ) < 2 {
71+ // DRS rule must have at least two virtual machine members
72+ log .Debugf ("Not enough VMs in folder %s to create anti-affinity rule" , config .Folder )
73+ return nil
6474 }
6575
66- info , err := findClusterAntiAffinityRuleByName (ctx , cluster , name )
76+ info , err := findClusterAntiAffinityRuleByName (ctx , cluster , machineSetName )
6777 if err != nil {
6878 return err
6979 }
7080
81+ log .Debugf ("Creating or updating anti-affinity rule for VMs %v in cluster %s" , ruleVMRef , config .Cluster )
7182 operation := types .ArrayUpdateOperationEdit
7283
7384 //create new rule
7485 if info == nil {
7586 info = & types.ClusterAntiAffinityRuleSpec {
7687 ClusterRuleInfo : types.ClusterRuleInfo {
77- Enabled : ptr .Bool (true ),
78- Mandatory : ptr .Bool (false ),
79- Name : name ,
80- UserCreated : ptr .Bool (true ),
88+ Enabled : ptr .To (true ),
89+ Mandatory : ptr .To (false ),
90+ Name : machineSetName ,
91+ UserCreated : ptr .To (true ),
8192 },
8293 }
8394 operation = types .ArrayUpdateOperationAdd
@@ -95,49 +106,22 @@ func (p *provider) createOrUpdateVMAntiAffinityRule(ctx context.Context, session
95106 },
96107 }
97108
109+ log .Debugf ("Performing %q for anti-affinity rule for VMs %v in cluster %s" , operation , ruleVMRef , config .Cluster )
98110 task , err := cluster .Reconfigure (ctx , spec , true )
99111 if err != nil {
100112 return err
101113 }
102114
103- err = task .Wait (ctx )
115+ taskResult , err : = task .WaitForResult (ctx )
104116 if err != nil {
105- return err
117+ return fmt . Errorf ( "error waiting for cluster %v reconfiguration to complete" , cluster . Name ())
106118 }
107-
108- return waitForRule (ctx , cluster , info )
109- }
110-
111- // waitForRule checks periodically the vsphere api for the ClusterAntiAffinityRule and returns error if the rule was not found after a timeout.
112- func waitForRule (ctx context.Context , cluster * object.ClusterComputeResource , rule * types.ClusterAntiAffinityRuleSpec ) error {
113- timeout := time .NewTimer (10 * time .Second )
114- ticker := time .NewTicker (500 * time .Millisecond )
115- defer timeout .Stop ()
116- defer ticker .Stop ()
117-
118- for {
119- select {
120- case <- timeout .C :
121-
122- info , err := findClusterAntiAffinityRuleByName (ctx , cluster , rule .Name )
123- if err != nil {
124- return err
125- }
126-
127- if ! reflect .DeepEqual (rule , info ) {
128- return fmt .Errorf ("expected anti affinity changes not found in vsphere" )
129- }
130- case <- ticker .C :
131- info , err := findClusterAntiAffinityRuleByName (ctx , cluster , rule .Name )
132- if err != nil {
133- return err
134- }
135-
136- if reflect .DeepEqual (rule , info ) {
137- return nil
138- }
139- }
119+ if taskResult .State != types .TaskInfoStateSuccess {
120+ return fmt .Errorf ("cluster %v reconfiguration task was not successful" , cluster .Name ())
140121 }
122+ log .Debugf ("Successfully created/updated anti-affinity rule for machineset %v against machine %v" , machineSetName , machine .Name )
123+
124+ return nil
141125}
142126
143127// removeVMAntiAffinityRule removes an anti affinity rule with the name in the given cluster.
@@ -172,7 +156,15 @@ func removeVMAntiAffinityRule(ctx context.Context, session *Session, clusterPath
172156 if err != nil {
173157 return err
174158 }
175- return task .Wait (ctx )
159+
160+ taskResult , err := task .WaitForResult (ctx )
161+ if err != nil {
162+ return fmt .Errorf ("error waiting for cluster %v reconfiguration to complete" , cluster .Name ())
163+ }
164+ if taskResult .State != types .TaskInfoStateSuccess {
165+ return fmt .Errorf ("cluster %v reconfiguration task was not successful" , cluster .Name ())
166+ }
167+ return nil
176168}
177169
178170func findClusterAntiAffinityRuleByName (ctx context.Context , cluster * object.ClusterComputeResource , name string ) (* types.ClusterAntiAffinityRuleSpec , error ) {
0 commit comments