@@ -25,13 +25,9 @@ package controller
2525import (
2626 "context"
2727 "fmt"
28- "time"
29-
3028 "github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1"
31- "github.com/gardener/machine-controller-manager/pkg/util/nodeops"
3229 v1 "k8s.io/api/core/v1"
3330 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
34- "k8s.io/apimachinery/pkg/labels"
3531 "k8s.io/apimachinery/pkg/types"
3632 "k8s.io/klog/v2"
3733)
@@ -64,17 +60,17 @@ func (dc *controller) rollback(ctx context.Context, d *v1alpha1.MachineDeploymen
6460 klog .V (4 ).Infof ("Found machine set %q with desired revision %d" , is .Name , v )
6561
6662 // Remove PreferNoSchedule taints from nodes which were backing the machineSet
67- err = dc .removeTaintNodesBackingMachineSet (
68- ctx ,
69- is ,
70- & v1.Taint {
63+ if _ , exists := is .Annotations [PreferNoScheduleKey ]; exists {
64+ taint := & v1.Taint {
7165 Key : PreferNoScheduleKey ,
7266 Value : "True" ,
7367 Effect : "PreferNoSchedule" ,
74- },
75- )
76- if err != nil {
77- klog .Warningf ("Failed to remove taints %s off nodes. Error: %s" , PreferNoScheduleKey , err )
68+ }
69+
70+ if err := dc .removeTaintNodesBackingMachineSet (ctx , is , taint ); err != nil {
71+ klog .Errorf ("Failed to remove taints %s from all nodes. Error: %v" , PreferNoScheduleKey , err )
72+ return err
73+ }
7874 }
7975
8076 // rollback by copying podTemplate.Spec from the machine set
@@ -139,93 +135,3 @@ func (dc *controller) updateMachineDeploymentAndClearRollbackTo(ctx context.Cont
139135 _ , err := dc .controlMachineClient .MachineDeployments (d .Namespace ).Update (ctx , d , metav1.UpdateOptions {})
140136 return err
141137}
142-
143- // removeTaintNodesBackingMachineSet removes taints from all nodes backing the machineSets
144- func (dc * controller ) removeTaintNodesBackingMachineSet (ctx context.Context , machineSet * v1alpha1.MachineSet , taint * v1.Taint ) error {
145-
146- if _ , exists := machineSet .Annotations [taint .Key ]; ! exists {
147- // No taint exists
148- klog .Warningf ("No taint exists on machineSet: %s. Hence not removing." , machineSet .Name )
149- return nil
150- }
151-
152- klog .V (2 ).Infof ("Trying to untaint MachineSet object %q with %s to enable scheduling of pods" , machineSet .Name , taint .Key )
153- selector , err := metav1 .LabelSelectorAsSelector (machineSet .Spec .Selector )
154- if err != nil {
155- return err
156- }
157-
158- // list all machines to include the machines that don't match the ms`s selector
159- // anymore but has the stale controller ref.
160- // TODO: Do the List and Filter in a single pass, or use an index.
161- filteredMachines , err := dc .machineLister .List (labels .Everything ())
162- if err != nil {
163- return err
164- }
165- // NOTE: filteredMachines are pointing to objects from cache - if you need to
166- // modify them, you need to copy it first.
167- filteredMachines , err = dc .claimMachines (ctx , machineSet , selector , filteredMachines )
168- if err != nil {
169- return err
170- }
171-
172- // Iterate through all machines and remove the PreferNoSchedule taint
173- // to avoid scheduling on older machines
174- for _ , machine := range filteredMachines {
175- if machine .Labels [v1alpha1 .NodeLabelKey ] != "" {
176- node , err := dc .targetCoreClient .CoreV1 ().Nodes ().Get (ctx , machine .Labels [v1alpha1 .NodeLabelKey ], metav1.GetOptions {})
177- if err != nil {
178- klog .Warningf ("Node taint removal failed for node: %s, Error: %s" , machine .Labels [v1alpha1 .NodeLabelKey ], err )
179- continue
180- }
181- if err := nodeops .RemoveTaintOffNode (
182- ctx ,
183- dc .targetCoreClient ,
184- machine .Labels [v1alpha1 .NodeLabelKey ],
185- node ,
186- taint ,
187- ); err != nil {
188- klog .Warningf ("Node taint removal failed for node: %s, Error: %s" , machine .Labels [v1alpha1 .NodeLabelKey ], err )
189- }
190- }
191- }
192-
193- retryDeadline := time .Now ().Add (maxRetryDeadline )
194- for {
195- machineSet , err = dc .controlMachineClient .MachineSets (machineSet .Namespace ).Get (ctx , machineSet .Name , metav1.GetOptions {})
196- if err != nil {
197- if time .Now ().Before (retryDeadline ) {
198- klog .Warningf ("Unable to fetch MachineSet object %s, Error: %+v" , machineSet .Name , err )
199- time .Sleep (conflictRetryInterval )
200- continue
201- } else {
202- // Timeout occurred
203- klog .Errorf ("Timeout occurred: Unable to fetch MachineSet object %s, Error: %+v" , machineSet .Name , err )
204- return err
205- }
206- }
207-
208- msCopy := machineSet .DeepCopy ()
209- delete (msCopy .Annotations , taint .Key )
210-
211- machineSet , err = dc .controlMachineClient .MachineSets (msCopy .Namespace ).Update (ctx , msCopy , metav1.UpdateOptions {})
212-
213- if err != nil {
214- if time .Now ().Before (retryDeadline ) {
215- klog .Warningf ("Unable to update MachineSet object %s, Error: %+v" , machineSet .Name , err )
216- time .Sleep (conflictRetryInterval )
217- continue
218- } else {
219- // Timeout occurred
220- klog .Errorf ("Timeout occurred: Unable to update MachineSet object %s, Error: %+v" , machineSet .Name , err )
221- return err
222- }
223- }
224-
225- // Break out of loop when update succeeds
226- break
227- }
228- klog .V (2 ).Infof ("Removed taint %s from MachineSet object %q" , taint .Key , machineSet .Name )
229-
230- return nil
231- }
0 commit comments