Skip to content

Commit 8e2cafc

Browse files
hardikdrprashanth26
authored and committed
Disable MachineTimeout freeze aka permanent-freeze temporarily
1 parent 10ab017 commit 8e2cafc

File tree

1 file changed

+11
-10
lines changed

1 file changed

+11
-10
lines changed

pkg/controller/machine_safety.go

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,11 @@ func (c *controller) reconcileClusterMachineSafety(key string) error {
4949
var wg sync.WaitGroup
5050

5151
glog.V(3).Info("SafetyCheck loop initializing")
52-
wg.Add(3)
52+
wg.Add(2)
5353
go c.checkAndFreezeORUnfreezeMachineSets(&wg)
5454
go c.checkVMObjects(&wg)
55-
go c.checkAndFreezeMachineSetTimeout(&wg)
55+
//Disable permanent freeze for now. We should enable it again once we have a sophisticated automatic unfreeze mechanism in place.
56+
//go c.checkAndFreezeMachineSetTimeout(&wg)
5657
wg.Wait()
5758
c.machineSafetyQueue.AddAfter("", 60*time.Second)
5859

@@ -63,10 +64,11 @@ func (c *controller) reconcileClusterMachineSafety(key string) error {
6364
// which have much greater than desired number of replicas of machine objects
6465
func (c *controller) checkAndFreezeORUnfreezeMachineSets(wg *sync.WaitGroup) {
6566

67+
defer wg.Done()
68+
6669
machineSets, err := c.machineSetLister.List(labels.Everything())
6770
if err != nil {
6871
glog.Error("Safety-Net: Error getting machineSets - ", err)
69-
wg.Done()
7072
return
7173
}
7274

@@ -75,14 +77,13 @@ func (c *controller) checkAndFreezeORUnfreezeMachineSets(wg *sync.WaitGroup) {
7577
filteredMachines, err := c.machineLister.List(labels.Everything())
7678
if err != nil {
7779
glog.Error("Safety-Net: Error getting machines - ", err)
78-
wg.Done()
7980
return
8081
}
8182
fullyLabeledReplicasCount := int32(0)
8283
templateLabel := labels.Set(machineSet.Spec.Template.Labels).AsSelectorPreValidated()
8384
for _, machine := range filteredMachines {
8485
if templateLabel.Matches(labels.Set(machine.Labels)) &&
85-
len(machine.OwnerReferences) == 1 &&
86+
len(machine.OwnerReferences) >= 1 &&
8687
machine.OwnerReferences[0].Name == machineSet.Name {
8788
fullyLabeledReplicasCount++
8889
}
@@ -103,9 +104,7 @@ func (c *controller) checkAndFreezeORUnfreezeMachineSets(wg *sync.WaitGroup) {
103104
true,
104105
)
105106
if err != nil {
106-
//TODO explore if we can log/annotate this machineset and continue here.
107107
glog.Error("Safety-Net: Error getting surge value - ", err)
108-
wg.Done()
109108
return
110109
}
111110

@@ -133,13 +132,13 @@ func (c *controller) checkAndFreezeORUnfreezeMachineSets(wg *sync.WaitGroup) {
133132
c.freezeMachineSetsAndDeployments(machineSet, OverShootingReplicaCount, message)
134133

135134
} else if machineSet.Labels["freeze"] == "True" &&
136-
machineSet.Status.Conditions != nil &&
137-
GetCondition(&machineSet.Status, v1alpha1.MachineSetFrozen).Reason == OverShootingReplicaCount &&
135+
//TODO: Reintroduce these checks once we have automated unfreeze for MachineTimeout aka meltdown.
136+
//machineSet.Status.Conditions != nil &&
137+
//GetCondition(&machineSet.Status, v1alpha1.MachineSetFrozen).Reason == OverShootingReplicaCount &&
138138
fullyLabeledReplicasCount <= lowerThreshold {
139139
c.unfreezeMachineSetsAndDeployments(machineSet)
140140
}
141141
}
142-
wg.Done()
143142
}
144143

145144
// checkVMObjects checks for orphan VMs (VMs that don't have a machine object backing)
@@ -456,6 +455,8 @@ func (c *controller) freezeMachineSetsAndDeployments(machineSet *v1alpha1.Machin
456455
glog.V(2).Infof("Freezing MachineSet %q due to %q", machineSet.Name, reason)
457456

458457
for {
458+
// TODO: Replace it with better retry logic. Replace all occurrences similarly.
459+
// Ref: https://github.com/kubernetes/kubernetes/blob/master/pkg/controller/deployment/util/replicaset_util.go#L35
459460
// Get the latest version of the machineSet so that we can avoid conflicts
460461
machineSet, err := c.controlMachineClient.MachineSets(machineSet.Namespace).Get(machineSet.Name, metav1.GetOptions{})
461462
if err != nil {

0 commit comments

Comments
 (0)