Commit 5e7a559

Merge pull request #7841 from pmendelski/force-ds-fix2
Force system-node-critical daemon-sets in node group templates
2 parents 4307e44 + 4f58055 · commit 5e7a559

5 files changed (+87 −22 lines)


cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor.go

Lines changed: 8 additions & 1 deletion
@@ -184,5 +184,12 @@ func isNodeGoodTemplateCandidate(node *apiv1.Node, now time.Time) bool {
 	ready, lastTransitionTime, _ := kube_util.GetReadinessState(node)
 	stable := lastTransitionTime.Add(stabilizationDelay).Before(now)
 	schedulable := !node.Spec.Unschedulable
-	return ready && stable && schedulable
+	toBeDeleted := false
+	for _, taint := range node.Spec.Taints {
+		if taint.Key == taints.ToBeDeletedTaint {
+			toBeDeleted = true
+			break
+		}
+	}
+	return ready && stable && schedulable && !toBeDeleted
 }
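
For context, a minimal standalone sketch of what the new check means in practice: a node that Cluster Autoscaler has already marked for deletion no longer serves as a node group template candidate, even if it is ready, stable and schedulable. The local toBeDeletedTaint constant is assumed to mirror taints.ToBeDeletedTaint used in the hunk above; this is a sketch, not the actual processor code.

package main

import (
	"fmt"

	apiv1 "k8s.io/api/core/v1"
)

// toBeDeletedTaint is assumed to mirror taints.ToBeDeletedTaint from
// cluster-autoscaler/utils/taints; hardcoded here to keep the sketch standalone.
const toBeDeletedTaint = "ToBeDeletedByClusterAutoscaler"

// hasToBeDeletedTaint reports whether the node already carries the autoscaler's
// deletion taint, which now disqualifies it as a template candidate.
func hasToBeDeletedTaint(node *apiv1.Node) bool {
	for _, taint := range node.Spec.Taints {
		if taint.Key == toBeDeletedTaint {
			return true
		}
	}
	return false
}

func main() {
	node := &apiv1.Node{}
	node.Spec.Taints = append(node.Spec.Taints, apiv1.Taint{
		Key:    toBeDeletedTaint,
		Effect: apiv1.TaintEffectNoSchedule,
	})
	fmt.Println(hasToBeDeletedTaint(node)) // true: skipped as a template candidate
}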

cluster-autoscaler/processors/nodeinfosprovider/mixed_nodeinfos_processor_test.go

Lines changed: 24 additions & 4 deletions
@@ -17,6 +17,7 @@ limitations under the License.
 package nodeinfosprovider
 
 import (
+	"fmt"
 	"testing"
 	"time"
 
@@ -50,14 +51,19 @@ func TestGetNodeInfosForGroups(t *testing.T) {
 	SetNodeReadyState(unready4, false, now)
 	justReady5 := BuildTestNode("n5", 5000, 5000)
 	SetNodeReadyState(justReady5, true, now)
+	readyToBeDeleted6 := BuildTestNode("n6", 2000, 2000)
+	SetNodeReadyState(readyToBeDeleted6, true, now.Add(-2*time.Minute))
+	setToBeDeletedTaint(readyToBeDeleted6)
+	ready7 := BuildTestNode("n7", 6000, 6000)
+	SetNodeReadyState(ready7, true, now.Add(-2*time.Minute))
 
 	tn := BuildTestNode("tn", 5000, 5000)
 	tni := framework.NewTestNodeInfo(tn)
 
 	// Cloud provider with TemplateNodeInfo implemented.
 	provider1 := testprovider.NewTestAutoprovisioningCloudProvider(
 		nil, nil, nil, nil, nil,
-		map[string]*framework.NodeInfo{"ng3": tni, "ng4": tni, "ng5": tni})
+		map[string]*framework.NodeInfo{"ng3": tni, "ng4": tni, "ng5": tni, "ng6": tni})
 	provider1.AddNodeGroup("ng1", 1, 10, 1) // Nodegroup with ready node.
 	provider1.AddNode("ng1", ready1)
 	provider1.AddNodeGroup("ng2", 1, 10, 1) // Nodegroup with ready and unready node.
@@ -68,15 +74,18 @@ func TestGetNodeInfosForGroups(t *testing.T) {
 	provider1.AddNodeGroup("ng4", 0, 1000, 0) // Nodegroup without nodes.
 	provider1.AddNodeGroup("ng5", 1, 10, 1) // Nodegroup with node that recently became ready.
 	provider1.AddNode("ng5", justReady5)
+	provider1.AddNodeGroup("ng6", 1, 10, 1) // Nodegroup with to be deleted node
+	provider1.AddNode("ng6", readyToBeDeleted6)
+	provider1.AddNode("ng6", ready7)
 
 	// Cloud provider with TemplateNodeInfo not implemented.
 	provider2 := testprovider.NewTestAutoprovisioningCloudProvider(nil, nil, nil, nil, nil, nil)
-	provider2.AddNodeGroup("ng6", 1, 10, 1) // Nodegroup without nodes.
+	provider2.AddNodeGroup("ng7", 1, 10, 1) // Nodegroup without nodes.
 
 	podLister := kube_util.NewTestPodLister([]*apiv1.Pod{})
 	registry := kube_util.NewListerRegistry(nil, nil, podLister, nil, nil, nil, nil, nil, nil)
 
-	nodes := []*apiv1.Node{justReady5, unready4, unready3, ready2, ready1}
+	nodes := []*apiv1.Node{justReady5, unready4, unready3, ready2, ready1, ready7, readyToBeDeleted6}
 	snapshot := testsnapshot.NewTestSnapshotOrDie(t)
 	err := snapshot.SetClusterState(nodes, nil, drasnapshot.Snapshot{})
 	assert.NoError(t, err)
@@ -90,7 +99,7 @@ func TestGetNodeInfosForGroups(t *testing.T) {
 	}
 	res, err := NewMixedTemplateNodeInfoProvider(&cacheTtl, false).Process(&ctx, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now)
 	assert.NoError(t, err)
-	assert.Equal(t, 5, len(res))
+	assert.Equal(t, 6, len(res))
 	info, found := res["ng1"]
 	assert.True(t, found)
 	assertEqualNodeCapacities(t, ready1, info.Node())
@@ -106,6 +115,9 @@ func TestGetNodeInfosForGroups(t *testing.T) {
 	info, found = res["ng5"]
 	assert.True(t, found)
 	assertEqualNodeCapacities(t, tn, info.Node())
+	info, found = res["ng6"]
+	assert.True(t, found)
+	assertEqualNodeCapacities(t, ready7, info.Node())
 
 	// Test for a nodegroup without nodes and TemplateNodeInfo not implemented by cloud proivder
 	ctx = context.AutoscalingContext{
@@ -314,3 +326,11 @@ func getNodeResource(node *apiv1.Node, resource apiv1.ResourceName) int64 {
 
 	return nodeCapacityValue
 }
+
+func setToBeDeletedTaint(node *apiv1.Node) {
+	node.Spec.Taints = append(node.Spec.Taints, apiv1.Taint{
+		Key:    taints.ToBeDeletedTaint,
+		Value:  fmt.Sprint(time.Now().Unix()),
+		Effect: apiv1.TaintEffectNoSchedule,
+	})
+}

cluster-autoscaler/simulator/node_info_utils.go

Lines changed: 26 additions & 15 deletions
@@ -148,23 +148,34 @@ func podsExpectedOnFreshNode(sanitizedExampleNodeInfo *framework.NodeInfo, daemo
 	// TODO(DRA): Figure out how to make this work for DS pods using DRA. Currently such pods would get force-added to the
 	// ClusterSnapshot, but the ResourceClaims reflecting their DRA usage on the Node wouldn't. So CA would be overestimating
 	// available DRA resources on the Node.
-	if forceDaemonSets {
-		var pendingDS []*appsv1.DaemonSet
-		for _, ds := range daemonsets {
-			if !runningDS[ds.UID] {
-				pendingDS = append(pendingDS, ds)
-			}
+	var pendingDS []*appsv1.DaemonSet
+	for _, ds := range daemonsets {
+		if !runningDS[ds.UID] {
+			pendingDS = append(pendingDS, ds)
 		}
-		// The provided nodeInfo has to have taints properly sanitized, or this won't work correctly.
-		daemonPods, err := daemonset.GetDaemonSetPodsForNode(sanitizedExampleNodeInfo, pendingDS)
-		if err != nil {
-			return nil, err
-		}
-		for _, pod := range daemonPods {
-			// There's technically no need to sanitize these pods since they're created from scratch, but
-			// it's nice to have the same suffix for all names in one sanitized NodeInfo when debugging.
-			result = append(result, &framework.PodInfo{Pod: createSanitizedPod(pod.Pod, sanitizedExampleNodeInfo.Node().Name, nameSuffix)})
+	}
+	// The provided nodeInfo has to have taints properly sanitized, or this won't work correctly.
+	daemonPods, err := daemonset.GetDaemonSetPodsForNode(sanitizedExampleNodeInfo, pendingDS)
+	if err != nil {
+		return nil, err
+	}
+	for _, pod := range daemonPods {
+		if !forceDaemonSets && !isPreemptingSystemNodeCritical(pod) {
+			continue
 		}
+		// There's technically no need to sanitize these pods since they're created from scratch, but
+		// it's nice to have the same suffix for all names in one sanitized NodeInfo when debugging.
+		result = append(result, &framework.PodInfo{Pod: createSanitizedPod(pod.Pod, sanitizedExampleNodeInfo.Node().Name, nameSuffix)})
 	}
 	return result, nil
 }
+
+func isPreemptingSystemNodeCritical(pod *framework.PodInfo) bool {
+	if pod.Spec.PriorityClassName != labels.SystemNodeCriticalLabel {
+		return false
+	}
+	if pod.Spec.PreemptionPolicy != nil && *pod.Spec.PreemptionPolicy != apiv1.PreemptLowerPriority {
+		return false
+	}
+	return true
+}
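
A minimal standalone sketch of the resulting selection rule for DaemonSet pods on a fresh template node: a pending pod is kept either when forceDaemonSets is set, or when it runs as system-node-critical with a preemption policy that still allows preempting lower-priority pods (the default). keepDaemonSetPod and systemNodeCritical below are local stand-ins for the filter and labels.SystemNodeCriticalLabel above, and the sketch operates on *apiv1.Pod rather than *framework.PodInfo.

package main

import (
	"fmt"

	apiv1 "k8s.io/api/core/v1"
)

// systemNodeCritical stands in for labels.SystemNodeCriticalLabel ("system-node-critical").
const systemNodeCritical = "system-node-critical"

// keepDaemonSetPod mirrors the new per-pod condition in podsExpectedOnFreshNode:
// keep every pending DaemonSet pod when forceDaemonSets is set, otherwise keep
// only pods that run as system-node-critical and may preempt lower-priority pods.
func keepDaemonSetPod(pod *apiv1.Pod, forceDaemonSets bool) bool {
	if forceDaemonSets {
		return true
	}
	if pod.Spec.PriorityClassName != systemNodeCritical {
		return false
	}
	if pod.Spec.PreemptionPolicy != nil && *pod.Spec.PreemptionPolicy != apiv1.PreemptLowerPriority {
		return false
	}
	return true
}

func main() {
	never := apiv1.PreemptNever
	critical := &apiv1.Pod{Spec: apiv1.PodSpec{PriorityClassName: systemNodeCritical}}
	nonPreempting := &apiv1.Pod{Spec: apiv1.PodSpec{PriorityClassName: systemNodeCritical, PreemptionPolicy: &never}}
	ordinary := &apiv1.Pod{}

	fmt.Println(keepDaemonSetPod(critical, false))      // true: forced into the template NodeInfo
	fmt.Println(keepDaemonSetPod(nonPreempting, false)) // false: PreemptNever opts the pod out
	fmt.Println(keepDaemonSetPod(ordinary, false))      // false unless forceDaemonSets is enabled
	fmt.Println(keepDaemonSetPod(ordinary, true))       // true
}

The PreemptLowerPriority check matters: a system-node-critical DaemonSet pod that explicitly sets PreemptNever is treated as optional, matching isPreemptingSystemNodeCritical in the hunk above.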

cluster-autoscaler/simulator/node_info_utils_test.go

Lines changed: 24 additions & 2 deletions
@@ -31,13 +31,15 @@ import (
 	resourceapi "k8s.io/api/resource/v1beta1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/kubernetes/pkg/controller/daemon"
+
 	"k8s.io/autoscaler/cluster-autoscaler/config"
 	drautils "k8s.io/autoscaler/cluster-autoscaler/simulator/dynamicresources/utils"
 	"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
+	"k8s.io/autoscaler/cluster-autoscaler/utils/labels"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/taints"
 	. "k8s.io/autoscaler/cluster-autoscaler/utils/test"
-	"k8s.io/kubernetes/pkg/controller/daemon"
 )
 
 var (
@@ -69,7 +71,21 @@
 			},
 		},
 	}
-	testDaemonSets = []*appsv1.DaemonSet{ds1, ds2, ds3}
+	ds4 = &appsv1.DaemonSet{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "ds4",
+			Namespace: "ds4-namespace",
+			UID:       types.UID("ds4"),
+		},
+		Spec: appsv1.DaemonSetSpec{
+			Template: apiv1.PodTemplateSpec{
+				Spec: apiv1.PodSpec{
+					PriorityClassName: labels.SystemNodeCriticalLabel,
+				},
+			},
+		},
+	}
+	testDaemonSets = []*appsv1.DaemonSet{ds1, ds2, ds3, ds4}
 )
 
 func TestSanitizedTemplateNodeInfoFromNodeGroup(t *testing.T) {
@@ -98,6 +114,7 @@ func TestSanitizedTemplateNodeInfoFromNodeGroup(t *testing.T) {
 			wantPods: []*apiv1.Pod{
 				buildDSPod(ds1, "n"),
 				buildDSPod(ds2, "n"),
+				buildDSPod(ds4, "n"),
 			},
 		},
 		{
@@ -116,6 +133,7 @@ func TestSanitizedTemplateNodeInfoFromNodeGroup(t *testing.T) {
 				SetMirrorPodSpec(BuildScheduledTestPod("p3", 100, 1, "n")),
 				buildDSPod(ds1, "n"),
 				buildDSPod(ds2, "n"),
+				buildDSPod(ds4, "n"),
 			},
 		},
 	} {
@@ -208,6 +226,7 @@ func TestSanitizedTemplateNodeInfoFromNodeInfo(t *testing.T) {
 			daemonSets: testDaemonSets,
 			wantPods: []*apiv1.Pod{
 				buildDSPod(ds1, "n"),
+				buildDSPod(ds4, "n"),
 			},
 		},
 		{
@@ -232,6 +251,7 @@ func TestSanitizedTemplateNodeInfoFromNodeInfo(t *testing.T) {
 			wantPods: []*apiv1.Pod{
 				buildDSPod(ds1, "n"),
 				buildDSPod(ds2, "n"),
+				buildDSPod(ds4, "n"),
 			},
 		},
 		{
@@ -248,6 +268,7 @@ func TestSanitizedTemplateNodeInfoFromNodeInfo(t *testing.T) {
 			wantPods: []*apiv1.Pod{
 				SetMirrorPodSpec(BuildScheduledTestPod("p3", 100, 1, "n")),
 				buildDSPod(ds1, "n"),
+				buildDSPod(ds4, "n"),
 			},
 		},
 		{
@@ -266,6 +287,7 @@ func TestSanitizedTemplateNodeInfoFromNodeInfo(t *testing.T) {
 				SetMirrorPodSpec(BuildScheduledTestPod("p3", 100, 1, "n")),
 				buildDSPod(ds1, "n"),
 				buildDSPod(ds2, "n"),
+				buildDSPod(ds4, "n"),
 			},
 		},
 	}

cluster-autoscaler/utils/labels/labels.go

Lines changed: 5 additions & 0 deletions
@@ -25,6 +25,11 @@ import (
 	"k8s.io/apimachinery/pkg/api/resource"
 )
 
+const (
+	// SystemNodeCriticalLabel is a label that marks critical pods with the highest priority.
+	SystemNodeCriticalLabel = "system-node-critical"
+)
+
 var (
 	// cpu amount used for account pods that don't specify cpu requests
 	defaultMinCPU = *resource.NewMilliQuantity(50, resource.DecimalSI)
