Skip to content

Commit 68c7d1a

Browse files
committed
Force preempting system-node-critical daemon sets
1 parent edfda70 commit 68c7d1a

File tree

3 files changed

+55
-17
lines changed

3 files changed

+55
-17
lines changed

cluster-autoscaler/simulator/node_info_utils.go

Lines changed: 26 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -148,23 +148,34 @@ func podsExpectedOnFreshNode(sanitizedExampleNodeInfo *framework.NodeInfo, daemo
148148
// TODO(DRA): Figure out how to make this work for DS pods using DRA. Currently such pods would get force-added to the
149149
// ClusterSnapshot, but the ResourceClaims reflecting their DRA usage on the Node wouldn't. So CA would be overestimating
150150
// available DRA resources on the Node.
151-
if forceDaemonSets {
152-
var pendingDS []*appsv1.DaemonSet
153-
for _, ds := range daemonsets {
154-
if !runningDS[ds.UID] {
155-
pendingDS = append(pendingDS, ds)
156-
}
151+
var pendingDS []*appsv1.DaemonSet
152+
for _, ds := range daemonsets {
153+
if !runningDS[ds.UID] {
154+
pendingDS = append(pendingDS, ds)
157155
}
158-
// The provided nodeInfo has to have taints properly sanitized, or this won't work correctly.
159-
daemonPods, err := daemonset.GetDaemonSetPodsForNode(sanitizedExampleNodeInfo, pendingDS)
160-
if err != nil {
161-
return nil, err
162-
}
163-
for _, pod := range daemonPods {
164-
// There's technically no need to sanitize these pods since they're created from scratch, but
165-
// it's nice to have the same suffix for all names in one sanitized NodeInfo when debugging.
166-
result = append(result, &framework.PodInfo{Pod: createSanitizedPod(pod.Pod, sanitizedExampleNodeInfo.Node().Name, nameSuffix)})
156+
}
157+
// The provided nodeInfo has to have taints properly sanitized, or this won't work correctly.
158+
daemonPods, err := daemonset.GetDaemonSetPodsForNode(sanitizedExampleNodeInfo, pendingDS)
159+
if err != nil {
160+
return nil, err
161+
}
162+
for _, pod := range daemonPods {
163+
if !forceDaemonSets && !isPreemptingSystemNodeCritical(pod) {
164+
continue
167165
}
166+
// There's technically no need to sanitize these pods since they're created from scratch, but
167+
// it's nice to have the same suffix for all names in one sanitized NodeInfo when debugging.
168+
result = append(result, &framework.PodInfo{Pod: createSanitizedPod(pod.Pod, sanitizedExampleNodeInfo.Node().Name, nameSuffix)})
168169
}
169170
return result, nil
170171
}
172+
173+
func isPreemptingSystemNodeCritical(pod *framework.PodInfo) bool {
174+
if pod.Spec.PriorityClassName != labels.SystemNodeCriticalLabel {
175+
return false
176+
}
177+
if pod.Spec.PreemptionPolicy != nil && *pod.Spec.PreemptionPolicy != apiv1.PreemptLowerPriority {
178+
return false
179+
}
180+
return true
181+
}

cluster-autoscaler/simulator/node_info_utils_test.go

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,15 @@ import (
3131
resourceapi "k8s.io/api/resource/v1beta1"
3232
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3333
"k8s.io/apimachinery/pkg/types"
34+
"k8s.io/kubernetes/pkg/controller/daemon"
35+
3436
"k8s.io/autoscaler/cluster-autoscaler/config"
3537
drautils "k8s.io/autoscaler/cluster-autoscaler/simulator/dynamicresources/utils"
3638
"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
3739
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
40+
"k8s.io/autoscaler/cluster-autoscaler/utils/labels"
3841
"k8s.io/autoscaler/cluster-autoscaler/utils/taints"
3942
. "k8s.io/autoscaler/cluster-autoscaler/utils/test"
40-
"k8s.io/kubernetes/pkg/controller/daemon"
4143
)
4244

4345
var (
@@ -69,7 +71,21 @@ var (
6971
},
7072
},
7173
}
72-
testDaemonSets = []*appsv1.DaemonSet{ds1, ds2, ds3}
74+
ds4 = &appsv1.DaemonSet{
75+
ObjectMeta: metav1.ObjectMeta{
76+
Name: "ds4",
77+
Namespace: "ds4-namespace",
78+
UID: types.UID("ds4"),
79+
},
80+
Spec: appsv1.DaemonSetSpec{
81+
Template: apiv1.PodTemplateSpec{
82+
Spec: apiv1.PodSpec{
83+
PriorityClassName: labels.SystemNodeCriticalLabel,
84+
},
85+
},
86+
},
87+
}
88+
testDaemonSets = []*appsv1.DaemonSet{ds1, ds2, ds3, ds4}
7389
)
7490

7591
func TestSanitizedTemplateNodeInfoFromNodeGroup(t *testing.T) {
@@ -98,6 +114,7 @@ func TestSanitizedTemplateNodeInfoFromNodeGroup(t *testing.T) {
98114
wantPods: []*apiv1.Pod{
99115
buildDSPod(ds1, "n"),
100116
buildDSPod(ds2, "n"),
117+
buildDSPod(ds4, "n"),
101118
},
102119
},
103120
{
@@ -116,6 +133,7 @@ func TestSanitizedTemplateNodeInfoFromNodeGroup(t *testing.T) {
116133
SetMirrorPodSpec(BuildScheduledTestPod("p3", 100, 1, "n")),
117134
buildDSPod(ds1, "n"),
118135
buildDSPod(ds2, "n"),
136+
buildDSPod(ds4, "n"),
119137
},
120138
},
121139
} {
@@ -208,6 +226,7 @@ func TestSanitizedTemplateNodeInfoFromNodeInfo(t *testing.T) {
208226
daemonSets: testDaemonSets,
209227
wantPods: []*apiv1.Pod{
210228
buildDSPod(ds1, "n"),
229+
buildDSPod(ds4, "n"),
211230
},
212231
},
213232
{
@@ -232,6 +251,7 @@ func TestSanitizedTemplateNodeInfoFromNodeInfo(t *testing.T) {
232251
wantPods: []*apiv1.Pod{
233252
buildDSPod(ds1, "n"),
234253
buildDSPod(ds2, "n"),
254+
buildDSPod(ds4, "n"),
235255
},
236256
},
237257
{
@@ -248,6 +268,7 @@ func TestSanitizedTemplateNodeInfoFromNodeInfo(t *testing.T) {
248268
wantPods: []*apiv1.Pod{
249269
SetMirrorPodSpec(BuildScheduledTestPod("p3", 100, 1, "n")),
250270
buildDSPod(ds1, "n"),
271+
buildDSPod(ds4, "n"),
251272
},
252273
},
253274
{
@@ -266,6 +287,7 @@ func TestSanitizedTemplateNodeInfoFromNodeInfo(t *testing.T) {
266287
SetMirrorPodSpec(BuildScheduledTestPod("p3", 100, 1, "n")),
267288
buildDSPod(ds1, "n"),
268289
buildDSPod(ds2, "n"),
290+
buildDSPod(ds4, "n"),
269291
},
270292
},
271293
}

cluster-autoscaler/utils/labels/labels.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@ import (
2525
"k8s.io/apimachinery/pkg/api/resource"
2626
)
2727

28+
const (
29+
// SystemNodeCriticalLabel is a label that marks critical pods with the highest priority.
30+
SystemNodeCriticalLabel = "system-node-critical"
31+
)
32+
2833
var (
2934
// cpu amount used for account pods that don't specify cpu requests
3035
defaultMinCPU = *resource.NewMilliQuantity(50, resource.DecimalSI)

0 commit comments

Comments
 (0)