Skip to content

Commit c7088e6

Browse files
authored
Merge pull request kubernetes#130260 from Bowser1704/fix/skip-best-effort-pods
fix(scheduler): skip best-effort pods in BalancedAllocation PreScore
2 parents 672f57e + d76f40d commit c7088e6

File tree

4 files changed

+73
-142
lines changed

4 files changed

+73
-142
lines changed

pkg/scheduler/framework/plugins/noderesources/balanced_allocation.go

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,15 @@ func (s *balancedAllocationPreScoreState) Clone() framework.StateData {
6363

6464
// PreScore calculates the incoming pod's resource requests and writes them to the cycle state.
6565
func (ba *BalancedAllocation) PreScore(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodes []*framework.NodeInfo) *framework.Status {
66+
podRequests := ba.calculatePodResourceRequestList(pod, ba.resources)
67+
if ba.isBestEffortPod(podRequests) {
68+
// Skip BalancedAllocation scoring for best-effort pods to
69+
// prevent a large number of pods from being scheduled to the same node.
70+
// See https://github.com/kubernetes/kubernetes/issues/129138 for details.
71+
return framework.NewStatus(framework.Skip)
72+
}
6673
state := &balancedAllocationPreScoreState{
67-
podRequests: ba.calculatePodResourceRequestList(pod, ba.resources),
74+
podRequests: podRequests,
6875
}
6976
cycleState.Write(balancedAllocationPreScoreStateKey, state)
7077
return nil
@@ -93,6 +100,9 @@ func (ba *BalancedAllocation) Score(ctx context.Context, state *framework.CycleS
93100
s, err := getBalancedAllocationPreScoreState(state)
94101
if err != nil {
95102
s = &balancedAllocationPreScoreState{podRequests: ba.calculatePodResourceRequestList(pod, ba.resources)}
103+
if ba.isBestEffortPod(s.podRequests) {
104+
return 0, nil
105+
}
96106
}
97107

98108
// ba.score favors nodes with balanced resource usage rate.
@@ -154,7 +164,6 @@ func balancedResourceScorer(requested, allocable []int64) int64 {
154164
// Otherwise, setting the std to zero is enough.
155165
if len(resourceToFractions) == 2 {
156166
std = math.Abs((resourceToFractions[0] - resourceToFractions[1]) / 2)
157-
158167
} else if len(resourceToFractions) > 2 {
159168
mean := totalFraction / float64(len(resourceToFractions))
160169
var sum float64

pkg/scheduler/framework/plugins/noderesources/balanced_allocation_test.go

Lines changed: 40 additions & 127 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ import (
2323
"github.com/google/go-cmp/cmp"
2424
v1 "k8s.io/api/core/v1"
2525
"k8s.io/apimachinery/pkg/api/resource"
26-
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2726
"k8s.io/klog/v2/ktesting"
2827
"k8s.io/kubernetes/pkg/scheduler/apis/config"
2928
"k8s.io/kubernetes/pkg/scheduler/backend/cache"
@@ -57,14 +56,6 @@ func TestNodeResourcesBalancedAllocation(t *testing.T) {
5756
},
5857
NodeName: "node1",
5958
}
60-
labels1 := map[string]string{
61-
"foo": "bar",
62-
"baz": "blah",
63-
}
64-
labels2 := map[string]string{
65-
"bar": "foo",
66-
"baz": "blah",
67-
}
6859
cpuOnly := v1.PodSpec{
6960
NodeName: "node1",
7061
Containers: []v1.Container{
@@ -119,29 +110,23 @@ func TestNodeResourcesBalancedAllocation(t *testing.T) {
119110
}
120111

121112
tests := []struct {
122-
pod *v1.Pod
123-
pods []*v1.Pod
124-
nodes []*v1.Node
125-
expectedList framework.NodeScoreList
126-
name string
127-
args config.NodeResourcesBalancedAllocationArgs
128-
runPreScore bool
113+
pod *v1.Pod
114+
pods []*v1.Pod
115+
nodes []*v1.Node
116+
expectedList framework.NodeScoreList
117+
name string
118+
args config.NodeResourcesBalancedAllocationArgs
119+
runPreScore bool
120+
wantPreScoreStatusCode framework.Code
129121
}{
130122
{
131-
// Node1 scores (remaining resources) on 0-MaxNodeScore scale
132-
// CPU Fraction: 0 / 4000 = 0%
133-
// Memory Fraction: 0 / 10000 = 0%
134-
// Node1 Score: (1-0) * MaxNodeScore = MaxNodeScore
135-
// Node2 scores (remaining resources) on 0-MaxNodeScore scale
136-
// CPU Fraction: 0 / 4000 = 0 %
137-
// Memory Fraction: 0 / 10000 = 0%
138-
// Node2 Score: (1-0) * MaxNodeScore = MaxNodeScore
139-
pod: st.MakePod().Obj(),
140-
nodes: []*v1.Node{makeNode("node1", 4000, 10000, nil), makeNode("node2", 4000, 10000, nil)},
141-
expectedList: []framework.NodeScore{{Name: "node1", Score: framework.MaxNodeScore}, {Name: "node2", Score: framework.MaxNodeScore}},
142-
name: "nothing scheduled, nothing requested",
143-
args: config.NodeResourcesBalancedAllocationArgs{Resources: defaultResourceBalancedAllocationSet},
144-
runPreScore: true,
123+
// bestEffort pods, skip in PreScore
124+
pod: st.MakePod().Obj(),
125+
nodes: []*v1.Node{makeNode("node1", 4000, 10000, nil), makeNode("node2", 4000, 10000, nil)},
126+
name: "nothing scheduled, nothing requested, skip in PreScore",
127+
args: config.NodeResourcesBalancedAllocationArgs{Resources: defaultResourceBalancedAllocationSet},
128+
runPreScore: true,
129+
wantPreScoreStatusCode: framework.Skip,
145130
},
146131
{
147132
// Node1 scores on 0-MaxNodeScore scale
@@ -161,76 +146,6 @@ func TestNodeResourcesBalancedAllocation(t *testing.T) {
161146
args: config.NodeResourcesBalancedAllocationArgs{Resources: defaultResourceBalancedAllocationSet},
162147
runPreScore: true,
163148
},
164-
{
165-
// Node1 scores on 0-MaxNodeScore scale
166-
// CPU Fraction: 0 / 4000= 0%
167-
// Memory Fraction: 0 / 10000 = 0%
168-
// Node1 std: 0
169-
// Node1 Score: (1-0) * MaxNodeScore = MaxNodeScore
170-
// Node2 scores on 0-MaxNodeScore scale
171-
// CPU Fraction: 0 / 4000= 0%
172-
// Memory Fraction: 0 / 10000 = 0%
173-
// Node2 std: 0
174-
// Node2 Score: (1-0) * MaxNodeScore = MaxNodeScore
175-
pod: st.MakePod().Obj(),
176-
nodes: []*v1.Node{makeNode("node1", 4000, 10000, nil), makeNode("node2", 4000, 10000, nil)},
177-
expectedList: []framework.NodeScore{{Name: "node2", Score: framework.MaxNodeScore}, {Name: "node2", Score: framework.MaxNodeScore}},
178-
name: "no resources requested, pods without container scheduled",
179-
pods: []*v1.Pod{
180-
st.MakePod().Node("node1").Labels(labels2).Obj(),
181-
st.MakePod().Node("node1").Labels(labels1).Obj(),
182-
st.MakePod().Node("node2").Labels(labels1).Obj(),
183-
st.MakePod().Node("node2").Labels(labels1).Obj(),
184-
},
185-
args: config.NodeResourcesBalancedAllocationArgs{Resources: defaultResourceBalancedAllocationSet},
186-
runPreScore: true,
187-
},
188-
{
189-
// Node1 scores on 0-MaxNodeScore scale
190-
// CPU Fraction: 0 / 250 = 0%
191-
// Memory Fraction: 0 / 1000 = 0%
192-
// Node1 std: (0 - 0) / 2 = 0
193-
// Node1 Score: (1 - 0)*MaxNodeScore = 100
194-
// Node2 scores on 0-MaxNodeScore scale
195-
// CPU Fraction: 0 / 250 = 0%
196-
// Memory Fraction: 0 / 1000 = 0%
197-
// Node2 std: (0 - 0) / 2 = 0
198-
// Node2 Score: (1 - 0)*MaxNodeScore = 100
199-
pod: st.MakePod().Obj(),
200-
nodes: []*v1.Node{makeNode("node1", 250, 1000*1024*1024, nil), makeNode("node2", 250, 1000*1024*1024, nil)},
201-
expectedList: []framework.NodeScore{{Name: "node1", Score: 100}, {Name: "node2", Score: 100}},
202-
name: "no resources requested, pods with container scheduled",
203-
pods: []*v1.Pod{
204-
st.MakePod().Node("node1").Obj(),
205-
st.MakePod().Node("node1").Obj(),
206-
},
207-
args: config.NodeResourcesBalancedAllocationArgs{Resources: defaultResourceBalancedAllocationSet},
208-
runPreScore: true,
209-
},
210-
{
211-
// Node1 scores on 0-MaxNodeScore scale
212-
// CPU Fraction: 6000 / 10000 = 60%
213-
// Memory Fraction: 0 / 20000 = 0%
214-
// Node1 std: (0.6 - 0) / 2 = 0.3
215-
// Node1 Score: (1 - 0.3)*MaxNodeScore = 70
216-
// Node2 scores on 0-MaxNodeScore scale
217-
// CPU Fraction: 6000 / 10000 = 60%
218-
// Memory Fraction: 5000 / 20000 = 25%
219-
// Node2 std: (0.6 - 0.25) / 2 = 0.175
220-
// Node2 Score: (1 - 0.175)*MaxNodeScore = 82
221-
pod: st.MakePod().Obj(),
222-
nodes: []*v1.Node{makeNode("node1", 10000, 20000, nil), makeNode("node2", 10000, 20000, nil)},
223-
expectedList: []framework.NodeScore{{Name: "node1", Score: 70}, {Name: "node2", Score: 82}},
224-
name: "no resources requested, pods scheduled with resources",
225-
pods: []*v1.Pod{
226-
{Spec: cpuOnly, ObjectMeta: metav1.ObjectMeta{Labels: labels2}},
227-
{Spec: cpuOnly, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
228-
{Spec: cpuOnly2, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
229-
{Spec: cpuAndMemory, ObjectMeta: metav1.ObjectMeta{Labels: labels1}},
230-
},
231-
args: config.NodeResourcesBalancedAllocationArgs{Resources: defaultResourceBalancedAllocationSet},
232-
runPreScore: true,
233-
},
234149
{
235150
// Node1 scores on 0-MaxNodeScore scale
236151
// CPU Fraction: 6000 / 10000 = 60%
@@ -298,18 +213,6 @@ func TestNodeResourcesBalancedAllocation(t *testing.T) {
298213
args: config.NodeResourcesBalancedAllocationArgs{Resources: defaultResourceBalancedAllocationSet},
299214
runPreScore: true,
300215
},
301-
{
302-
pod: st.MakePod().Obj(),
303-
nodes: []*v1.Node{makeNode("node1", 0, 0, nil), makeNode("node2", 0, 0, nil)},
304-
expectedList: []framework.NodeScore{{Name: "node1", Score: 100}, {Name: "node2", Score: 100}},
305-
name: "zero node resources, pods scheduled with resources",
306-
pods: []*v1.Pod{
307-
{Spec: cpuOnly},
308-
{Spec: cpuAndMemory},
309-
},
310-
args: config.NodeResourcesBalancedAllocationArgs{Resources: defaultResourceBalancedAllocationSet},
311-
runPreScore: true,
312-
},
313216
// Node1 scores on 0-MaxNodeScore scale
314217
// CPU Fraction: 3000 / 3500 = 85.71%
315218
// Memory Fraction: 5000 / 40000 = 12.5%
@@ -342,19 +245,25 @@ func TestNodeResourcesBalancedAllocation(t *testing.T) {
342245
runPreScore: true,
343246
},
344247
// Only one node (node1) has the scalar resource; the pod doesn't request it, so the scalar resource should be excluded from consideration.
345-
// Node1: std = 0, score = 100
346-
// Node2: std = 0, score = 100
248+
// Node1 scores on 0-MaxNodeScore scale
249+
// CPU Fraction: 3000 / 3500 = 85.71%
250+
// Memory Fraction: 5000 / 40000 = 12.5%
251+
// Node1 std: (0.8571 - 0.125) / 2 = 0.36605
252+
// Node1 Score: (1 - 0.36605)*MaxNodeScore = 63
253+
// Node2 scores on 0-MaxNodeScore scale
254+
// CPU Fraction: 3000 / 3500 = 85.71%
255+
// Memory Fraction: 5000 / 40000 = 12.5%
256+
// Node2 std: (0.8571 - 0.125) / 2 = 0.36605
257+
// Node2 Score: (1 - 0.36605)*MaxNodeScore = 63
347258
{
348-
pod: st.MakePod().Obj(),
259+
pod: &v1.Pod{Spec: cpuAndMemory},
349260
nodes: []*v1.Node{makeNode("node1", 3500, 40000, scalarResource), makeNode("node2", 3500, 40000, nil)},
350-
expectedList: []framework.NodeScore{{Name: "node1", Score: 100}, {Name: "node2", Score: 100}},
351-
name: "node without the scalar resource results to a higher score",
352-
pods: []*v1.Pod{
353-
{Spec: cpuOnly},
354-
{Spec: cpuOnly2},
355-
},
261+
expectedList: []framework.NodeScore{{Name: "node1", Score: 63}, {Name: "node2", Score: 63}},
262+
name: "node without the scalar resource should skip the scalar resource",
263+
pods: []*v1.Pod{},
356264
args: config.NodeResourcesBalancedAllocationArgs{Resources: []config.ResourceSpec{
357265
{Name: string(v1.ResourceCPU), Weight: 1},
266+
{Name: string(v1.ResourceMemory), Weight: 1},
358267
{Name: "nvidia.com/gpu", Weight: 1},
359268
}},
360269
runPreScore: true,
@@ -392,13 +301,17 @@ func TestNodeResourcesBalancedAllocation(t *testing.T) {
392301
fh, _ := runtime.NewFramework(ctx, nil, nil, runtime.WithSnapshotSharedLister(snapshot))
393302
p, _ := NewBalancedAllocation(ctx, &test.args, fh, feature.Features{})
394303
state := framework.NewCycleState()
395-
for i := range test.nodes {
396-
if test.runPreScore {
397-
status := p.(framework.PreScorePlugin).PreScore(ctx, state, test.pod, tf.BuildNodeInfos(test.nodes))
398-
if !status.IsSuccess() {
399-
t.Errorf("PreScore is expected to return success, but didn't. Got status: %v", status)
400-
}
304+
if test.runPreScore {
305+
status := p.(framework.PreScorePlugin).PreScore(ctx, state, test.pod, tf.BuildNodeInfos(test.nodes))
306+
if status.Code() != test.wantPreScoreStatusCode {
307+
t.Errorf("unexpected status code, want: %v, got: %v", test.wantPreScoreStatusCode, status.Code())
401308
}
309+
if status.Code() == framework.Skip {
310+
t.Log("skipping score test as PreScore returned skip")
311+
return
312+
}
313+
}
314+
for i := range test.nodes {
402315
nodeInfo, err := snapshot.Get(test.nodes[i].Name)
403316
if err != nil {
404317
t.Errorf("failed to get node %q from snapshot: %v", test.nodes[i].Name, err)

pkg/scheduler/framework/plugins/noderesources/resource_allocation.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,3 +146,12 @@ func (r *resourceAllocationScorer) calculatePodResourceRequestList(pod *v1.Pod,
146146
}
147147
return podRequests
148148
}
149+
150+
// isBestEffortPod reports whether every entry in podRequests is zero.
// It returns false as soon as a non-zero request is found, and true
// otherwise; a nil or empty slice therefore also yields true.
// The receiver r is not read by this method.
func (r *resourceAllocationScorer) isBestEffortPod(podRequests []int64) bool {
151+
for _, request := range podRequests {
152+
if request != 0 {
153+
return false
154+
}
155+
}
156+
return true
157+
}

pkg/scheduler/schedule_one_test.go

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2939,7 +2939,7 @@ func TestZeroRequest(t *testing.T) {
29392939
{Spec: large1}, {Spec: noResources1},
29402940
{Spec: large2}, {Spec: small2},
29412941
},
2942-
expectedScore: 150,
2942+
expectedScore: 50,
29432943
},
29442944
{
29452945
pod: &v1.Pod{Spec: small},
@@ -3105,10 +3105,10 @@ func Test_prioritizeNodes(t *testing.T) {
31053105
},
31063106
{
31073107
Name: "NodeResourcesBalancedAllocation",
3108-
Score: 100,
3108+
Score: 0,
31093109
},
31103110
},
3111-
TotalScore: 110,
3111+
TotalScore: 10,
31123112
},
31133113
{
31143114
Name: "node2",
@@ -3119,10 +3119,10 @@ func Test_prioritizeNodes(t *testing.T) {
31193119
},
31203120
{
31213121
Name: "NodeResourcesBalancedAllocation",
3122-
Score: 100,
3122+
Score: 0,
31233123
},
31243124
},
3125-
TotalScore: 200,
3125+
TotalScore: 100,
31263126
},
31273127
},
31283128
},
@@ -3172,10 +3172,10 @@ func Test_prioritizeNodes(t *testing.T) {
31723172
},
31733173
{
31743174
Name: "NodeResourcesBalancedAllocation",
3175-
Score: 100,
3175+
Score: 0,
31763176
},
31773177
},
3178-
TotalScore: 420,
3178+
TotalScore: 320,
31793179
},
31803180
{
31813181
Name: "node2",
@@ -3190,10 +3190,10 @@ func Test_prioritizeNodes(t *testing.T) {
31903190
},
31913191
{
31923192
Name: "NodeResourcesBalancedAllocation",
3193-
Score: 100,
3193+
Score: 0,
31943194
},
31953195
},
3196-
TotalScore: 330,
3196+
TotalScore: 230,
31973197
},
31983198
},
31993199
},
@@ -3222,10 +3222,10 @@ func Test_prioritizeNodes(t *testing.T) {
32223222
},
32233223
{
32243224
Name: "NodeResourcesBalancedAllocation",
3225-
Score: 100,
3225+
Score: 0,
32263226
},
32273227
},
3228-
TotalScore: 110,
3228+
TotalScore: 10,
32293229
},
32303230
{
32313231
Name: "node2",
@@ -3236,10 +3236,10 @@ func Test_prioritizeNodes(t *testing.T) {
32363236
},
32373237
{
32383238
Name: "NodeResourcesBalancedAllocation",
3239-
Score: 100,
3239+
Score: 0,
32403240
},
32413241
},
3242-
TotalScore: 200,
3242+
TotalScore: 100,
32433243
},
32443244
},
32453245
},

0 commit comments

Comments
 (0)