Skip to content

Commit e660e84

Browse files
committed
Faster scheduler.
1 parent ea5be33 commit e660e84

File tree

4 files changed

+67
-44
lines changed

4 files changed

+67
-44
lines changed

pkg/scheduler/core/generic_scheduler.go

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ type genericScheduler struct {
167167
pvcLister corelisters.PersistentVolumeClaimLister
168168
pdbLister algorithm.PDBLister
169169
disablePreemption bool
170+
lastIndex int
170171
percentageOfNodesToScore int32
171172
}
172173

@@ -460,8 +461,8 @@ func (g *genericScheduler) findNodesThatFit(pod *v1.Pod, nodes []*v1.Node) ([]*v
460461
if len(g.predicates) == 0 {
461462
filtered = nodes
462463
} else {
463-
allNodes := int32(g.cache.NodeTree().NumNodes())
464-
numNodesToFind := g.numFeasibleNodesToFind(allNodes)
464+
allNodes := g.cache.NodeTree().AllNodes()
465+
numNodesToFind := g.numFeasibleNodesToFind(int32(len(allNodes)))
465466

466467
// Create filtered list with enough space to avoid growing it
467468
// and allow assigning.
@@ -477,8 +478,12 @@ func (g *genericScheduler) findNodesThatFit(pod *v1.Pod, nodes []*v1.Node) ([]*v
477478
// We can use the same metadata producer for all nodes.
478479
meta := g.predicateMetaProducer(pod, g.nodeInfoSnapshot.NodeInfoMap)
479480

481+
processedNodes := int32(0)
480482
checkNode := func(i int) {
481-
nodeName := g.cache.NodeTree().Next()
483+
// We check the nodes starting from where we left off in the previous scheduling cycle,
484+
// this is to make sure all nodes have the same chance of being examined across pods.
485+
atomic.AddInt32(&processedNodes, 1)
486+
nodeName := allNodes[(g.lastIndex+i)%len(allNodes)]
482487
fits, failedPredicates, err := podFitsOnNode(
483488
pod,
484489
meta,
@@ -510,7 +515,8 @@ func (g *genericScheduler) findNodesThatFit(pod *v1.Pod, nodes []*v1.Node) ([]*v
510515

511516
// Stops searching for more nodes once the configured number of feasible nodes
512517
// are found.
513-
workqueue.ParallelizeUntil(ctx, 16, int(allNodes), checkNode)
518+
workqueue.ParallelizeUntil(ctx, 16, len(allNodes), checkNode)
519+
g.lastIndex = (g.lastIndex + int(processedNodes)) % len(allNodes)
514520

515521
filtered = filtered[:filteredLen]
516522
if len(errs) > 0 {

pkg/scheduler/internal/cache/cache_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1066,7 +1066,7 @@ func TestNodeOperators(t *testing.T) {
10661066
if !found {
10671067
t.Errorf("Failed to find node %v in internalcache.", node.Name)
10681068
}
1069-
if cache.nodeTree.NumNodes() != 1 || cache.nodeTree.Next() != node.Name {
1069+
if cache.nodeTree.NumNodes() != 1 || cache.nodeTree.next() != node.Name {
10701070
t.Errorf("cache.nodeTree is not updated correctly after adding node: %v", node.Name)
10711071
}
10721072

@@ -1109,7 +1109,7 @@ func TestNodeOperators(t *testing.T) {
11091109
t.Errorf("Failed to update node in schedulernodeinfo:\n got: %+v \nexpected: %+v", got, expected)
11101110
}
11111111
// Check nodeTree after update
1112-
if cache.nodeTree.NumNodes() != 1 || cache.nodeTree.Next() != node.Name {
1112+
if cache.nodeTree.NumNodes() != 1 || cache.nodeTree.next() != node.Name {
11131113
t.Errorf("unexpected cache.nodeTree after updating node: %v", node.Name)
11141114
}
11151115

@@ -1120,7 +1120,7 @@ func TestNodeOperators(t *testing.T) {
11201120
}
11211121
// Check nodeTree after remove. The node should be removed from the nodeTree even if there are
11221122
// still pods on it.
1123-
if cache.nodeTree.NumNodes() != 0 || cache.nodeTree.Next() != "" {
1123+
if cache.nodeTree.NumNodes() != 0 || cache.nodeTree.next() != "" {
11241124
t.Errorf("unexpected cache.nodeTree after removing node: %v", node.Name)
11251125
}
11261126
}

pkg/scheduler/internal/cache/node_tree.go

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ type NodeTree struct {
3232
tree map[string]*nodeArray // a map from zone (region-zone) to an array of nodes in the zone.
3333
zones []string // a list of all the zones in the tree (keys)
3434
zoneIndex int
35+
allNodes []string
3536
numNodes int
3637
mu sync.RWMutex
3738
}
@@ -92,6 +93,7 @@ func (nt *NodeTree) addNode(n *v1.Node) {
9293
}
9394
klog.V(5).Infof("Added node %v in group %v to NodeTree", n.Name, zone)
9495
nt.numNodes++
96+
nt.recomputeAllNodes()
9597
}
9698

9799
// RemoveNode removes a node from the NodeTree.
@@ -112,6 +114,7 @@ func (nt *NodeTree) removeNode(n *v1.Node) error {
112114
}
113115
klog.V(5).Infof("Removed node %v in group %v from NodeTree", n.Name, zone)
114116
nt.numNodes--
117+
nt.recomputeAllNodes()
115118
return nil
116119
}
117120
}
@@ -159,9 +162,7 @@ func (nt *NodeTree) resetExhausted() {
159162

160163
// Next returns the name of the next node. NodeTree iterates over zones and in each zone iterates
161164
// over nodes in a round robin fashion.
162-
func (nt *NodeTree) Next() string {
163-
nt.mu.Lock()
164-
defer nt.mu.Unlock()
165+
func (nt *NodeTree) next() string {
165166
if len(nt.zones) == 0 {
166167
return ""
167168
}
@@ -186,6 +187,22 @@ func (nt *NodeTree) Next() string {
186187
}
187188
}
188189

190+
func (nt *NodeTree) recomputeAllNodes() {
191+
nt.allNodes = make([]string, 0, nt.numNodes)
192+
nt.resetExhausted()
193+
for i := 0; i < nt.numNodes; i++ {
194+
nt.allNodes = append(nt.allNodes, nt.next())
195+
}
196+
}
197+
198+
// AllNodes returns the list of nodes as they would be iterated by
199+
// Next() method.
200+
func (nt *NodeTree) AllNodes() []string {
201+
nt.mu.RLock()
202+
defer nt.mu.RUnlock()
203+
return nt.allNodes
204+
}
205+
189206
// NumNodes returns the number of nodes.
190207
func (nt *NodeTree) NumNodes() int {
191208
nt.mu.RLock()

pkg/scheduler/internal/cache/node_tree_test.go

Lines changed: 34 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -140,28 +140,28 @@ func TestNodeTree_AddNode(t *testing.T) {
140140
{
141141
name: "single node no labels",
142142
nodesToAdd: allNodes[:1],
143-
expectedTree: map[string]*nodeArray{"": {[]string{"node-0"}, 0}},
143+
expectedTree: map[string]*nodeArray{"": {[]string{"node-0"}, 1}},
144144
},
145145
{
146146
name: "mix of nodes with and without proper labels",
147147
nodesToAdd: allNodes[:4],
148148
expectedTree: map[string]*nodeArray{
149-
"": {[]string{"node-0"}, 0},
150-
"region-1:\x00:": {[]string{"node-1"}, 0},
151-
":\x00:zone-2": {[]string{"node-2"}, 0},
152-
"region-1:\x00:zone-2": {[]string{"node-3"}, 0},
149+
"": {[]string{"node-0"}, 1},
150+
"region-1:\x00:": {[]string{"node-1"}, 1},
151+
":\x00:zone-2": {[]string{"node-2"}, 1},
152+
"region-1:\x00:zone-2": {[]string{"node-3"}, 1},
153153
},
154154
},
155155
{
156156
name: "mix of nodes with and without proper labels and some zones with multiple nodes",
157157
nodesToAdd: allNodes[:7],
158158
expectedTree: map[string]*nodeArray{
159-
"": {[]string{"node-0"}, 0},
160-
"region-1:\x00:": {[]string{"node-1"}, 0},
161-
":\x00:zone-2": {[]string{"node-2"}, 0},
162-
"region-1:\x00:zone-2": {[]string{"node-3", "node-4"}, 0},
163-
"region-1:\x00:zone-3": {[]string{"node-5"}, 0},
164-
"region-2:\x00:zone-2": {[]string{"node-6"}, 0},
159+
"": {[]string{"node-0"}, 1},
160+
"region-1:\x00:": {[]string{"node-1"}, 1},
161+
":\x00:zone-2": {[]string{"node-2"}, 1},
162+
"region-1:\x00:zone-2": {[]string{"node-3", "node-4"}, 2},
163+
"region-1:\x00:zone-3": {[]string{"node-5"}, 1},
164+
"region-2:\x00:zone-2": {[]string{"node-6"}, 1},
165165
},
166166
},
167167
}
@@ -190,22 +190,22 @@ func TestNodeTree_RemoveNode(t *testing.T) {
190190
existingNodes: allNodes[:7],
191191
nodesToRemove: allNodes[:1],
192192
expectedTree: map[string]*nodeArray{
193-
"region-1:\x00:": {[]string{"node-1"}, 0},
194-
":\x00:zone-2": {[]string{"node-2"}, 0},
195-
"region-1:\x00:zone-2": {[]string{"node-3", "node-4"}, 0},
196-
"region-1:\x00:zone-3": {[]string{"node-5"}, 0},
197-
"region-2:\x00:zone-2": {[]string{"node-6"}, 0},
193+
"region-1:\x00:": {[]string{"node-1"}, 1},
194+
":\x00:zone-2": {[]string{"node-2"}, 1},
195+
"region-1:\x00:zone-2": {[]string{"node-3", "node-4"}, 2},
196+
"region-1:\x00:zone-3": {[]string{"node-5"}, 1},
197+
"region-2:\x00:zone-2": {[]string{"node-6"}, 1},
198198
},
199199
},
200200
{
201201
name: "remove a few nodes including one from a zone with multiple nodes",
202202
existingNodes: allNodes[:7],
203203
nodesToRemove: allNodes[1:4],
204204
expectedTree: map[string]*nodeArray{
205-
"": {[]string{"node-0"}, 0},
206-
"region-1:\x00:zone-2": {[]string{"node-4"}, 0},
207-
"region-1:\x00:zone-3": {[]string{"node-5"}, 0},
208-
"region-2:\x00:zone-2": {[]string{"node-6"}, 0},
205+
"": {[]string{"node-0"}, 1},
206+
"region-1:\x00:zone-2": {[]string{"node-4"}, 1},
207+
"region-1:\x00:zone-3": {[]string{"node-5"}, 1},
208+
"region-2:\x00:zone-2": {[]string{"node-6"}, 1},
209209
},
210210
},
211211
{
@@ -257,11 +257,11 @@ func TestNodeTree_UpdateNode(t *testing.T) {
257257
},
258258
},
259259
expectedTree: map[string]*nodeArray{
260-
"region-1:\x00:": {[]string{"node-1"}, 0},
261-
":\x00:zone-2": {[]string{"node-2"}, 0},
262-
"region-1:\x00:zone-2": {[]string{"node-3", "node-4", "node-0"}, 0},
263-
"region-1:\x00:zone-3": {[]string{"node-5"}, 0},
264-
"region-2:\x00:zone-2": {[]string{"node-6"}, 0},
260+
"region-1:\x00:": {[]string{"node-1"}, 1},
261+
":\x00:zone-2": {[]string{"node-2"}, 1},
262+
"region-1:\x00:zone-2": {[]string{"node-3", "node-4", "node-0"}, 3},
263+
"region-1:\x00:zone-3": {[]string{"node-5"}, 1},
264+
"region-2:\x00:zone-2": {[]string{"node-6"}, 1},
265265
},
266266
},
267267
{
@@ -277,7 +277,7 @@ func TestNodeTree_UpdateNode(t *testing.T) {
277277
},
278278
},
279279
expectedTree: map[string]*nodeArray{
280-
"region-1:\x00:zone-2": {[]string{"node-0"}, 0},
280+
"region-1:\x00:zone-2": {[]string{"node-0"}, 1},
281281
},
282282
},
283283
{
@@ -293,8 +293,8 @@ func TestNodeTree_UpdateNode(t *testing.T) {
293293
},
294294
},
295295
expectedTree: map[string]*nodeArray{
296-
"": {[]string{"node-0"}, 0},
297-
"region-1:\x00:zone-2": {[]string{"node-new"}, 0},
296+
"": {[]string{"node-0"}, 1},
297+
"region-1:\x00:zone-2": {[]string{"node-new"}, 1},
298298
},
299299
},
300300
}
@@ -322,7 +322,7 @@ func TestNodeTree_Next(t *testing.T) {
322322
tests := []struct {
323323
name string
324324
nodesToAdd []*v1.Node
325-
numRuns int // number of times to run Next()
325+
numRuns int // number of times to run next()
326326
expectedOutput []string
327327
}{
328328
{
@@ -357,7 +357,7 @@ func TestNodeTree_Next(t *testing.T) {
357357

358358
var output []string
359359
for i := 0; i < test.numRuns; i++ {
360-
output = append(output, nt.Next())
360+
output = append(output, nt.next())
361361
}
362362
if !reflect.DeepEqual(output, test.expectedOutput) {
363363
t.Errorf("unexpected output. Expected: %v, Got: %v", test.expectedOutput, output)
@@ -399,15 +399,15 @@ func TestNodeTreeMultiOperations(t *testing.T) {
399399
name: "add more nodes to an exhausted zone",
400400
nodesToAdd: append(allNodes[4:9], allNodes[3]),
401401
nodesToRemove: nil,
402-
operations: []string{"add", "add", "add", "add", "add", "next", "next", "next", "next", "add", "next", "next", "next"},
403-
expectedOutput: []string{"node-4", "node-5", "node-6", "node-7", "node-3", "node-8", "node-4"},
402+
operations: []string{"add", "add", "add", "add", "add", "next", "next", "next", "next", "next", "next", "next"},
403+
expectedOutput: []string{"node-4", "node-5", "node-6", "node-7", "node-8", "node-4", "node-5"},
404404
},
405405
{
406406
name: "remove zone and add new to ensure exhausted is reset correctly",
407407
nodesToAdd: append(allNodes[3:5], allNodes[6:8]...),
408408
nodesToRemove: allNodes[3:5],
409409
operations: []string{"add", "add", "next", "next", "remove", "add", "add", "next", "next", "remove", "next", "next"},
410-
expectedOutput: []string{"node-3", "node-4", "node-6", "node-7", "node-6", "node-7"},
410+
expectedOutput: []string{"node-3", "node-4", "node-4", "node-6", "node-6", "node-7"},
411411
},
412412
}
413413

@@ -434,7 +434,7 @@ func TestNodeTreeMultiOperations(t *testing.T) {
434434
removeIndex++
435435
}
436436
case "next":
437-
output = append(output, nt.Next())
437+
output = append(output, nt.next())
438438
default:
439439
t.Errorf("unknow operation: %v", op)
440440
}

0 commit comments

Comments
 (0)