Skip to content

Commit c2ec8be

Browse files
maelkigraecao
andcommitted
Fix scheduler issue with nodetree additions
When nodes are added in multiple zones at once, the nodeTree next function does not return a correct list of nodes but repeats some This commit resets the index before starting to call next() to prevent this issue Special thanks to igraecao for the help in finding the bug Co-authored-by: igraecao <[email protected]>
1 parent ae7dce7 commit c2ec8be

File tree

2 files changed

+146
-0
lines changed

2 files changed

+146
-0
lines changed

pkg/scheduler/internal/cache/cache.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,7 @@ func (cache *schedulerCache) updateNodeInfoSnapshotList(snapshot *Snapshot, upda
280280
if updateAll {
281281
// Take a snapshot of the nodes order in the tree
282282
snapshot.nodeInfoList = make([]*framework.NodeInfo, 0, cache.nodeTree.numNodes)
283+
cache.nodeTree.resetExhausted()
283284
for i := 0; i < cache.nodeTree.numNodes; i++ {
284285
nodeName := cache.nodeTree.next()
285286
if n := snapshot.nodeInfoMap[nodeName]; n != nil {

pkg/scheduler/internal/cache/cache_test.go

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1539,6 +1539,151 @@ func compareCacheWithNodeInfoSnapshot(cache *schedulerCache, snapshot *Snapshot)
15391539
return nil
15401540
}
15411541

1542+
func TestSchedulerCache_updateNodeInfoSnapshotList(t *testing.T) {
1543+
// Create a few nodes to be used in tests.
1544+
nodes := []*v1.Node{}
1545+
i := 0
1546+
// List of number of nodes per zone, zone 0 -> 2, zone 1 -> 6
1547+
for zone, nb := range []int{2, 6} {
1548+
for j := 0; j < nb; j++ {
1549+
nodes = append(nodes, &v1.Node{
1550+
ObjectMeta: metav1.ObjectMeta{
1551+
Name: fmt.Sprintf("node-%d", i),
1552+
Labels: map[string]string{
1553+
v1.LabelZoneRegion: fmt.Sprintf("region-%d", zone),
1554+
v1.LabelZoneFailureDomain: fmt.Sprintf("zone-%d", zone),
1555+
},
1556+
},
1557+
})
1558+
i++
1559+
}
1560+
}
1561+
1562+
var cache *schedulerCache
1563+
var snapshot *Snapshot
1564+
1565+
addNode := func(t *testing.T, i int) {
1566+
if err := cache.AddNode(nodes[i]); err != nil {
1567+
t.Error(err)
1568+
}
1569+
_, ok := snapshot.nodeInfoMap[nodes[i].Name]
1570+
if !ok {
1571+
snapshot.nodeInfoMap[nodes[i].Name] = cache.nodes[nodes[i].Name].info
1572+
}
1573+
}
1574+
1575+
updateSnapshot := func(t *testing.T) {
1576+
cache.updateNodeInfoSnapshotList(snapshot, true)
1577+
if err := compareCacheWithNodeInfoSnapshot(cache, snapshot); err != nil {
1578+
t.Error(err)
1579+
}
1580+
}
1581+
1582+
tests := []struct {
1583+
name string
1584+
operations func(t *testing.T)
1585+
expected []string
1586+
}{
1587+
{
1588+
name: "Empty cache",
1589+
operations: func(t *testing.T) {},
1590+
expected: []string{},
1591+
},
1592+
{
1593+
name: "Single node",
1594+
operations: func(t *testing.T) {
1595+
addNode(t, 0)
1596+
},
1597+
expected: []string{"node-0"},
1598+
},
1599+
{
1600+
name: "Two nodes",
1601+
operations: func(t *testing.T) {
1602+
addNode(t, 0)
1603+
updateSnapshot(t)
1604+
addNode(t, 1)
1605+
},
1606+
expected: []string{"node-0", "node-1"},
1607+
},
1608+
{
1609+
name: "bug 91601, two nodes, update the snapshot and add two nodes in different zones",
1610+
operations: func(t *testing.T) {
1611+
addNode(t, 2)
1612+
addNode(t, 3)
1613+
updateSnapshot(t)
1614+
addNode(t, 4)
1615+
addNode(t, 0)
1616+
},
1617+
expected: []string{"node-2", "node-0", "node-3", "node-4"},
1618+
},
1619+
{
1620+
name: "bug 91601, 6 nodes, one in a different zone",
1621+
operations: func(t *testing.T) {
1622+
addNode(t, 2)
1623+
addNode(t, 3)
1624+
addNode(t, 4)
1625+
addNode(t, 5)
1626+
updateSnapshot(t)
1627+
addNode(t, 6)
1628+
addNode(t, 0)
1629+
},
1630+
expected: []string{"node-2", "node-0", "node-3", "node-4", "node-5", "node-6"},
1631+
},
1632+
{
1633+
name: "bug 91601, 7 nodes, two in a different zone",
1634+
operations: func(t *testing.T) {
1635+
addNode(t, 2)
1636+
updateSnapshot(t)
1637+
addNode(t, 3)
1638+
addNode(t, 4)
1639+
updateSnapshot(t)
1640+
addNode(t, 5)
1641+
addNode(t, 6)
1642+
addNode(t, 0)
1643+
addNode(t, 1)
1644+
},
1645+
expected: []string{"node-2", "node-0", "node-3", "node-1", "node-4", "node-5", "node-6"},
1646+
},
1647+
{
1648+
name: "bug 91601, 7 nodes, two in a different zone, different zone order",
1649+
operations: func(t *testing.T) {
1650+
addNode(t, 2)
1651+
addNode(t, 1)
1652+
updateSnapshot(t)
1653+
addNode(t, 3)
1654+
addNode(t, 4)
1655+
updateSnapshot(t)
1656+
addNode(t, 5)
1657+
addNode(t, 6)
1658+
addNode(t, 0)
1659+
},
1660+
expected: []string{"node-2", "node-1", "node-3", "node-0", "node-4", "node-5", "node-6"},
1661+
},
1662+
}
1663+
1664+
for _, test := range tests {
1665+
t.Run(test.name, func(t *testing.T) {
1666+
cache = newSchedulerCache(time.Second, time.Second, nil)
1667+
snapshot = NewEmptySnapshot()
1668+
1669+
test.operations(t)
1670+
1671+
// Always update the snapshot at the end of operations and compare it.
1672+
cache.updateNodeInfoSnapshotList(snapshot, true)
1673+
if err := compareCacheWithNodeInfoSnapshot(cache, snapshot); err != nil {
1674+
t.Error(err)
1675+
}
1676+
nodeNames := make([]string, len(snapshot.nodeInfoList))
1677+
for i, nodeInfo := range snapshot.nodeInfoList {
1678+
nodeNames[i] = nodeInfo.Node().Name
1679+
}
1680+
if !reflect.DeepEqual(nodeNames, test.expected) {
1681+
t.Errorf("The nodeInfoList is incorrect. Expected %v , got %v", test.expected, nodeNames)
1682+
}
1683+
})
1684+
}
1685+
}
1686+
15421687
func BenchmarkUpdate1kNodes30kPods(b *testing.B) {
15431688
// Enable volumesOnNodeForBalancing to do balanced resource allocation
15441689
defer featuregatetesting.SetFeatureGateDuringTest(nil, utilfeature.DefaultFeatureGate, features.BalanceAttachedNodeVolumes, true)()

0 commit comments

Comments
 (0)