Commit 70a88d1

nrt: enable consumption of MaxAllowedNUMANodes
Kubernetes 1.31 gained support for new topology manager options, most notably the ability to raise the maximum number of NUMA nodes, which is no longer hardcoded to 8. The scheduler code needs to adapt accordingly. As with the other key topology manager parameters, we trust NRT objects to carry the values we need. To mitigate DoS risks, we cap the value at an artificial "high enough" (and hopefully good enough for a while) limit. Once the value is obtained, we pass it through to all the places where the previously hardcoded value was used.

Signed-off-by: Francesco Romani <[email protected]>
1 parent 62f9d6b commit 70a88d1
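
For illustration, this is how the new attribute travels end to end: a hypothetical NodeResourceTopology object carrying topologyManagerMaxNUMANodes, consumed through the nodeconfig package touched below. The node name, the attribute values, and the NRT API import path are assumptions for the sketch, not part of the commit:

package main

import (
    "fmt"

    topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/klog/v2"

    "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/nodeconfig"
)

func main() {
    // A made-up NRT object; in a real cluster this is published by the node agent.
    nrt := &topologyv1alpha2.NodeResourceTopology{
        ObjectMeta: metav1.ObjectMeta{Name: "worker-0"},
        Attributes: topologyv1alpha2.AttributeList{
            {Name: nodeconfig.AttributeScope, Value: "container"},
            {Name: nodeconfig.AttributePolicy, Value: "single-numa-node"},
            {Name: nodeconfig.AttributeMaxNUMANodes, Value: "16"}, // new attribute handled by this commit
        },
    }
    conf := nodeconfig.TopologyManagerFromNodeResourceTopology(klog.Background(), nrt)
    fmt.Println(conf.String()) // policy="single-numa-node" scope="container" maxNUMANodes=16
}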

5 files changed, +157 -37 lines changed

pkg/noderesourcetopology/filter.go

Lines changed: 1 addition & 5 deletions
@@ -35,10 +35,6 @@ import (
     "sigs.k8s.io/scheduler-plugins/pkg/util"
 )
 
-// The maximum number of NUMA nodes that Topology Manager allows is 8
-// https://kubernetes.io/docs/tasks/administer-cluster/topology-manager/#known-limitations
-const highestNUMAID = 8
-
 type PolicyHandler func(pod *v1.Pod, zoneMap topologyv1alpha2.ZoneList) *framework.Status
 
 func singleNUMAContainerLevelHandler(lh logr.Logger, pod *v1.Pod, info *filterInfo) *framework.Status {
@@ -85,7 +81,7 @@ func singleNUMAContainerLevelHandler(lh logr.Logger, pod *v1.Pod, info *filterIn
 // resourcesAvailableInAnyNUMANodes checks for sufficient resource and return the NUMAID that would be selected by Kubelet.
 // this function requires NUMANodeList with properly populated NUMANode, NUMAID should be in range 0-63
 func resourcesAvailableInAnyNUMANodes(lh logr.Logger, info *filterInfo, resources v1.ResourceList) (int, bool, string) {
-    numaID := highestNUMAID
+    numaID := info.topologyManager.MaxNUMANodes // highest NUMA ID
     bitmask := bm.NewEmptyBitMask()
     // set all bits, each bit is a NUMA node, if resources couldn't be aligned
     // on the NUMA node, bit should be unset
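
The rest of resourcesAvailableInAnyNUMANodes is not shown in this hunk; the sketch below only illustrates why the starting value matters. It acts as a sentinel above any real NUMA ID so the loop can track the lowest suitable node. The NUMANode type, the fits helper, and the lowest-ID preference are invented for the example (the real code derives the selection through the kubelet bitmask helpers):

package numasketch

// NUMANode and fits are hypothetical stand-ins for the plugin's real data structures.
type NUMANode struct {
    ID   int
    Free map[string]int64
}

func fits(node NUMANode, requests map[string]int64) bool {
    for name, qty := range requests {
        if node.Free[name] < qty {
            return false
        }
    }
    return true
}

// lowestSuitableNUMAID starts from maxNUMANodes, which is strictly greater than any
// valid NUMA ID, so taking the minimum over the nodes that fit yields the lowest
// suitable NUMA ID.
func lowestSuitableNUMAID(nodes []NUMANode, requests map[string]int64, maxNUMANodes int) (int, bool) {
    numaID := maxNUMANodes
    found := false
    for _, node := range nodes {
        if fits(node, requests) && node.ID < numaID {
            numaID = node.ID
            found = true
        }
    }
    return numaID, found
}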

pkg/noderesourcetopology/least_numa.go

Lines changed: 4 additions & 4 deletions
@@ -67,7 +67,7 @@ func leastNUMAContainerScopeScore(lh logr.Logger, pod *v1.Pod, info *scoreInfo)
         return framework.MaxNodeScore, nil
     }
 
-    return normalizeScore(maxNUMANodesCount, allContainersMinAvgDistance), nil
+    return normalizeScore(maxNUMANodesCount, allContainersMinAvgDistance, info.topologyManager.MaxNUMANodes), nil
 }
 
 func leastNUMAPodScopeScore(lh logr.Logger, pod *v1.Pod, info *scoreInfo) (int64, *framework.Status) {
@@ -85,11 +85,11 @@ func leastNUMAPodScopeScore(lh logr.Logger, pod *v1.Pod, info *scoreInfo) (int64
         return framework.MinNodeScore, nil
     }
 
-    return normalizeScore(numaNodes.Count(), isMinAvgDistance), nil
+    return normalizeScore(numaNodes.Count(), isMinAvgDistance, info.topologyManager.MaxNUMANodes), nil
 }
 
-func normalizeScore(numaNodesCount int, isMinAvgDistance bool) int64 {
-    numaNodeScore := framework.MaxNodeScore / highestNUMAID
+func normalizeScore(numaNodesCount int, isMinAvgDistance bool, highestNUMAID int) int64 {
+    numaNodeScore := framework.MaxNodeScore / int64(highestNUMAID)
     score := framework.MaxNodeScore - int64(numaNodesCount)*numaNodeScore
     if isMinAvgDistance {
         // if distance between NUMA domains is optimal add half of numaNodeScore to make this node more favorable
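
To make the effect of the new parameter concrete, here is the same arithmetic with sample numbers. This assumes framework.MaxNodeScore is 100 (the upstream scheduler framework value) and mirrors the half-score bonus described in the comment above; it is an illustration, not code from the commit:

package scoresketch

const maxNodeScore = int64(100) // stand-in for framework.MaxNodeScore

// exampleScore mirrors normalizeScore with the new highestNUMAID parameter.
func exampleScore(numaNodesCount, highestNUMAID int, isMinAvgDistance bool) int64 {
    numaNodeScore := maxNodeScore / int64(highestNUMAID)
    score := maxNodeScore - int64(numaNodesCount)*numaNodeScore
    if isMinAvgDistance {
        score += numaNodeScore / 2
    }
    return score
}

// exampleScore(2, 8, false)  == 76: legacy default, 100/8 = 12 points per NUMA node used
// exampleScore(2, 16, false) == 88: a node advertising 16 NUMA nodes, 100/16 = 6 points per node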

pkg/noderesourcetopology/least_numa_test.go

Lines changed: 2 additions & 1 deletion
@@ -25,6 +25,7 @@ import (
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/klog/v2"
     "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
+    "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/nodeconfig"
 )
 
 const (
@@ -746,7 +747,7 @@ func TestNormalizeScore(t *testing.T) {
 
     for _, tc := range tcases {
         t.Run(tc.description, func(t *testing.T) {
-            normalizedScore := normalizeScore(tc.score, tc.optimalDistance)
+            normalizedScore := normalizeScore(tc.score, tc.optimalDistance, nodeconfig.DefaultMaxNUMANodes)
             if normalizedScore != tc.expectedScore {
                 t.Errorf("Expected normalizedScore to be %d not %d", tc.expectedScore, normalizedScore)
             }

pkg/noderesourcetopology/nodeconfig/topologymanager.go

Lines changed: 42 additions & 12 deletions
@@ -18,6 +18,7 @@ package nodeconfig
 
 import (
     "fmt"
+    "strconv"
 
     kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
 
@@ -26,11 +27,16 @@ import (
 )
 
 const (
-    AttributeScope  = "topologyManagerScope"
-    AttributePolicy = "topologyManagerPolicy"
+    DefaultMaxNUMANodes = 8 // legacy setting and default value for TopologyManager. NOTE: kube doesn't expose this constant
+
+    LimitNUMANodes = 1024 // basic sanitization, but we will likely need to bump soon enough
 )
 
-// TODO: handle topologyManagerPolicyOptions added in k8s 1.26
+const (
+    AttributeScope        = "topologyManagerScope"
+    AttributePolicy       = "topologyManagerPolicy"
+    AttributeMaxNUMANodes = "topologyManagerMaxNUMANodes"
+)
 
 func IsValidScope(scope string) bool {
     if scope == kubeletconfig.ContainerTopologyManagerScope || scope == kubeletconfig.PodTopologyManagerScope {
@@ -47,15 +53,25 @@ func IsValidPolicy(policy string) bool {
     return false
 }
 
+func IsValidMaxNUMANodes(value int) bool {
+    // machines always report at least 1 NUMA node anyway, and furthermore the value is used in a division,
+    // so we need to enforce 1 (not 0) as minimum
+    // NOTE: there's no legit upper bound, so care must be taken to cap this value. But still, theoretically
+    // 4096 NUMA nodes is a valid legal value.
+    return value > 1
+}
+
 type TopologyManager struct {
-    Scope  string
-    Policy string
+    Scope        string
+    Policy       string
+    MaxNUMANodes int
 }
 
 func TopologyManagerDefaults() TopologyManager {
     return TopologyManager{
-        Scope:  kubeletconfig.ContainerTopologyManagerScope,
-        Policy: kubeletconfig.NoneTopologyManagerPolicy,
+        Scope:        kubeletconfig.ContainerTopologyManagerScope,
+        Policy:       kubeletconfig.NoneTopologyManagerPolicy,
+        MaxNUMANodes: DefaultMaxNUMANodes,
     }
 }
 
@@ -65,12 +81,12 @@ func TopologyManagerFromNodeResourceTopology(lh logr.Logger, nodeTopology *topol
     // Backward compatibility (v1alpha2 and previous). Deprecated, will be removed when the NRT API moves to v1beta1.
     cfg.updateFromPolicies(lh, nodeTopology.Name, nodeTopology.TopologyPolicies)
     // preferred new configuration source (v1alpha2 and onwards)
-    cfg.updateFromAttributes(nodeTopology.Attributes)
+    cfg.updateFromAttributes(lh, nodeTopology.Attributes)
     return conf
 }
 
 func (conf TopologyManager) String() string {
-    return fmt.Sprintf("policy=%q scope=%q", conf.Policy, conf.Scope)
+    return fmt.Sprintf("policy=%q scope=%q maxNUMANodes=%d", conf.Policy, conf.Scope, conf.MaxNUMANodes)
 }
 
 func (conf TopologyManager) Equal(other TopologyManager) bool {
@@ -80,10 +96,10 @@ func (conf TopologyManager) Equal(other TopologyManager) bool {
     if conf.Policy != other.Policy {
         return false
     }
-    return true
+    return conf.MaxNUMANodes == other.MaxNUMANodes
 }
 
-func (conf *TopologyManager) updateFromAttributes(attrs topologyv1alpha2.AttributeList) {
+func (conf *TopologyManager) updateFromAttributes(lh logr.Logger, attrs topologyv1alpha2.AttributeList) {
     for _, attr := range attrs {
         if attr.Name == AttributeScope && IsValidScope(attr.Value) {
             conf.Scope = attr.Value
@@ -93,8 +109,22 @@ func (conf *TopologyManager) updateFromAttributes(attrs topologyv1alpha2.Attribu
             conf.Policy = attr.Value
             continue
         }
-        // TODO: handle topologyManagerPolicyOptions added in k8s 1.26
+        if attr.Name == AttributeMaxNUMANodes {
+            if val, err := strconv.Atoi(attr.Value); err == nil && IsValidMaxNUMANodes(val) {
+                conf.MaxNUMANodes = clampMaxNUMANodes(lh, val)
+                continue
+            }
+        }
+    }
+}
+
+func clampMaxNUMANodes(lh logr.Logger, val int) int {
+    if val > LimitNUMANodes {
+        // should never happen, so we are verbose
+        lh.Info("capped MaxNUMANodes value to limit", "value", val, "limit", LimitNUMANodes)
+        val = LimitNUMANodes
     }
+    return val
 }
 
 func (conf *TopologyManager) updateFromPolicies(lh logr.Logger, nodeName string, topologyPolicies []string) {
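
Putting updateFromAttributes, IsValidMaxNUMANodes, and clampMaxNUMANodes together, the accepted values behave as follows. This is a condensed, hypothetical restatement for readability, not code from the commit:

package nodeconfigsketch

import "strconv"

// parseMaxNUMANodes condenses the rules added above: the attribute must parse as an
// integer greater than 1 (per IsValidMaxNUMANodes as committed), values above
// LimitNUMANodes (1024) are capped, and anything else is ignored so the current
// value (DefaultMaxNUMANodes, 8, unless already overridden) is kept.
func parseMaxNUMANodes(raw string, current int) int {
    val, err := strconv.Atoi(raw)
    if err != nil || val <= 1 {
        return current
    }
    if val > 1024 {
        return 1024
    }
    return val
}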

pkg/noderesourcetopology/nodeconfig/topologymanager_test.go

Lines changed: 108 additions & 15 deletions
@@ -136,12 +136,14 @@ func TestTopologyManagerEqual(t *testing.T) {
         {
             name: "matching",
             tmA: TopologyManager{
-                Scope:  "container",
-                Policy: "single-numa-node",
+                Scope:        "container",
+                Policy:       "single-numa-node",
+                MaxNUMANodes: 9, // gracehopper
             },
             tmB: TopologyManager{
-                Scope:  "container",
-                Policy: "single-numa-node",
+                Scope:        "container",
+                Policy:       "single-numa-node",
+                MaxNUMANodes: 9, // gracehopper
             },
             expected: true,
         },
@@ -181,6 +183,25 @@
             },
             expected: false,
         },
+        {
+            name: "nodes diff vs nil",
+            tmA: TopologyManager{
+                MaxNUMANodes: 16,
+            },
+            tmB:      TopologyManager{},
+            expected: false,
+        },
+        {
+            name: "nodes diff",
+            tmA: TopologyManager{
+                MaxNUMANodes: 16,
+            },
+            tmB: TopologyManager{
+                MaxNUMANodes: 9, // gracehopper
+            },
+            expected: false,
+        },
+
         {
             name: "scope diff, policy matching",
             tmA: TopologyManager{
@@ -205,6 +226,19 @@
             },
             expected: false,
         },
+        {
+            name: "scope, policy matching, nodes diff",
+            tmA: TopologyManager{
+                Scope:        "container",
+                Policy:       "single-numa-node",
+                MaxNUMANodes: 9,
+            },
+            tmB: TopologyManager{
+                Scope:  "container",
+                Policy: "best-effort",
+            },
+            expected: false,
+        },
     }
 
     for _, tt := range tests {
@@ -324,13 +358,67 @@ func TestConfigFromAttributes(t *testing.T) {
                 Policy: kubeletconfig.RestrictedTopologyManagerPolicy,
             },
         },
+        {
+            name: "invalid-nodes-string",
+            attrs: topologyv1alpha2.AttributeList{
+                {
+                    Name:  "topologyManagerMaxNUMANodes",
+                    Value: "A",
+                },
+            },
+            expected: TopologyManager{},
+        },
+        {
+            name: "invalid-nodes-zero",
+            attrs: topologyv1alpha2.AttributeList{
+                {
+                    Name:  "topologyManagerMaxNUMANodes",
+                    Value: "0",
+                },
+            },
+            expected: TopologyManager{},
+        },
+        {
+            name: "invalid-nodes-negative",
+            attrs: topologyv1alpha2.AttributeList{
+                {
+                    Name:  "topologyManagerMaxNUMANodes",
+                    Value: "-2",
+                },
+            },
+            expected: TopologyManager{},
+        },
+        {
+            name: "valid-nodes",
+            attrs: topologyv1alpha2.AttributeList{
+                {
+                    Name:  "topologyManagerMaxNUMANodes",
+                    Value: "16",
+                },
+            },
+            expected: TopologyManager{
+                MaxNUMANodes: 16,
+            },
+        },
+        {
+            name: "valid-nodes-upper-bound",
+            attrs: topologyv1alpha2.AttributeList{
+                {
+                    Name:  "topologyManagerMaxNUMANodes",
+                    Value: "65535",
+                },
+            },
+            expected: TopologyManager{
+                MaxNUMANodes: LimitNUMANodes,
+            },
+        },
     }
 
     for _, tt := range tests {
         t.Run(tt.name, func(t *testing.T) {
             got := TopologyManager{}
             cfg := &got // shortcut
-            cfg.updateFromAttributes(tt.attrs)
+            cfg.updateFromAttributes(klog.Background(), tt.attrs)
             if !reflect.DeepEqual(got, tt.expected) {
                 t.Errorf("conf got=%+#v expected=%+#v", got, tt.expected)
             }
@@ -427,8 +515,9 @@ func TestConfigFromNRT(t *testing.T) {
             },
         },
         expected: TopologyManager{
-            Policy: kubeletconfig.BestEffortTopologyManagerPolicy,
-            Scope:  kubeletconfig.PodTopologyManagerScope,
+            Policy:       kubeletconfig.BestEffortTopologyManagerPolicy,
+            Scope:        kubeletconfig.PodTopologyManagerScope,
+            MaxNUMANodes: DefaultMaxNUMANodes,
         },
     },
     {
@@ -440,8 +529,9 @@
             },
         },
         expected: TopologyManager{
-            Policy: kubeletconfig.RestrictedTopologyManagerPolicy,
-            Scope:  kubeletconfig.ContainerTopologyManagerScope,
+            Policy:       kubeletconfig.RestrictedTopologyManagerPolicy,
+            Scope:        kubeletconfig.ContainerTopologyManagerScope,
+            MaxNUMANodes: DefaultMaxNUMANodes,
         },
     },
    {
@@ -455,8 +545,9 @@
             },
         },
         expected: TopologyManager{
-            Policy: kubeletconfig.RestrictedTopologyManagerPolicy,
-            Scope:  kubeletconfig.ContainerTopologyManagerScope,
+            Policy:       kubeletconfig.RestrictedTopologyManagerPolicy,
+            Scope:        kubeletconfig.ContainerTopologyManagerScope,
+            MaxNUMANodes: DefaultMaxNUMANodes,
         },
     },
    {
@@ -473,8 +564,9 @@
             },
         },
         expected: TopologyManager{
-            Policy: kubeletconfig.BestEffortTopologyManagerPolicy,
-            Scope:  kubeletconfig.ContainerTopologyManagerScope,
+            Policy:       kubeletconfig.BestEffortTopologyManagerPolicy,
+            Scope:        kubeletconfig.ContainerTopologyManagerScope,
+            MaxNUMANodes: DefaultMaxNUMANodes,
         },
     },
    {
@@ -495,8 +587,9 @@
             },
         },
         expected: TopologyManager{
-            Policy: kubeletconfig.RestrictedTopologyManagerPolicy,
-            Scope:  kubeletconfig.ContainerTopologyManagerScope,
+            Policy:       kubeletconfig.RestrictedTopologyManagerPolicy,
+            Scope:        kubeletconfig.ContainerTopologyManagerScope,
+            MaxNUMANodes: DefaultMaxNUMANodes,
         },
     },
     }
