Skip to content

Commit 2263f2d

Browse files
authored
Merge pull request kubernetes#124148 from cyclinder/add_flag_kubelet
kubelet: Add a TopologyManager policy option: max-allowable-numa-nodes
2 parents eb9b928 + 87129c3 commit 2263f2d

File tree

7 files changed

+88
-21
lines changed

7 files changed

+88
-21
lines changed

cmd/kubelet/app/options/options.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -492,7 +492,7 @@ func AddKubeletConfigFlags(mainfs *pflag.FlagSet, c *kubeletconfig.KubeletConfig
492492
fs.BoolVar(&c.ProtectKernelDefaults, "protect-kernel-defaults", c.ProtectKernelDefaults, "Default kubelet behaviour for kernel tuning. If set, kubelet errors if any of kernel tunables is different than kubelet defaults.")
493493
fs.StringVar(&c.ReservedSystemCPUs, "reserved-cpus", c.ReservedSystemCPUs, "A comma-separated list of CPUs or CPU ranges that are reserved for system and kubernetes usage. This specific list will supersede cpu counts in --system-reserved and --kube-reserved.")
494494
fs.StringVar(&c.TopologyManagerScope, "topology-manager-scope", c.TopologyManagerScope, "Scope to which topology hints applied. Topology Manager collects hints from Hint Providers and applies them to defined scope to ensure the pod admission. Possible values: 'container', 'pod'.")
495-
fs.Var(cliflag.NewMapStringStringNoSplit(&c.TopologyManagerPolicyOptions), "topology-manager-policy-options", "A set of key=value Topology Manager policy options to use, to fine tune their behaviour. If not supplied, keep the default behaviour.")
495+
fs.Var(cliflag.NewMapStringString(&c.TopologyManagerPolicyOptions), "topology-manager-policy-options", "A set of key=value Topology Manager policy options to use, to fine tune their behaviour. If not supplied, keep the default behaviour.")
496496
// Node Allocatable Flags
497497
fs.Var(cliflag.NewMapStringString(&c.SystemReserved), "system-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=500Mi,ephemeral-storage=1Gi,pid=1000) pairs that describe resources reserved for non-kubernetes components. Currently only cpu, memory, pid and local ephemeral storage for root file system are supported. See https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ for more detail. [default=none]")
498498
fs.Var(cliflag.NewMapStringString(&c.KubeReserved), "kube-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=500Mi,ephemeral-storage=1Gi,pid=1000) pairs that describe resources reserved for kubernetes system components. Currently only cpu, memory, pid and local ephemeral storage for root file system are supported. See https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ for more detail. [default=none]")

pkg/kubelet/cm/topologymanager/policy_options.go

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,20 @@ import (
2222

2323
"k8s.io/apimachinery/pkg/util/sets"
2424
utilfeature "k8s.io/apiserver/pkg/util/feature"
25+
"k8s.io/klog/v2"
2526
kubefeatures "k8s.io/kubernetes/pkg/features"
2627
)
2728

2829
const (
2930
PreferClosestNUMANodes string = "prefer-closest-numa-nodes"
31+
MaxAllowableNUMANodes string = "max-allowable-numa-nodes"
3032
)
3133

3234
var (
3335
alphaOptions = sets.New[string]()
3436
betaOptions = sets.New[string](
3537
PreferClosestNUMANodes,
38+
MaxAllowableNUMANodes,
3639
)
3740
stableOptions = sets.New[string]()
3841
)
@@ -54,11 +57,17 @@ func CheckPolicyOptionAvailable(option string) error {
5457
}
5558

5659
type PolicyOptions struct {
57-
PreferClosestNUMA bool
60+
PreferClosestNUMA bool
61+
MaxAllowableNUMANodes int
5862
}
5963

6064
func NewPolicyOptions(policyOptions map[string]string) (PolicyOptions, error) {
61-
opts := PolicyOptions{}
65+
opts := PolicyOptions{
66+
// Set MaxAllowableNUMANodes to the default. This will be overwritten
67+
// if the user has specified a policy option for MaxAllowableNUMANodes.
68+
MaxAllowableNUMANodes: defaultMaxAllowableNUMANodes,
69+
}
70+
6271
for name, value := range policyOptions {
6372
if err := CheckPolicyOptionAvailable(name); err != nil {
6473
return opts, err
@@ -71,6 +80,20 @@ func NewPolicyOptions(policyOptions map[string]string) (PolicyOptions, error) {
7180
return opts, fmt.Errorf("bad value for option %q: %w", name, err)
7281
}
7382
opts.PreferClosestNUMA = optValue
83+
case MaxAllowableNUMANodes:
84+
optValue, err := strconv.Atoi(value)
85+
if err != nil {
86+
return opts, fmt.Errorf("unable to convert policy option to integer %q: %w", name, err)
87+
}
88+
89+
if optValue < defaultMaxAllowableNUMANodes {
90+
return opts, fmt.Errorf("the minimum value of %q should not be less than %v", name, defaultMaxAllowableNUMANodes)
91+
}
92+
93+
if optValue > defaultMaxAllowableNUMANodes {
94+
klog.InfoS("WARNING: the value of max-allowable-numa-nodes is more than the default recommended value", "max-allowable-numa-nodes", optValue, "defaultMaxAllowableNUMANodes", defaultMaxAllowableNUMANodes)
95+
}
96+
opts.MaxAllowableNUMANodes = optValue
7497
default:
7598
// this should never be reached, we already detect unknown options,
7699
// but we keep it as further safety.

pkg/kubelet/cm/topologymanager/policy_options_test.go

Lines changed: 39 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ import (
2121
"strings"
2222
"testing"
2323

24-
"k8s.io/apimachinery/pkg/util/sets"
2524
utilfeature "k8s.io/apiserver/pkg/util/feature"
2625
"k8s.io/component-base/featuregate"
2726
featuregatetesting "k8s.io/component-base/featuregate/testing"
@@ -52,39 +51,69 @@ func TestNewTopologyManagerOptions(t *testing.T) {
5251
featureGate: pkgfeatures.TopologyManagerPolicyBetaOptions,
5352
featureGateEnable: true,
5453
expectedOptions: PolicyOptions{
55-
PreferClosestNUMA: true,
54+
PreferClosestNUMA: true,
55+
MaxAllowableNUMANodes: 8,
5656
},
5757
policyOptions: map[string]string{
5858
PreferClosestNUMANodes: "true",
59+
MaxAllowableNUMANodes: "8",
60+
},
61+
},
62+
{
63+
description: "return TopologyManagerOptions with MaxAllowableNUMANodes set to 12",
64+
featureGate: pkgfeatures.TopologyManagerPolicyBetaOptions,
65+
featureGateEnable: true,
66+
expectedOptions: PolicyOptions{
67+
MaxAllowableNUMANodes: 12,
68+
},
69+
policyOptions: map[string]string{
70+
MaxAllowableNUMANodes: "12",
5971
},
6072
},
6173
{
6274
description: "fail to set option when TopologyManagerPolicyBetaOptions feature gate is not set",
6375
featureGate: pkgfeatures.TopologyManagerPolicyBetaOptions,
6476
policyOptions: map[string]string{
6577
PreferClosestNUMANodes: "true",
78+
MaxAllowableNUMANodes: "8",
6679
},
6780
expectedErr: fmt.Errorf("Topology Manager Policy Beta-level Options not enabled,"),
6881
},
6982
{
7083
description: "return empty TopologyManagerOptions",
84+
expectedOptions: PolicyOptions{
85+
MaxAllowableNUMANodes: 8,
86+
},
7187
},
7288
{
73-
description: "fail to parse options",
89+
description: "fail to parse options with error PreferClosestNUMANodes",
7490
featureGate: pkgfeatures.TopologyManagerPolicyAlphaOptions,
7591
featureGateEnable: true,
7692
policyOptions: map[string]string{
7793
PreferClosestNUMANodes: "not a boolean",
7894
},
7995
expectedErr: fmt.Errorf("bad value for option"),
8096
},
97+
{
98+
description: "fail to parse options with error MaxAllowableNUMANodes",
99+
featureGate: pkgfeatures.TopologyManagerPolicyAlphaOptions,
100+
featureGateEnable: true,
101+
policyOptions: map[string]string{
102+
MaxAllowableNUMANodes: "can't parse to int",
103+
},
104+
expectedErr: fmt.Errorf("unable to convert policy option to integer"),
105+
},
81106
{
82107
description: "test beta options success",
83108
featureGate: pkgfeatures.TopologyManagerPolicyBetaOptions,
84109
featureGateEnable: true,
85110
policyOptions: map[string]string{
86111
fancyBetaOption: "true",
87112
},
113+
expectedOptions: PolicyOptions{
114+
PreferClosestNUMA: false,
115+
MaxAllowableNUMANodes: 8,
116+
},
88117
},
89118
{
90119
description: "test beta options fail",
@@ -101,6 +130,10 @@ func TestNewTopologyManagerOptions(t *testing.T) {
101130
policyOptions: map[string]string{
102131
fancyAlphaOption: "true",
103132
},
133+
expectedOptions: PolicyOptions{
134+
PreferClosestNUMA: false,
135+
MaxAllowableNUMANodes: 8,
136+
},
104137
},
105138
{
106139
description: "test alpha options fail",
@@ -112,7 +145,7 @@ func TestNewTopologyManagerOptions(t *testing.T) {
112145
}
113146

114147
betaOptions.Insert(fancyBetaOption)
115-
alphaOptions = sets.New[string](fancyAlphaOption)
148+
alphaOptions.Insert(fancyAlphaOption)
116149

117150
for _, tcase := range testCases {
118151
t.Run(tcase.description, func(t *testing.T) {
@@ -122,13 +155,13 @@ func TestNewTopologyManagerOptions(t *testing.T) {
122155
opts, err := NewPolicyOptions(tcase.policyOptions)
123156
if tcase.expectedErr != nil {
124157
if !strings.Contains(err.Error(), tcase.expectedErr.Error()) {
125-
t.Errorf("Unexpected error message. Have: %s wants %s", err.Error(), tcase.expectedErr.Error())
158+
t.Errorf("Unexpected error message. Have: %s, wants %s", err.Error(), tcase.expectedErr.Error())
126159
}
160+
return
127161
}
128162

129163
if opts != tcase.expectedOptions {
130164
t.Errorf("Expected TopologyManagerOptions to equal %v, not %v", tcase.expectedOptions, opts)
131-
132165
}
133166
})
134167
}

pkg/kubelet/cm/topologymanager/topology_manager.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,15 @@ import (
2929
)
3030

3131
const (
32-
// maxAllowableNUMANodes specifies the maximum number of NUMA Nodes that
32+
// defaultMaxAllowableNUMANodes specifies the maximum number of NUMA Nodes that
3333
// the TopologyManager supports on the underlying machine.
3434
//
3535
// At present, having more than this number of NUMA Nodes will result in a
3636
// state explosion when trying to enumerate possible NUMAAffinity masks and
3737
// generate hints for them. As such, if more NUMA Nodes than this are
3838
// present on a machine and the TopologyManager is enabled, an error will
3939
// be returned and the TopologyManager will not be loaded.
40-
maxAllowableNUMANodes = 8
40+
defaultMaxAllowableNUMANodes = 8
4141
// ErrorTopologyAffinity represents the type for a TopologyAffinityError
4242
ErrorTopologyAffinity = "TopologyAffinityError"
4343
)
@@ -151,8 +151,8 @@ func NewManager(topology []cadvisorapi.Node, topologyPolicyName string, topology
151151
return nil, fmt.Errorf("cannot discover NUMA topology: %w", err)
152152
}
153153

154-
if topologyPolicyName != PolicyNone && len(numaInfo.Nodes) > maxAllowableNUMANodes {
155-
return nil, fmt.Errorf("unsupported on machines with more than %v NUMA Nodes", maxAllowableNUMANodes)
154+
if topologyPolicyName != PolicyNone && len(numaInfo.Nodes) > opts.MaxAllowableNUMANodes {
155+
return nil, fmt.Errorf("unsupported on machines with more than %v NUMA Nodes", opts.MaxAllowableNUMANodes)
156156
}
157157

158158
var policy Policy

pkg/kubelet/cm/topologymanager/topology_manager_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ func TestNewManager(t *testing.T) {
105105
description: "more than 8 NUMA nodes",
106106
policyName: "best-effort",
107107
expectedPolicy: "best-effort",
108-
expectedError: fmt.Errorf("unsupported on machines with more than %v NUMA Nodes", maxAllowableNUMANodes),
108+
expectedError: fmt.Errorf("unsupported on machines with more than %v NUMA Nodes", defaultMaxAllowableNUMANodes),
109109
topology: []cadvisorapi.Node{
110110
{
111111
Id: 0,

test/e2e_node/topology_manager_metrics_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ var _ = SIGDescribe("Topology Manager Metrics", framework.WithSerial(), feature.
6666
policy := topologymanager.PolicySingleNumaNode
6767
scope := podScopeTopology
6868

69-
newCfg, _ := configureTopologyManagerInKubelet(oldCfg, policy, scope, nil, 0)
69+
newCfg, _ := configureTopologyManagerInKubelet(oldCfg, policy, scope, nil, nil, 0)
7070
updateKubeletConfig(ctx, f, newCfg, true)
7171

7272
})

test/e2e_node/topology_manager_test.go

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -203,13 +203,17 @@ func findNUMANodeWithoutSRIOVDevices(configMap *v1.ConfigMap, numaNodes int) (in
203203
return findNUMANodeWithoutSRIOVDevicesFromSysfs(numaNodes)
204204
}
205205

206-
func configureTopologyManagerInKubelet(oldCfg *kubeletconfig.KubeletConfiguration, policy, scope string, configMap *v1.ConfigMap, numaNodes int) (*kubeletconfig.KubeletConfiguration, string) {
206+
func configureTopologyManagerInKubelet(oldCfg *kubeletconfig.KubeletConfiguration, policy, scope string, topologyOptions map[string]string, configMap *v1.ConfigMap, numaNodes int) (*kubeletconfig.KubeletConfiguration, string) {
207207
// Configure Topology Manager in Kubelet with policy.
208208
newCfg := oldCfg.DeepCopy()
209209
if newCfg.FeatureGates == nil {
210210
newCfg.FeatureGates = make(map[string]bool)
211211
}
212212

213+
if topologyOptions != nil {
214+
newCfg.TopologyManagerPolicyOptions = topologyOptions
215+
}
216+
213217
// Set the Topology Manager policy
214218
newCfg.TopologyManagerPolicy = policy
215219

@@ -858,7 +862,7 @@ func runTopologyManagerNodeAlignmentSuiteTests(ctx context.Context, f *framework
858862
}
859863
}
860864

861-
func runTopologyManagerTests(f *framework.Framework) {
865+
func runTopologyManagerTests(f *framework.Framework, topologyOptions map[string]string) {
862866
var oldCfg *kubeletconfig.KubeletConfiguration
863867
var err error
864868

@@ -879,7 +883,7 @@ func runTopologyManagerTests(f *framework.Framework) {
879883
ginkgo.By(fmt.Sprintf("by configuring Topology Manager policy to %s", policy))
880884
framework.Logf("Configuring topology Manager policy to %s", policy)
881885

882-
newCfg, _ := configureTopologyManagerInKubelet(oldCfg, policy, scope, nil, 0)
886+
newCfg, _ := configureTopologyManagerInKubelet(oldCfg, policy, scope, topologyOptions, nil, 0)
883887
updateKubeletConfig(ctx, f, newCfg, true)
884888
// Run the tests
885889
runTopologyManagerPolicySuiteTests(ctx, f)
@@ -903,7 +907,7 @@ func runTopologyManagerTests(f *framework.Framework) {
903907
ginkgo.By(fmt.Sprintf("by configuring Topology Manager policy to %s", policy))
904908
framework.Logf("Configuring topology Manager policy to %s", policy)
905909

906-
newCfg, reservedSystemCPUs := configureTopologyManagerInKubelet(oldCfg, policy, scope, configMap, numaNodes)
910+
newCfg, reservedSystemCPUs := configureTopologyManagerInKubelet(oldCfg, policy, scope, topologyOptions, configMap, numaNodes)
907911
updateKubeletConfig(ctx, f, newCfg, true)
908912

909913
runTopologyManagerNodeAlignmentSuiteTests(ctx, f, sd, reservedSystemCPUs, policy, numaNodes, coreCount)
@@ -921,7 +925,7 @@ func runTopologyManagerTests(f *framework.Framework) {
921925
policy := topologymanager.PolicySingleNumaNode
922926
scope := podScopeTopology
923927

924-
newCfg, reservedSystemCPUs := configureTopologyManagerInKubelet(oldCfg, policy, scope, configMap, numaNodes)
928+
newCfg, reservedSystemCPUs := configureTopologyManagerInKubelet(oldCfg, policy, scope, topologyOptions, configMap, numaNodes)
925929
updateKubeletConfig(ctx, f, newCfg, true)
926930

927931
runTMScopeResourceAlignmentTestSuite(ctx, f, configMap, reservedSystemCPUs, policy, numaNodes, coreCount)
@@ -960,6 +964,13 @@ var _ = SIGDescribe("Topology Manager", framework.WithSerial(), feature.Topology
960964
f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
961965

962966
ginkgo.Context("With kubeconfig updated to static CPU Manager policy run the Topology Manager tests", func() {
963-
runTopologyManagerTests(f)
967+
runTopologyManagerTests(f, nil)
968+
})
969+
ginkgo.Context("With kubeconfig's topologyOptions updated to static CPU Manager policy run the Topology Manager tests", ginkgo.Label("MaxAllowableNUMANodes"), func() {
970+
// At present, the default value of defaultMaxAllowableNUMANode is 8, we run
971+
// the same tests with 2 * defaultMaxAllowableNUMANode(16). This is the
972+
// most basic verification that the changed setting is not breaking stuff.
973+
doubleDefaultMaxAllowableNUMANodes := strconv.Itoa(8 * 2)
974+
runTopologyManagerTests(f, map[string]string{topologymanager.MaxAllowableNUMANodes: doubleDefaultMaxAllowableNUMANodes})
964975
})
965976
})

0 commit comments

Comments
 (0)