
Commit c923a61

Merge pull request kubernetes#125982 from harche/compressible_reserved
Set only compressible resources on system and kube reserved cgroup slices
2 parents 9ffefe3 + 3bad47e commit c923a61

7 files changed: +184 -35 lines


cmd/kubelet/app/options/options.go

Lines changed: 1 addition & 1 deletion
@@ -498,7 +498,7 @@ func AddKubeletConfigFlags(mainfs *pflag.FlagSet, c *kubeletconfig.KubeletConfig
     // Node Allocatable Flags
     fs.Var(cliflag.NewMapStringString(&c.SystemReserved), "system-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=500Mi,ephemeral-storage=1Gi,pid=1000) pairs that describe resources reserved for non-kubernetes components. Currently only cpu, memory, pid and local ephemeral storage for root file system are supported. See https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ for more detail. [default=none]")
     fs.Var(cliflag.NewMapStringString(&c.KubeReserved), "kube-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=500Mi,ephemeral-storage=1Gi,pid=1000) pairs that describe resources reserved for kubernetes system components. Currently only cpu, memory, pid and local ephemeral storage for root file system are supported. See https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ for more detail. [default=none]")
-    fs.StringSliceVar(&c.EnforceNodeAllocatable, "enforce-node-allocatable", c.EnforceNodeAllocatable, "A comma separated list of levels of node allocatable enforcement to be enforced by kubelet. Acceptable options are 'none', 'pods', 'system-reserved', and 'kube-reserved'. If the latter two options are specified, '--system-reserved-cgroup' and '--kube-reserved-cgroup' must also be set, respectively. If 'none' is specified, no additional options should be set. See https://kubernetes.io/docs/tasks/administer-cluster/reserve-compute-resources/ for more details.")
+    fs.StringSliceVar(&c.EnforceNodeAllocatable, "enforce-node-allocatable", c.EnforceNodeAllocatable, "A comma separated list of levels of node allocatable enforcement to be enforced by kubelet. Acceptable options are 'none', 'pods', 'system-reserved', 'system-reserved-compressible', 'kube-reserved' and 'kube-reserved-compressible'. If any of the latter four options are specified, '--system-reserved-cgroup' and '--kube-reserved-cgroup' must also be set, respectively. If 'none' is specified, no additional options should be set. See https://kubernetes.io/docs/tasks/administer-cluster/reserve-compute-resources/ for more details.")
     fs.StringVar(&c.SystemReservedCgroup, "system-reserved-cgroup", c.SystemReservedCgroup, "Absolute name of the top level cgroup that is used to manage non-kubernetes components for which compute resources were reserved via '--system-reserved' flag. Ex. '/system-reserved'. [default='']")
     fs.StringVar(&c.KubeReservedCgroup, "kube-reserved-cgroup", c.KubeReservedCgroup, "Absolute name of the top level cgroup that is used to manage kubernetes components for which compute resources were reserved via '--kube-reserved' flag. Ex. '/kube-reserved'. [default='']")
     logsapi.AddFlags(&c.Logging, fs)
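
For reference while reading the rest of the diff: the enforcement levels are plain string keys. Judging from the flag help text above and the validation test expectations below, the keys map to the following values (a sketch; the actual constants live in the kubetypes package, and only the two Compressible keys are new in this PR):

package kubetypes

// Enforcement keys as referenced in this diff. Values are taken from the
// error strings in the validation tests; only the two Compressible keys
// are new in this PR.
const (
    NodeAllocatableEnforcementKey            = "pods"
    SystemReservedEnforcementKey             = "system-reserved"
    SystemReservedCompressibleEnforcementKey = "system-reserved-compressible"
    KubeReservedEnforcementKey               = "kube-reserved"
    KubeReservedCompressibleEnforcementKey   = "kube-reserved-compressible"
    NodeAllocatableNoneKey                   = "none"
)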

cmd/kubelet/app/server.go

Lines changed: 1 addition & 0 deletions
@@ -801,6 +801,7 @@ func run(ctx context.Context, s *options.KubeletServer, kubeDeps *kubelet.Depend
     if err != nil {
         return fmt.Errorf("--system-reserved value failed to parse: %w", err)
     }
+
     var hardEvictionThresholds []evictionapi.Threshold
     // If the user requested to ignore eviction thresholds, then do not set valid values for hardEvictionThresholds here.
     if !s.ExperimentalNodeAllocatableIgnoreEvictionThreshold {

pkg/kubelet/apis/config/validation/validation.go

Lines changed: 27 additions & 6 deletions
@@ -203,6 +203,26 @@ func ValidateKubeletConfiguration(kc *kubeletconfig.KubeletConfiguration, featur
         allErrors = append(allErrors, fmt.Errorf("invalid configuration: memorySwap.swapBehavior cannot be set when NodeSwap feature flag is disabled"))
     }
 
+    // Check for mutually exclusive keys before the main validation loop
+    reservedKeys := map[string]bool{
+        kubetypes.SystemReservedEnforcementKey:             false,
+        kubetypes.SystemReservedCompressibleEnforcementKey: false,
+        kubetypes.KubeReservedEnforcementKey:               false,
+        kubetypes.KubeReservedCompressibleEnforcementKey:   false,
+    }
+
+    for _, val := range kc.EnforceNodeAllocatable {
+        reservedKeys[val] = true
+    }
+
+    if reservedKeys[kubetypes.SystemReservedCompressibleEnforcementKey] && reservedKeys[kubetypes.SystemReservedEnforcementKey] {
+        allErrors = append(allErrors, fmt.Errorf("invalid configuration: both %q and %q cannot be specified together in enforceNodeAllocatable (--enforce-node-allocatable)", kubetypes.SystemReservedEnforcementKey, kubetypes.SystemReservedCompressibleEnforcementKey))
+    }
+
+    if reservedKeys[kubetypes.KubeReservedCompressibleEnforcementKey] && reservedKeys[kubetypes.KubeReservedEnforcementKey] {
+        allErrors = append(allErrors, fmt.Errorf("invalid configuration: both %q and %q cannot be specified together in enforceNodeAllocatable (--enforce-node-allocatable)", kubetypes.KubeReservedEnforcementKey, kubetypes.KubeReservedCompressibleEnforcementKey))
+    }
+
     uniqueEnforcements := sets.Set[string]{}
     for _, val := range kc.EnforceNodeAllocatable {
         if uniqueEnforcements.Has(val) {
@@ -213,13 +233,13 @@ func ValidateKubeletConfiguration(kc *kubeletconfig.KubeletConfiguration, featur
 
         switch val {
         case kubetypes.NodeAllocatableEnforcementKey:
-        case kubetypes.SystemReservedEnforcementKey:
+        case kubetypes.SystemReservedEnforcementKey, kubetypes.SystemReservedCompressibleEnforcementKey:
             if kc.SystemReservedCgroup == "" {
-                allErrors = append(allErrors, fmt.Errorf("invalid configuration: systemReservedCgroup (--system-reserved-cgroup) must be specified when %q contained in enforceNodeAllocatable (--enforce-node-allocatable)", kubetypes.SystemReservedEnforcementKey))
+                allErrors = append(allErrors, fmt.Errorf("invalid configuration: systemReservedCgroup (--system-reserved-cgroup) must be specified when %q or %q included in enforceNodeAllocatable (--enforce-node-allocatable)", kubetypes.SystemReservedEnforcementKey, kubetypes.SystemReservedCompressibleEnforcementKey))
             }
-        case kubetypes.KubeReservedEnforcementKey:
+        case kubetypes.KubeReservedEnforcementKey, kubetypes.KubeReservedCompressibleEnforcementKey:
             if kc.KubeReservedCgroup == "" {
-                allErrors = append(allErrors, fmt.Errorf("invalid configuration: kubeReservedCgroup (--kube-reserved-cgroup) must be specified when %q contained in enforceNodeAllocatable (--enforce-node-allocatable)", kubetypes.KubeReservedEnforcementKey))
+                allErrors = append(allErrors, fmt.Errorf("invalid configuration: kubeReservedCgroup (--kube-reserved-cgroup) must be specified when %q or %q included in enforceNodeAllocatable (--enforce-node-allocatable)", kubetypes.KubeReservedEnforcementKey, kubetypes.KubeReservedCompressibleEnforcementKey))
             }
         case kubetypes.NodeAllocatableNoneKey:
             if len(kc.EnforceNodeAllocatable) > 1 {
@@ -228,8 +248,9 @@ func ValidateKubeletConfiguration(kc *kubeletconfig.KubeletConfiguration, featur
             // skip all further checks when this is explicitly "none"
             continue
         default:
-            allErrors = append(allErrors, fmt.Errorf("invalid configuration: option %q specified for enforceNodeAllocatable (--enforce-node-allocatable). Valid options are %q, %q, %q, or %q",
-                val, kubetypes.NodeAllocatableEnforcementKey, kubetypes.SystemReservedEnforcementKey, kubetypes.KubeReservedEnforcementKey, kubetypes.NodeAllocatableNoneKey))
+            allErrors = append(allErrors, fmt.Errorf("invalid configuration: option %q specified for enforceNodeAllocatable (--enforce-node-allocatable). Valid options are %q, %q, %q, %q, %q or %q",
+                val, kubetypes.NodeAllocatableEnforcementKey, kubetypes.SystemReservedEnforcementKey, kubetypes.SystemReservedCompressibleEnforcementKey,
+                kubetypes.KubeReservedEnforcementKey, kubetypes.KubeReservedCompressibleEnforcementKey, kubetypes.NodeAllocatableNoneKey))
             continue
         }
 
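The mutual-exclusion check above is self-contained and easy to exercise on its own. A minimal standalone sketch in Go (the key strings are inlined rather than imported from kubetypes; conflicting is a hypothetical helper name, not part of the PR):

package main

import "fmt"

// conflicting reports whether both a reserved key and its compressible
// variant appear in the enforce-node-allocatable list, mirroring the
// reservedKeys check added above.
func conflicting(enforce []string, plain, compressible string) bool {
    seen := map[string]bool{}
    for _, v := range enforce {
        seen[v] = true
    }
    return seen[plain] && seen[compressible]
}

func main() {
    enforce := []string{"pods", "system-reserved", "system-reserved-compressible"}
    // Prints true: the two system-reserved variants may not be combined.
    fmt.Println(conflicting(enforce, "system-reserved", "system-reserved-compressible"))
}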
pkg/kubelet/apis/config/validation/validation_test.go

Lines changed: 33 additions & 3 deletions
@@ -383,15 +383,45 @@ func TestValidateKubeletConfiguration(t *testing.T) {
             conf.SystemReservedCgroup = ""
             return conf
         },
-        errMsg: "invalid configuration: systemReservedCgroup (--system-reserved-cgroup) must be specified when \"system-reserved\" contained in enforceNodeAllocatable (--enforce-node-allocatable)",
+        errMsg: "invalid configuration: systemReservedCgroup (--system-reserved-cgroup) must be specified when \"system-reserved\" or \"system-reserved-compressible\" included in enforceNodeAllocatable (--enforce-node-allocatable)",
+    }, {
+        name: "specify SystemReservedCompressibleEnforcementKey without specifying SystemReservedCgroup",
+        configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration {
+            conf.EnforceNodeAllocatable = []string{kubetypes.SystemReservedCompressibleEnforcementKey}
+            conf.SystemReservedCgroup = ""
+            return conf
+        },
+        errMsg: "invalid configuration: systemReservedCgroup (--system-reserved-cgroup) must be specified when \"system-reserved\" or \"system-reserved-compressible\" included in enforceNodeAllocatable (--enforce-node-allocatable)",
+    }, {
+        name: "specify SystemReservedCompressibleEnforcementKey with SystemReservedEnforcementKey",
+        configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration {
+            conf.EnforceNodeAllocatable = []string{kubetypes.SystemReservedCompressibleEnforcementKey, kubetypes.SystemReservedEnforcementKey}
+            return conf
+        },
+        errMsg: "invalid configuration: both \"system-reserved\" and \"system-reserved-compressible\" cannot be specified together in enforceNodeAllocatable (--enforce-node-allocatable)",
+    }, {
+        name: "specify KubeReservedCompressibleEnforcementKey without specifying KubeReservedCgroup",
+        configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration {
+            conf.EnforceNodeAllocatable = []string{kubetypes.KubeReservedCompressibleEnforcementKey}
+            conf.KubeReservedCgroup = ""
+            return conf
+        },
+        errMsg: "invalid configuration: kubeReservedCgroup (--kube-reserved-cgroup) must be specified when \"kube-reserved\" or \"kube-reserved-compressible\" included in enforceNodeAllocatable (--enforce-node-allocatable)",
     }, {
         name: "specify KubeReservedEnforcementKey without specifying KubeReservedCgroup",
         configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration {
             conf.EnforceNodeAllocatable = []string{kubetypes.KubeReservedEnforcementKey}
             conf.KubeReservedCgroup = ""
             return conf
         },
-        errMsg: "invalid configuration: kubeReservedCgroup (--kube-reserved-cgroup) must be specified when \"kube-reserved\" contained in enforceNodeAllocatable (--enforce-node-allocatable)",
+        errMsg: "invalid configuration: kubeReservedCgroup (--kube-reserved-cgroup) must be specified when \"kube-reserved\" or \"kube-reserved-compressible\" included in enforceNodeAllocatable (--enforce-node-allocatable)",
+    }, {
+        name: "specify KubeReservedCompressibleEnforcementKey with KubeReservedEnforcementKey",
+        configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration {
+            conf.EnforceNodeAllocatable = []string{kubetypes.KubeReservedCompressibleEnforcementKey, kubetypes.KubeReservedEnforcementKey}
+            return conf
+        },
+        errMsg: "invalid configuration: both \"kube-reserved\" and \"kube-reserved-compressible\" cannot be specified together in enforceNodeAllocatable (--enforce-node-allocatable)",
     }, {
         name: "specify NodeAllocatableNoneKey with additional enforcements",
         configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration {
@@ -412,7 +442,7 @@ func TestValidateKubeletConfiguration(t *testing.T) {
             conf.EnforceNodeAllocatable = []string{"invalid-enforce-node-allocatable"}
             return conf
         },
-        errMsg: "invalid configuration: option \"invalid-enforce-node-allocatable\" specified for enforceNodeAllocatable (--enforce-node-allocatable). Valid options are \"pods\", \"system-reserved\", \"kube-reserved\", or \"none\"",
+        errMsg: "invalid configuration: option \"invalid-enforce-node-allocatable\" specified for enforceNodeAllocatable (--enforce-node-allocatable). Valid options are \"pods\", \"system-reserved\", \"system-reserved-compressible\", \"kube-reserved\", \"kube-reserved-compressible\" or \"none\"",
     }, {
         name: "invalid HairpinMode",
         configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration {

pkg/kubelet/cm/node_container_manager_linux.go

Lines changed: 53 additions & 20 deletions
@@ -53,7 +53,7 @@ func (cm *containerManagerImpl) createNodeAllocatableCgroups() error {
     cgroupConfig := &CgroupConfig{
         Name: cm.cgroupRoot,
         // The default limits for cpu shares can be very low which can lead to CPU starvation for pods.
-        ResourceParameters: getCgroupConfig(nodeAllocatable),
+        ResourceParameters: getCgroupConfig(nodeAllocatable, false),
     }
     if cm.cgroupManager.Exists(cgroupConfig.Name) {
         return nil
@@ -81,7 +81,7 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error {
 
     cgroupConfig := &CgroupConfig{
         Name: cm.cgroupRoot,
-        ResourceParameters: getCgroupConfig(nodeAllocatable),
+        ResourceParameters: getCgroupConfig(nodeAllocatable, false),
     }
 
     // Using ObjectReference for events as the node maybe not cached; refer to #42701 for detail.
@@ -110,7 +110,7 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error {
     // Now apply kube reserved and system reserved limits if required.
     if nc.EnforceNodeAllocatable.Has(kubetypes.SystemReservedEnforcementKey) {
         klog.V(2).InfoS("Enforcing system reserved on cgroup", "cgroupName", nc.SystemReservedCgroupName, "limits", nc.SystemReserved)
-        if err := enforceExistingCgroup(cm.cgroupManager, cm.cgroupManager.CgroupName(nc.SystemReservedCgroupName), nc.SystemReserved); err != nil {
+        if err := enforceExistingCgroup(cm.cgroupManager, cm.cgroupManager.CgroupName(nc.SystemReservedCgroupName), nc.SystemReserved, false); err != nil {
             message := fmt.Sprintf("Failed to enforce System Reserved Cgroup Limits on %q: %v", nc.SystemReservedCgroupName, err)
             cm.recorder.Event(nodeRef, v1.EventTypeWarning, events.FailedNodeAllocatableEnforcement, message)
             return errors.New(message)
@@ -119,19 +119,40 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error {
     }
     if nc.EnforceNodeAllocatable.Has(kubetypes.KubeReservedEnforcementKey) {
         klog.V(2).InfoS("Enforcing kube reserved on cgroup", "cgroupName", nc.KubeReservedCgroupName, "limits", nc.KubeReserved)
-        if err := enforceExistingCgroup(cm.cgroupManager, cm.cgroupManager.CgroupName(nc.KubeReservedCgroupName), nc.KubeReserved); err != nil {
+        if err := enforceExistingCgroup(cm.cgroupManager, cm.cgroupManager.CgroupName(nc.KubeReservedCgroupName), nc.KubeReserved, false); err != nil {
             message := fmt.Sprintf("Failed to enforce Kube Reserved Cgroup Limits on %q: %v", nc.KubeReservedCgroupName, err)
             cm.recorder.Event(nodeRef, v1.EventTypeWarning, events.FailedNodeAllocatableEnforcement, message)
             return errors.New(message)
         }
         cm.recorder.Eventf(nodeRef, v1.EventTypeNormal, events.SuccessfulNodeAllocatableEnforcement, "Updated limits on kube reserved cgroup %v", nc.KubeReservedCgroupName)
     }
+
+    if nc.EnforceNodeAllocatable.Has(kubetypes.SystemReservedCompressibleEnforcementKey) {
+        klog.V(2).InfoS("Enforcing system reserved compressible on cgroup", "cgroupName", nc.SystemReservedCgroupName, "limits", nc.SystemReserved)
+        if err := enforceExistingCgroup(cm.cgroupManager, cm.cgroupManager.CgroupName(nc.SystemReservedCgroupName), nc.SystemReserved, true); err != nil {
+            message := fmt.Sprintf("Failed to enforce System Reserved Compressible Cgroup Limits on %q: %v", nc.SystemReservedCgroupName, err)
+            cm.recorder.Event(nodeRef, v1.EventTypeWarning, events.FailedNodeAllocatableEnforcement, message)
+            return errors.New(message)
+        }
+        cm.recorder.Eventf(nodeRef, v1.EventTypeNormal, events.SuccessfulNodeAllocatableEnforcement, "Updated limits on system reserved cgroup %v", nc.SystemReservedCgroupName)
+    }
+
+    if nc.EnforceNodeAllocatable.Has(kubetypes.KubeReservedCompressibleEnforcementKey) {
+        klog.V(2).InfoS("Enforcing kube reserved compressible on cgroup", "cgroupName", nc.KubeReservedCgroupName, "limits", nc.KubeReserved)
+        if err := enforceExistingCgroup(cm.cgroupManager, cm.cgroupManager.CgroupName(nc.KubeReservedCgroupName), nc.KubeReserved, true); err != nil {
+            message := fmt.Sprintf("Failed to enforce Kube Reserved Compressible Cgroup Limits on %q: %v", nc.KubeReservedCgroupName, err)
+            cm.recorder.Event(nodeRef, v1.EventTypeWarning, events.FailedNodeAllocatableEnforcement, message)
+            return errors.New(message)
+        }
+        cm.recorder.Eventf(nodeRef, v1.EventTypeNormal, events.SuccessfulNodeAllocatableEnforcement, "Updated limits on kube reserved cgroup %v", nc.KubeReservedCgroupName)
+    }
     return nil
 }
 
 // enforceExistingCgroup updates the limits `rl` on existing cgroup `cName` using `cgroupManager` interface.
-func enforceExistingCgroup(cgroupManager CgroupManager, cName CgroupName, rl v1.ResourceList) error {
-    rp := getCgroupConfig(rl)
+func enforceExistingCgroup(cgroupManager CgroupManager, cName CgroupName, rl v1.ResourceList, compressibleResources bool) error {
+    rp := getCgroupConfig(rl, compressibleResources)
+
     if rp == nil {
         return fmt.Errorf("%q cgroup is not configured properly", cName)
     }
@@ -162,27 +183,39 @@ func enforceExistingCgroup(cgroupManager CgroupManager, cName CgroupName, rl v1.
 }
 
 // getCgroupConfig returns a ResourceConfig object that can be used to create or update cgroups via CgroupManager interface.
-func getCgroupConfig(rl v1.ResourceList) *ResourceConfig {
+func getCgroupConfig(rl v1.ResourceList, compressibleResourcesOnly bool) *ResourceConfig {
     // TODO(vishh): Set CPU Quota if necessary.
     if rl == nil {
         return nil
     }
     var rc ResourceConfig
-    if q, exists := rl[v1.ResourceMemory]; exists {
-        // Memory is defined in bytes.
-        val := q.Value()
-        rc.Memory = &val
-    }
-    if q, exists := rl[v1.ResourceCPU]; exists {
-        // CPU is defined in milli-cores.
-        val := MilliCPUToShares(q.MilliValue())
-        rc.CPUShares = &val
+
+    setCompressibleResources := func() {
+        if q, exists := rl[v1.ResourceCPU]; exists {
+            // CPU is defined in milli-cores.
+            val := MilliCPUToShares(q.MilliValue())
+            rc.CPUShares = &val
+        }
     }
-    if q, exists := rl[pidlimit.PIDs]; exists {
-        val := q.Value()
-        rc.PidsLimit = &val
+
+    // Only return compressible resources
+    if compressibleResourcesOnly {
+        setCompressibleResources()
+    } else {
+        if q, exists := rl[v1.ResourceMemory]; exists {
+            // Memory is defined in bytes.
+            val := q.Value()
+            rc.Memory = &val
+        }
+
+        setCompressibleResources()
+
+        if q, exists := rl[pidlimit.PIDs]; exists {
+            val := q.Value()
+            rc.PidsLimit = &val
+        }
+        rc.HugePageLimit = HugePageLimits(rl)
     }
-    rc.HugePageLimit = HugePageLimits(rl)
 
     return &rc
 }
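
The net effect of the new compressibleResourcesOnly switch: under the compressible enforcement keys the kubelet writes only CPU shares (compressible; overuse is throttled) to the reserved cgroup, and leaves memory, pids, and hugepage limits (incompressible; breaching them can OOM-kill or starve system daemons) unset. A standalone sketch of that branching (resourceConfig and cgroupConfig below are simplified stand-ins for illustration, not the kubelet's actual types):

package main

import "fmt"

// resourceConfig is a simplified stand-in for the kubelet's ResourceConfig.
type resourceConfig struct {
    CPUShares *uint64
    Memory    *int64
    PidsLimit *int64
}

// cgroupConfig mirrors the branching in getCgroupConfig above: when only
// compressible resources are requested, just CPU shares are set.
func cgroupConfig(cpuShares uint64, memBytes, pids int64, compressibleOnly bool) resourceConfig {
    var rc resourceConfig
    rc.CPUShares = &cpuShares // CPU is compressible: contention throttles, never kills
    if !compressibleOnly {
        rc.Memory = &memBytes // incompressible: exceeding it invokes the OOM killer
        rc.PidsLimit = &pids  // incompressible: fork() fails outright at the limit
    }
    return rc
}

func main() {
    rc := cgroupConfig(1024, 500<<20, 1000, true)
    // Prints "true true": with compressibleOnly, memory and pid limits stay unset.
    fmt.Println(rc.Memory == nil, rc.PidsLimit == nil)
}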
