@@ -53,7 +53,7 @@ func (cm *containerManagerImpl) createNodeAllocatableCgroups() error {
53
53
cgroupConfig := & CgroupConfig {
54
54
Name : cm .cgroupRoot ,
55
55
// The default limits for cpu shares can be very low which can lead to CPU starvation for pods.
56
- ResourceParameters : getCgroupConfig (nodeAllocatable , false ),
56
+ ResourceParameters : cm . getCgroupConfig (nodeAllocatable , false ),
57
57
}
58
58
if cm .cgroupManager .Exists (cgroupConfig .Name ) {
59
59
return nil
@@ -81,7 +81,7 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error {
81
81
82
82
cgroupConfig := & CgroupConfig {
83
83
Name : cm .cgroupRoot ,
84
- ResourceParameters : getCgroupConfig (nodeAllocatable , false ),
84
+ ResourceParameters : cm . getCgroupConfig (nodeAllocatable , false ),
85
85
}
86
86
87
87
// Use an ObjectReference for events because the node may not be cached; refer to #42701 for details.
@@ -110,7 +110,7 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error {
110
110
// Now apply kube reserved and system reserved limits if required.
111
111
if nc .EnforceNodeAllocatable .Has (kubetypes .SystemReservedEnforcementKey ) {
112
112
klog .V (2 ).InfoS ("Enforcing system reserved on cgroup" , "cgroupName" , nc .SystemReservedCgroupName , "limits" , nc .SystemReserved )
113
- if err := enforceExistingCgroup ( cm .cgroupManager , cm . cgroupManager . CgroupName (nc .SystemReservedCgroupName ) , nc .SystemReserved , false ); err != nil {
113
+ if err := cm .enforceExistingCgroup (nc .SystemReservedCgroupName , nc .SystemReserved , false ); err != nil {
114
114
message := fmt .Sprintf ("Failed to enforce System Reserved Cgroup Limits on %q: %v" , nc .SystemReservedCgroupName , err )
115
115
cm .recorder .Event (nodeRef , v1 .EventTypeWarning , events .FailedNodeAllocatableEnforcement , message )
116
116
return errors .New (message )
@@ -119,7 +119,7 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error {
119
119
}
120
120
if nc .EnforceNodeAllocatable .Has (kubetypes .KubeReservedEnforcementKey ) {
121
121
klog .V (2 ).InfoS ("Enforcing kube reserved on cgroup" , "cgroupName" , nc .KubeReservedCgroupName , "limits" , nc .KubeReserved )
122
- if err := enforceExistingCgroup ( cm .cgroupManager , cm . cgroupManager . CgroupName (nc .KubeReservedCgroupName ) , nc .KubeReserved , false ); err != nil {
122
+ if err := cm .enforceExistingCgroup (nc .KubeReservedCgroupName , nc .KubeReserved , false ); err != nil {
123
123
message := fmt .Sprintf ("Failed to enforce Kube Reserved Cgroup Limits on %q: %v" , nc .KubeReservedCgroupName , err )
124
124
cm .recorder .Event (nodeRef , v1 .EventTypeWarning , events .FailedNodeAllocatableEnforcement , message )
125
125
return errors .New (message )
@@ -129,7 +129,7 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error {
129
129
130
130
if nc .EnforceNodeAllocatable .Has (kubetypes .SystemReservedCompressibleEnforcementKey ) {
131
131
klog .V (2 ).InfoS ("Enforcing system reserved compressible on cgroup" , "cgroupName" , nc .SystemReservedCgroupName , "limits" , nc .SystemReserved )
132
- if err := enforceExistingCgroup ( cm .cgroupManager , cm . cgroupManager . CgroupName (nc .SystemReservedCgroupName ) , nc .SystemReserved , true ); err != nil {
132
+ if err := cm .enforceExistingCgroup (nc .SystemReservedCgroupName , nc .SystemReserved , true ); err != nil {
133
133
message := fmt .Sprintf ("Failed to enforce System Reserved Compressible Cgroup Limits on %q: %v" , nc .SystemReservedCgroupName , err )
134
134
cm .recorder .Event (nodeRef , v1 .EventTypeWarning , events .FailedNodeAllocatableEnforcement , message )
135
135
return errors .New (message )
@@ -139,7 +139,7 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error {
139
139
140
140
if nc .EnforceNodeAllocatable .Has (kubetypes .KubeReservedCompressibleEnforcementKey ) {
141
141
klog .V (2 ).InfoS ("Enforcing kube reserved compressible on cgroup" , "cgroupName" , nc .KubeReservedCgroupName , "limits" , nc .KubeReserved )
142
- if err := enforceExistingCgroup ( cm .cgroupManager , cm . cgroupManager . CgroupName (nc .KubeReservedCgroupName ) , nc .KubeReserved , true ); err != nil {
142
+ if err := cm .enforceExistingCgroup (nc .KubeReservedCgroupName , nc .KubeReserved , true ); err != nil {
143
143
message := fmt .Sprintf ("Failed to enforce Kube Reserved Compressible Cgroup Limits on %q: %v" , nc .KubeReservedCgroupName , err )
144
144
cm .recorder .Event (nodeRef , v1 .EventTypeWarning , events .FailedNodeAllocatableEnforcement , message )
145
145
return errors .New (message )
@@ -150,9 +150,9 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error {
150
150
}
151
151
152
152
// enforceExistingCgroup updates the limits `rl` on existing cgroup `cName` using `cgroupManager` interface.
153
- func enforceExistingCgroup ( cgroupManager CgroupManager , cName CgroupName , rl v1.ResourceList , compressibleResources bool ) error {
154
- rp := getCgroupConfig ( rl , compressibleResources )
155
-
153
+ func ( cm * containerManagerImpl ) enforceExistingCgroup ( cNameStr string , rl v1.ResourceList , compressibleResources bool ) error {
154
+ cName := cm . cgroupManager . CgroupName ( cNameStr )
155
+ rp := cm . getCgroupConfig ( rl , compressibleResources )
156
156
if rp == nil {
157
157
return fmt .Errorf ("%q cgroup is not configured properly" , cName )
158
158
}
@@ -173,17 +173,40 @@ func enforceExistingCgroup(cgroupManager CgroupManager, cName CgroupName, rl v1.
173
173
ResourceParameters : rp ,
174
174
}
175
175
klog .V (4 ).InfoS ("Enforcing limits on cgroup" , "cgroupName" , cName , "cpuShares" , cgroupConfig .ResourceParameters .CPUShares , "memory" , cgroupConfig .ResourceParameters .Memory , "pidsLimit" , cgroupConfig .ResourceParameters .PidsLimit )
176
- if err := cgroupManager .Validate (cgroupConfig .Name ); err != nil {
176
+ if err := cm . cgroupManager .Validate (cgroupConfig .Name ); err != nil {
177
177
return err
178
178
}
179
- if err := cgroupManager .Update (cgroupConfig ); err != nil {
179
+ if err := cm . cgroupManager .Update (cgroupConfig ); err != nil {
180
180
return err
181
181
}
182
182
return nil
183
183
}
184
184
185
185
// getCgroupConfig returns a ResourceConfig object that can be used to create or update cgroups via CgroupManager interface.
186
- func getCgroupConfig (rl v1.ResourceList , compressibleResourcesOnly bool ) * ResourceConfig {
186
+ func (cm * containerManagerImpl ) getCgroupConfig (rl v1.ResourceList , compressibleResourcesOnly bool ) * ResourceConfig {
187
+ rc := getCgroupConfigInternal (rl , compressibleResourcesOnly )
188
+ if rc == nil {
189
+ return nil // propagate the nil result from getCgroupConfigInternal unchanged
190
+ }
191
+
192
+ // With the None CPU manager policy, cgroup v2 and the systemd cgroup manager, systemd must be made aware of the cpuset cgroup.
193
+ // By default systemd will not create it, because it has not been delegated and it is not included in the Apply() request.
194
+ // Without this, the kubelet restarts unnecessarily: the cpuset cgroup is created in the cgroupfs, but systemd does not
195
+ // know about it and deletes it, and the kubelet then does not continue because the cgroup is not configured as expected.
196
+ // An alternative is to delegate the `cpuset` cgroup to the kubelet, but that would require some plumbing in libcontainer,
197
+ // and setting CPUSet here is sufficient.
198
+ // This is done only for the None policy, as the Static policy does its own updating of the cpuset.
199
+ // NOTE(review): the IsEmpty check below presumably identifies the None policy; see the comment on policy none's GetAllocatableCPUs.
200
+ if cm .cpuManager .GetAllocatableCPUs ().IsEmpty () {
201
+ rc .CPUSet = cm .cpuManager .GetAllCPUs ()
202
+ }
203
+
204
+ return rc
205
+ }
206
+
207
+ // getCgroupConfigInternal contains the pieces of getCgroupConfig that don't require the cm object.
208
+ // It exists so this logic can be unit tested without needing to create a full containerManager.
209
+ func getCgroupConfigInternal (rl v1.ResourceList , compressibleResourcesOnly bool ) * ResourceConfig {
187
210
// TODO(vishh): Set CPU Quota if necessary.
188
211
if rl == nil {
189
212
return nil
@@ -216,7 +239,6 @@ func getCgroupConfig(rl v1.ResourceList, compressibleResourcesOnly bool) *Resour
216
239
}
217
240
rc .HugePageLimit = HugePageLimits (rl )
218
241
}
219
-
220
242
return & rc
221
243
}
222
244
0 commit comments