@@ -18,15 +18,18 @@ package cm
18
18
19
19
import (
20
20
"fmt"
21
+ "io/ioutil"
21
22
"os"
22
23
"path"
23
24
"path/filepath"
24
25
"strconv"
25
26
"strings"
27
+ "sync"
26
28
"time"
27
29
28
30
libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups"
29
31
cgroupfs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
32
+ cgroupfs2 "github.com/opencontainers/runc/libcontainer/cgroups/fs2"
30
33
cgroupsystemd "github.com/opencontainers/runc/libcontainer/cgroups/systemd"
31
34
libcontainerconfigs "github.com/opencontainers/runc/libcontainer/configs"
32
35
"k8s.io/klog"
@@ -36,6 +39,7 @@ import (
36
39
"k8s.io/apimachinery/pkg/util/sets"
37
40
utilfeature "k8s.io/apiserver/pkg/util/feature"
38
41
kubefeatures "k8s.io/kubernetes/pkg/features"
42
+ cmutil "k8s.io/kubernetes/pkg/kubelet/cm/util"
39
43
"k8s.io/kubernetes/pkg/kubelet/metrics"
40
44
)
41
45
@@ -228,6 +232,12 @@ func (m *cgroupManagerImpl) buildCgroupPaths(name CgroupName) map[string]string
228
232
return cgroupPaths
229
233
}
230
234
235
+ // buildCgroupUnifiedPath builds a path to the specified name.
236
+ func (m * cgroupManagerImpl ) buildCgroupUnifiedPath (name CgroupName ) string {
237
+ cgroupFsAdaptedName := m .Name (name )
238
+ return path .Join (cmutil .CgroupRoot , cgroupFsAdaptedName )
239
+ }
240
+
231
241
// TODO(filbranden): This logic belongs in libcontainer/cgroup/systemd instead.
232
242
// It should take a libcontainerconfigs.Cgroup.Path field (rather than Name and Parent)
233
243
// and split it appropriately, using essentially the logic below.
@@ -246,6 +256,21 @@ func updateSystemdCgroupInfo(cgroupConfig *libcontainerconfigs.Cgroup, cgroupNam
246
256
247
257
// Exists checks if all subsystem cgroups already exist
248
258
func (m * cgroupManagerImpl ) Exists (name CgroupName ) bool {
259
+ if libcontainercgroups .IsCgroup2UnifiedMode () {
260
+ cgroupPath := m .buildCgroupUnifiedPath (name )
261
+ neededControllers := getSupportedUnifiedControllers ()
262
+ enabledControllers , err := readUnifiedControllers (cgroupPath )
263
+ if err != nil {
264
+ return false
265
+ }
266
+ difference := neededControllers .Difference (enabledControllers )
267
+ if difference .Len () > 0 {
268
+ klog .V (4 ).Infof ("The Cgroup %v has some missing controllers: %v" , name , difference )
269
+ return false
270
+ }
271
+ return true
272
+ }
273
+
249
274
// Get map of all cgroup paths on the system for the particular cgroup
250
275
cgroupPaths := m .buildCgroupPaths (name )
251
276
@@ -338,7 +363,7 @@ func getSupportedSubsystems() map[subsystem]bool {
338
363
return supportedSubsystems
339
364
}
340
365
341
- // setSupportedSubsystems sets cgroup resource limits only on the supported
366
+ // setSupportedSubsystemsV1 sets cgroup resource limits on cgroup v1 only on the supported
342
367
// subsystems. ie. cpu and memory. We don't use libcontainer's cgroup/fs/Set()
343
368
// method as it doesn't allow us to skip updates on the devices cgroup
344
369
// Allowing or denying all devices by writing 'a' to devices.allow or devices.deny is
@@ -347,7 +372,7 @@ func getSupportedSubsystems() map[subsystem]bool {
347
372
// We would like to skip setting any values on the device cgroup in this case
348
373
// but this is not possible with libcontainers Set() method
349
374
// See https://github.com/opencontainers/runc/issues/932
350
- func setSupportedSubsystems (cgroupConfig * libcontainerconfigs.Cgroup ) error {
375
+ func setSupportedSubsystemsV1 (cgroupConfig * libcontainerconfigs.Cgroup ) error {
351
376
for sys , required := range getSupportedSubsystems () {
352
377
if _ , ok := cgroupConfig .Paths [sys .Name ()]; ! ok {
353
378
if required {
@@ -388,6 +413,106 @@ func getCpuMax(cpuQuota *int64, cpuPeriod *uint64) string {
388
413
return fmt .Sprintf ("%s %s" , quotaStr , periodStr )
389
414
}
390
415
416
+ // readUnifiedControllers reads the controllers available at the specified cgroup
417
+ func readUnifiedControllers (path string ) (sets.String , error ) {
418
+ controllersFileContent , err := ioutil .ReadFile (filepath .Join (path , "cgroup.controllers" ))
419
+ if err != nil {
420
+ return nil , err
421
+ }
422
+ controllers := strings .Fields (string (controllersFileContent ))
423
+ return sets .NewString (controllers ... ), nil
424
+ }
425
+
426
+ var (
427
+ availableRootControllersOnce sync.Once
428
+ availableRootControllers sets.String
429
+ )
430
+
431
+ // getSupportedUnifiedControllers returns a set of supported controllers when running on cgroup v2
432
+ func getSupportedUnifiedControllers () sets.String {
433
+ // This is the set of controllers used by the Kubelet
434
+ supportedControllers := sets .NewString ("cpu" , "cpuset" , "memory" , "hugetlb" )
435
+ if utilfeature .DefaultFeatureGate .Enabled (kubefeatures .SupportPodPidsLimit ) || utilfeature .DefaultFeatureGate .Enabled (kubefeatures .SupportNodePidsLimit ) {
436
+ supportedControllers .Insert ("pids" )
437
+ }
438
+ // Memoize the set of controllers that are present in the root cgroup
439
+ availableRootControllersOnce .Do (func () {
440
+ var err error
441
+ availableRootControllers , err = readUnifiedControllers (cmutil .CgroupRoot )
442
+ if err != nil {
443
+ panic (fmt .Errorf ("cannot read cgroup controllers at %s" , cmutil .CgroupRoot ))
444
+ }
445
+ })
446
+ // Return the set of controllers that are supported both by the Kubelet and by the kernel
447
+ return supportedControllers .Intersection (availableRootControllers )
448
+ }
449
+
450
+ // propagateControllers on an unified hierarchy enables all the supported controllers for the specified cgroup
451
+ func propagateControllers (path string ) error {
452
+ if err := os .MkdirAll (path , 0755 ); err != nil {
453
+ return fmt .Errorf ("failed to create cgroup %q : %v" , path , err )
454
+ }
455
+
456
+ // Retrieve all the supported controllers from the cgroup root
457
+ controllersFileContent , err := ioutil .ReadFile (filepath .Join (cmutil .CgroupRoot , "cgroup.controllers" ))
458
+ if err != nil {
459
+ return fmt .Errorf ("failed to read controllers from %q : %v" , cmutil .CgroupRoot , err )
460
+ }
461
+
462
+ supportedControllers := getSupportedUnifiedControllers ()
463
+
464
+ // The retrieved content looks like: "cpuset cpu io memory hugetlb pids". Prepend each of the controllers
465
+ // with '+', so we have something like "+cpuset +cpu +io +memory +hugetlb +pids"
466
+ controllers := ""
467
+ for _ , controller := range strings .Fields (string (controllersFileContent )) {
468
+ // ignore controllers we don't care about
469
+ if ! supportedControllers .Has (controller ) {
470
+ continue
471
+ }
472
+
473
+ sep := " +"
474
+ if controllers == "" {
475
+ sep = "+"
476
+ }
477
+ controllers = controllers + sep + controller
478
+ }
479
+
480
+ current := cmutil .CgroupRoot
481
+ relPath , err := filepath .Rel (cmutil .CgroupRoot , path )
482
+ if err != nil {
483
+ return fmt .Errorf ("failed to get relative path to cgroup root from %q: %v" , path , err )
484
+ }
485
+ // Write the controllers list to each "cgroup.subtree_control" file until it reaches the parent cgroup.
486
+ // For the /foo/bar/baz cgroup, controllers must be enabled sequentially in the files:
487
+ // - /sys/fs/cgroup/foo/cgroup.subtree_control
488
+ // - /sys/fs/cgroup/foo/bar/cgroup.subtree_control
489
+ for _ , p := range strings .Split (filepath .Dir (relPath ), "/" ) {
490
+ current = filepath .Join (current , p )
491
+ if err := ioutil .WriteFile (filepath .Join (current , "cgroup.subtree_control" ), []byte (controllers ), 0755 ); err != nil {
492
+ return fmt .Errorf ("failed to enable controllers on %q: %v" , cmutil .CgroupRoot , err )
493
+ }
494
+ }
495
+ return nil
496
+ }
497
+
498
+ // setResourcesV2 sets cgroup resource limits on cgroup v2
499
+ func setResourcesV2 (cgroupConfig * libcontainerconfigs.Cgroup ) error {
500
+ if err := propagateControllers (cgroupConfig .Path ); err != nil {
501
+ return err
502
+ }
503
+ allowAll := true
504
+ cgroupConfig .Resources .AllowAllDevices = & allowAll
505
+
506
+ manager , err := cgroupfs2 .NewManager (cgroupConfig , cgroupConfig .Path , false )
507
+ if err != nil {
508
+ return fmt .Errorf ("failed to create cgroup v2 manager: %v" , err )
509
+ }
510
+ config := & libcontainerconfigs.Config {
511
+ Cgroups : cgroupConfig ,
512
+ }
513
+ return manager .Set (config )
514
+ }
515
+
391
516
func (m * cgroupManagerImpl ) toResources (resourceConfig * ResourceConfig ) * libcontainerconfigs.Resources {
392
517
resources := & libcontainerconfigs.Resources {}
393
518
if resourceConfig == nil {
@@ -454,12 +579,17 @@ func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error {
454
579
resourceConfig := cgroupConfig .ResourceParameters
455
580
resources := m .toResources (resourceConfig )
456
581
457
- cgroupPaths := m .buildCgroupPaths (cgroupConfig .Name )
458
-
459
582
libcontainerCgroupConfig := & libcontainerconfigs.Cgroup {
460
583
Resources : resources ,
461
- Paths : cgroupPaths ,
462
584
}
585
+
586
+ unified := libcontainercgroups .IsCgroup2UnifiedMode ()
587
+ if unified {
588
+ libcontainerCgroupConfig .Path = m .buildCgroupUnifiedPath (cgroupConfig .Name )
589
+ } else {
590
+ libcontainerCgroupConfig .Paths = m .buildCgroupPaths (cgroupConfig .Name )
591
+ }
592
+
463
593
// libcontainer consumes a different field and expects a different syntax
464
594
// depending on the cgroup driver in use, so we need this conditional here.
465
595
if m .adapter .cgroupManagerType == libcontainerSystemd {
@@ -472,8 +602,14 @@ func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error {
472
602
libcontainerCgroupConfig .PidsLimit = * cgroupConfig .ResourceParameters .PidsLimit
473
603
}
474
604
475
- if err := setSupportedSubsystems (libcontainerCgroupConfig ); err != nil {
476
- return fmt .Errorf ("failed to set supported cgroup subsystems for cgroup %v: %v" , cgroupConfig .Name , err )
605
+ if unified {
606
+ if err := setResourcesV2 (libcontainerCgroupConfig ); err != nil {
607
+ return fmt .Errorf ("failed to set resources for cgroup %v: %v" , cgroupConfig .Name , err )
608
+ }
609
+ } else {
610
+ if err := setSupportedSubsystemsV1 (libcontainerCgroupConfig ); err != nil {
611
+ return fmt .Errorf ("failed to set supported cgroup subsystems for cgroup %v: %v" , cgroupConfig .Name , err )
612
+ }
477
613
}
478
614
return nil
479
615
}
@@ -619,10 +755,25 @@ func toResourceStats(stats *libcontainercgroups.Stats) *ResourceStats {
619
755
620
756
// Get sets the ResourceParameters of the specified cgroup as read from the cgroup fs
621
757
func (m * cgroupManagerImpl ) GetResourceStats (name CgroupName ) (* ResourceStats , error ) {
622
- cgroupPaths := m .buildCgroupPaths (name )
623
- stats , err := getStatsSupportedSubsystems (cgroupPaths )
624
- if err != nil {
625
- return nil , fmt .Errorf ("failed to get stats supported cgroup subsystems for cgroup %v: %v" , name , err )
758
+ var err error
759
+ var stats * libcontainercgroups.Stats
760
+ if libcontainercgroups .IsCgroup2UnifiedMode () {
761
+ cgroupPath := m .buildCgroupUnifiedPath (name )
762
+ manager , err := cgroupfs2 .NewManager (nil , cgroupPath , false )
763
+ if err != nil {
764
+ return nil , fmt .Errorf ("failed to create cgroup v2 manager: %v" , err )
765
+ }
766
+
767
+ stats , err = manager .GetStats ()
768
+ if err != nil {
769
+ return nil , fmt .Errorf ("failed to get stats for cgroup %v: %v" , name , err )
770
+ }
771
+ } else {
772
+ cgroupPaths := m .buildCgroupPaths (name )
773
+ stats , err = getStatsSupportedSubsystems (cgroupPaths )
774
+ if err != nil {
775
+ return nil , fmt .Errorf ("failed to get stats supported cgroup subsystems for cgroup %v: %v" , name , err )
776
+ }
626
777
}
627
778
return toResourceStats (stats ), nil
628
779
}
0 commit comments