Skip to content

Commit d92fdeb

Browse files
authored
Merge pull request kubernetes#89897 from giuseppe/test-e2e-node
kubelet: fix e2e-node cgroups test on cgroup v2
2 parents 8b0a7de + 26d94ad commit d92fdeb

12 files changed

+320
-36
lines changed

pkg/kubelet/cm/BUILD

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ go_library(
7070
"//vendor/github.com/docker/go-units:go_default_library",
7171
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
7272
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",
73+
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2:go_default_library",
7374
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd:go_default_library",
7475
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
7576
"//vendor/k8s.io/utils/io:go_default_library",
@@ -121,6 +122,7 @@ go_library(
121122
"//vendor/github.com/docker/go-units:go_default_library",
122123
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
123124
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",
125+
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2:go_default_library",
124126
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd:go_default_library",
125127
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
126128
"//vendor/k8s.io/utils/io:go_default_library",

pkg/kubelet/cm/cgroup_manager_linux.go

Lines changed: 162 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,18 @@ package cm
1818

1919
import (
2020
"fmt"
21+
"io/ioutil"
2122
"os"
2223
"path"
2324
"path/filepath"
2425
"strconv"
2526
"strings"
27+
"sync"
2628
"time"
2729

2830
libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups"
2931
cgroupfs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
32+
cgroupfs2 "github.com/opencontainers/runc/libcontainer/cgroups/fs2"
3033
cgroupsystemd "github.com/opencontainers/runc/libcontainer/cgroups/systemd"
3134
libcontainerconfigs "github.com/opencontainers/runc/libcontainer/configs"
3235
"k8s.io/klog"
@@ -36,6 +39,7 @@ import (
3639
"k8s.io/apimachinery/pkg/util/sets"
3740
utilfeature "k8s.io/apiserver/pkg/util/feature"
3841
kubefeatures "k8s.io/kubernetes/pkg/features"
42+
cmutil "k8s.io/kubernetes/pkg/kubelet/cm/util"
3943
"k8s.io/kubernetes/pkg/kubelet/metrics"
4044
)
4145

@@ -228,6 +232,12 @@ func (m *cgroupManagerImpl) buildCgroupPaths(name CgroupName) map[string]string
228232
return cgroupPaths
229233
}
230234

235+
// buildCgroupUnifiedPath builds a path to the specified name.
236+
func (m *cgroupManagerImpl) buildCgroupUnifiedPath(name CgroupName) string {
237+
cgroupFsAdaptedName := m.Name(name)
238+
return path.Join(cmutil.CgroupRoot, cgroupFsAdaptedName)
239+
}
240+
231241
// TODO(filbranden): This logic belongs in libcontainer/cgroup/systemd instead.
232242
// It should take a libcontainerconfigs.Cgroup.Path field (rather than Name and Parent)
233243
// and split it appropriately, using essentially the logic below.
@@ -246,6 +256,21 @@ func updateSystemdCgroupInfo(cgroupConfig *libcontainerconfigs.Cgroup, cgroupNam
246256

247257
// Exists checks if all subsystem cgroups already exist
248258
func (m *cgroupManagerImpl) Exists(name CgroupName) bool {
259+
if libcontainercgroups.IsCgroup2UnifiedMode() {
260+
cgroupPath := m.buildCgroupUnifiedPath(name)
261+
neededControllers := getSupportedUnifiedControllers()
262+
enabledControllers, err := readUnifiedControllers(cgroupPath)
263+
if err != nil {
264+
return false
265+
}
266+
difference := neededControllers.Difference(enabledControllers)
267+
if difference.Len() > 0 {
268+
klog.V(4).Infof("The Cgroup %v has some missing controllers: %v", name, difference)
269+
return false
270+
}
271+
return true
272+
}
273+
249274
// Get map of all cgroup paths on the system for the particular cgroup
250275
cgroupPaths := m.buildCgroupPaths(name)
251276

@@ -338,7 +363,7 @@ func getSupportedSubsystems() map[subsystem]bool {
338363
return supportedSubsystems
339364
}
340365

341-
// setSupportedSubsystems sets cgroup resource limits only on the supported
366+
// setSupportedSubsystemsV1 sets cgroup resource limits on cgroup v1 only on the supported
342367
// subsystems. ie. cpu and memory. We don't use libcontainer's cgroup/fs/Set()
343368
// method as it doesn't allow us to skip updates on the devices cgroup
344369
// Allowing or denying all devices by writing 'a' to devices.allow or devices.deny is
@@ -347,7 +372,7 @@ func getSupportedSubsystems() map[subsystem]bool {
347372
// We would like to skip setting any values on the device cgroup in this case
348373
// but this is not possible with libcontainers Set() method
349374
// See https://github.com/opencontainers/runc/issues/932
350-
func setSupportedSubsystems(cgroupConfig *libcontainerconfigs.Cgroup) error {
375+
func setSupportedSubsystemsV1(cgroupConfig *libcontainerconfigs.Cgroup) error {
351376
for sys, required := range getSupportedSubsystems() {
352377
if _, ok := cgroupConfig.Paths[sys.Name()]; !ok {
353378
if required {
@@ -388,6 +413,106 @@ func getCpuMax(cpuQuota *int64, cpuPeriod *uint64) string {
388413
return fmt.Sprintf("%s %s", quotaStr, periodStr)
389414
}
390415

416+
// readUnifiedControllers reads the controllers available at the specified cgroup
417+
func readUnifiedControllers(path string) (sets.String, error) {
418+
controllersFileContent, err := ioutil.ReadFile(filepath.Join(path, "cgroup.controllers"))
419+
if err != nil {
420+
return nil, err
421+
}
422+
controllers := strings.Fields(string(controllersFileContent))
423+
return sets.NewString(controllers...), nil
424+
}
425+
426+
var (
427+
availableRootControllersOnce sync.Once
428+
availableRootControllers sets.String
429+
)
430+
431+
// getSupportedUnifiedControllers returns a set of supported controllers when running on cgroup v2
432+
func getSupportedUnifiedControllers() sets.String {
433+
// This is the set of controllers used by the Kubelet
434+
supportedControllers := sets.NewString("cpu", "cpuset", "memory", "hugetlb")
435+
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) || utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportNodePidsLimit) {
436+
supportedControllers.Insert("pids")
437+
}
438+
// Memoize the set of controllers that are present in the root cgroup
439+
availableRootControllersOnce.Do(func() {
440+
var err error
441+
availableRootControllers, err = readUnifiedControllers(cmutil.CgroupRoot)
442+
if err != nil {
443+
panic(fmt.Errorf("cannot read cgroup controllers at %s", cmutil.CgroupRoot))
444+
}
445+
})
446+
// Return the set of controllers that are supported both by the Kubelet and by the kernel
447+
return supportedControllers.Intersection(availableRootControllers)
448+
}
449+
450+
// propagateControllers on an unified hierarchy enables all the supported controllers for the specified cgroup
451+
func propagateControllers(path string) error {
452+
if err := os.MkdirAll(path, 0755); err != nil {
453+
return fmt.Errorf("failed to create cgroup %q : %v", path, err)
454+
}
455+
456+
// Retrieve all the supported controllers from the cgroup root
457+
controllersFileContent, err := ioutil.ReadFile(filepath.Join(cmutil.CgroupRoot, "cgroup.controllers"))
458+
if err != nil {
459+
return fmt.Errorf("failed to read controllers from %q : %v", cmutil.CgroupRoot, err)
460+
}
461+
462+
supportedControllers := getSupportedUnifiedControllers()
463+
464+
// The retrieved content looks like: "cpuset cpu io memory hugetlb pids". Prepend each of the controllers
465+
// with '+', so we have something like "+cpuset +cpu +io +memory +hugetlb +pids"
466+
controllers := ""
467+
for _, controller := range strings.Fields(string(controllersFileContent)) {
468+
// ignore controllers we don't care about
469+
if !supportedControllers.Has(controller) {
470+
continue
471+
}
472+
473+
sep := " +"
474+
if controllers == "" {
475+
sep = "+"
476+
}
477+
controllers = controllers + sep + controller
478+
}
479+
480+
current := cmutil.CgroupRoot
481+
relPath, err := filepath.Rel(cmutil.CgroupRoot, path)
482+
if err != nil {
483+
return fmt.Errorf("failed to get relative path to cgroup root from %q: %v", path, err)
484+
}
485+
// Write the controllers list to each "cgroup.subtree_control" file until it reaches the parent cgroup.
486+
// For the /foo/bar/baz cgroup, controllers must be enabled sequentially in the files:
487+
// - /sys/fs/cgroup/foo/cgroup.subtree_control
488+
// - /sys/fs/cgroup/foo/bar/cgroup.subtree_control
489+
for _, p := range strings.Split(filepath.Dir(relPath), "/") {
490+
current = filepath.Join(current, p)
491+
if err := ioutil.WriteFile(filepath.Join(current, "cgroup.subtree_control"), []byte(controllers), 0755); err != nil {
492+
return fmt.Errorf("failed to enable controllers on %q: %v", cmutil.CgroupRoot, err)
493+
}
494+
}
495+
return nil
496+
}
497+
498+
// setResourcesV2 sets cgroup resource limits on cgroup v2
499+
func setResourcesV2(cgroupConfig *libcontainerconfigs.Cgroup) error {
500+
if err := propagateControllers(cgroupConfig.Path); err != nil {
501+
return err
502+
}
503+
allowAll := true
504+
cgroupConfig.Resources.AllowAllDevices = &allowAll
505+
506+
manager, err := cgroupfs2.NewManager(cgroupConfig, cgroupConfig.Path, false)
507+
if err != nil {
508+
return fmt.Errorf("failed to create cgroup v2 manager: %v", err)
509+
}
510+
config := &libcontainerconfigs.Config{
511+
Cgroups: cgroupConfig,
512+
}
513+
return manager.Set(config)
514+
}
515+
391516
func (m *cgroupManagerImpl) toResources(resourceConfig *ResourceConfig) *libcontainerconfigs.Resources {
392517
resources := &libcontainerconfigs.Resources{}
393518
if resourceConfig == nil {
@@ -454,12 +579,17 @@ func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error {
454579
resourceConfig := cgroupConfig.ResourceParameters
455580
resources := m.toResources(resourceConfig)
456581

457-
cgroupPaths := m.buildCgroupPaths(cgroupConfig.Name)
458-
459582
libcontainerCgroupConfig := &libcontainerconfigs.Cgroup{
460583
Resources: resources,
461-
Paths: cgroupPaths,
462584
}
585+
586+
unified := libcontainercgroups.IsCgroup2UnifiedMode()
587+
if unified {
588+
libcontainerCgroupConfig.Path = m.buildCgroupUnifiedPath(cgroupConfig.Name)
589+
} else {
590+
libcontainerCgroupConfig.Paths = m.buildCgroupPaths(cgroupConfig.Name)
591+
}
592+
463593
// libcontainer consumes a different field and expects a different syntax
464594
// depending on the cgroup driver in use, so we need this conditional here.
465595
if m.adapter.cgroupManagerType == libcontainerSystemd {
@@ -472,8 +602,14 @@ func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error {
472602
libcontainerCgroupConfig.PidsLimit = *cgroupConfig.ResourceParameters.PidsLimit
473603
}
474604

475-
if err := setSupportedSubsystems(libcontainerCgroupConfig); err != nil {
476-
return fmt.Errorf("failed to set supported cgroup subsystems for cgroup %v: %v", cgroupConfig.Name, err)
605+
if unified {
606+
if err := setResourcesV2(libcontainerCgroupConfig); err != nil {
607+
return fmt.Errorf("failed to set resources for cgroup %v: %v", cgroupConfig.Name, err)
608+
}
609+
} else {
610+
if err := setSupportedSubsystemsV1(libcontainerCgroupConfig); err != nil {
611+
return fmt.Errorf("failed to set supported cgroup subsystems for cgroup %v: %v", cgroupConfig.Name, err)
612+
}
477613
}
478614
return nil
479615
}
@@ -619,10 +755,25 @@ func toResourceStats(stats *libcontainercgroups.Stats) *ResourceStats {
619755

620756
// GetResourceStats returns the resource stats of the specified cgroup as read from the cgroup fs
621757
func (m *cgroupManagerImpl) GetResourceStats(name CgroupName) (*ResourceStats, error) {
622-
cgroupPaths := m.buildCgroupPaths(name)
623-
stats, err := getStatsSupportedSubsystems(cgroupPaths)
624-
if err != nil {
625-
return nil, fmt.Errorf("failed to get stats supported cgroup subsystems for cgroup %v: %v", name, err)
758+
var err error
759+
var stats *libcontainercgroups.Stats
760+
if libcontainercgroups.IsCgroup2UnifiedMode() {
761+
cgroupPath := m.buildCgroupUnifiedPath(name)
762+
manager, err := cgroupfs2.NewManager(nil, cgroupPath, false)
763+
if err != nil {
764+
return nil, fmt.Errorf("failed to create cgroup v2 manager: %v", err)
765+
}
766+
767+
stats, err = manager.GetStats()
768+
if err != nil {
769+
return nil, fmt.Errorf("failed to get stats for cgroup %v: %v", name, err)
770+
}
771+
} else {
772+
cgroupPaths := m.buildCgroupPaths(name)
773+
stats, err = getStatsSupportedSubsystems(cgroupPaths)
774+
if err != nil {
775+
return nil, fmt.Errorf("failed to get stats supported cgroup subsystems for cgroup %v: %v", name, err)
776+
}
626777
}
627778
return toResourceStats(stats), nil
628779
}

pkg/kubelet/cm/container_manager_linux.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ func validateSystemRequirements(mountUtil mount.Interface) (features, error) {
162162
}
163163

164164
if cgroups.IsCgroup2UnifiedMode() {
165+
f.cpuHardcapping = true
165166
return f, nil
166167
}
167168

test/e2e_node/BUILD

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ go_library(
2121
"util_sriov.go",
2222
"util_xfs_linux.go",
2323
"util_xfs_unsupported.go",
24+
"utils_linux.go",
25+
"utils_unsupported.go",
2426
],
2527
importpath = "k8s.io/kubernetes/test/e2e_node",
2628
visibility = ["//visibility:public"],

test/e2e_node/hugepages_test.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,13 @@ func makePodToVerifyHugePages(baseName string, hugePagesLimit resource.Quantity)
5050
cgroupFsName = cgroupName.ToCgroupfs()
5151
}
5252

53-
// this command takes the expected value and compares it against the actual value for the pod cgroup hugetlb.2MB.limit_in_bytes
54-
command := fmt.Sprintf("expected=%v; actual=$(cat /tmp/hugetlb/%v/hugetlb.2MB.limit_in_bytes); if [ \"$expected\" -ne \"$actual\" ]; then exit 1; fi; ", hugePagesLimit.Value(), cgroupFsName)
53+
command := ""
54+
// this command takes the expected value and compares it against the actual value for the pod cgroup hugetlb.2MB.<LIMIT>
55+
if IsCgroup2UnifiedMode() {
56+
command = fmt.Sprintf("expected=%v; actual=$(cat /tmp/%v/hugetlb.2MB.max); if [ \"$expected\" -ne \"$actual\" ]; then exit 1; fi; ", hugePagesLimit.Value(), cgroupFsName)
57+
} else {
58+
command = fmt.Sprintf("expected=%v; actual=$(cat /tmp/hugetlb/%v/hugetlb.2MB.limit_in_bytes); if [ \"$expected\" -ne \"$actual\" ]; then exit 1; fi; ", hugePagesLimit.Value(), cgroupFsName)
59+
}
5560
framework.Logf("Pod to run command: %v", command)
5661
pod := &v1.Pod{
5762
ObjectMeta: metav1.ObjectMeta{

0 commit comments

Comments
 (0)