From 3fe402bb68056acd7551466d94964de68914b808 Mon Sep 17 00:00:00 2001 From: Lokesh Mandvekar Date: Mon, 10 Nov 2025 13:30:21 -0500 Subject: [PATCH 1/9] Revert "Revert Cgroups V1 removal" This reverts commit 52736854ef6fca4effac0162c6788e732c83d957, re-enabling cgv1 removal. Signed-off-by: Lokesh Mandvekar --- common/pkg/cgroups/blkio_linux.go | 131 ++------- common/pkg/cgroups/cgroups_linux.go | 322 ++------------------- common/pkg/cgroups/cgroups_linux_test.go | 2 +- common/pkg/cgroups/cpu_linux.go | 76 +---- common/pkg/cgroups/cpuset_linux.go | 28 +- common/pkg/cgroups/memory_linux.go | 80 ++--- common/pkg/cgroups/pids_linux.go | 33 +-- common/pkg/cgroups/systemd_linux.go | 50 +--- common/pkg/cgroups/utils_linux.go | 25 +- common/pkg/cgroupv2/cgroups_linux.go | 27 -- common/pkg/cgroupv2/cgroups_unsupported.go | 8 - common/pkg/config/default.go | 13 +- common/pkg/config/systemd.go | 7 - common/pkg/sysinfo/sysinfo_linux.go | 18 +- common/pkg/systemd/systemd_linux.go | 6 +- 15 files changed, 126 insertions(+), 700 deletions(-) delete mode 100644 common/pkg/cgroupv2/cgroups_linux.go delete mode 100644 common/pkg/cgroupv2/cgroups_unsupported.go diff --git a/common/pkg/cgroups/blkio_linux.go b/common/pkg/cgroups/blkio_linux.go index 4d85ba4a70..c5f085ecd7 100644 --- a/common/pkg/cgroups/blkio_linux.go +++ b/common/pkg/cgroups/blkio_linux.go @@ -3,10 +3,6 @@ package cgroups import ( - "bufio" - "errors" - "fmt" - "os" "path/filepath" "strconv" "strings" @@ -26,122 +22,56 @@ func getBlkioHandler() *linuxBlkioHandler { // Apply set the specified constraints. func (c *linuxBlkioHandler) Apply(ctr *CgroupControl, res *cgroups.Resources) error { - if ctr.cgroup2 { - man, err := fs2.NewManager(ctr.config, filepath.Join(cgroupRoot, ctr.config.Path)) - if err != nil { - return err - } - return man.Set(res) - } - path := filepath.Join(cgroupRoot, Blkio, ctr.config.Path) - return c.Blkio.Set(path, res) -} - -// Create the cgroup.
-func (c *linuxBlkioHandler) Create(ctr *CgroupControl) (bool, error) { - if ctr.cgroup2 { - return false, nil + man, err := fs2.NewManager(ctr.config, filepath.Join(cgroupRoot, ctr.config.Path)) + if err != nil { + return err } - return ctr.createCgroupDirectory(Blkio) -} - -// Destroy the cgroup. -func (c *linuxBlkioHandler) Destroy(ctr *CgroupControl) error { - return rmDirRecursively(ctr.getCgroupv1Path(Blkio)) + return man.Set(res) } // Stat fills a metrics structure with usage stats for the controller. func (c *linuxBlkioHandler) Stat(ctr *CgroupControl, m *cgroups.Stats) error { var ioServiceBytesRecursive []cgroups.BlkioStatEntry - if ctr.cgroup2 { - // more details on the io.stat file format:X https://facebookmicrosites.github.io/cgroup2/docs/io-controller.html - values, err := readCgroup2MapFile(ctr, "io.stat") + // more details on the io.stat file format:X https://facebookmicrosites.github.io/cgroup2/docs/io-controller.html + values, err := readCgroup2MapFile(ctr, "io.stat") + if err != nil { + return err + } + for k, v := range values { + d := strings.Split(k, ":") + if len(d) != 2 { + continue + } + minor, err := strconv.ParseUint(d[0], 10, 0) if err != nil { return err } - for k, v := range values { - d := strings.Split(k, ":") - if len(d) != 2 { - continue - } - minor, err := strconv.ParseUint(d[0], 10, 0) - if err != nil { - return err - } - major, err := strconv.ParseUint(d[1], 10, 0) - if err != nil { - return err - } - - for _, item := range v { - d := strings.Split(item, "=") - if len(d) != 2 { - continue - } - op := d[0] - - // Accommodate the cgroup v1 naming - switch op { - case "rbytes": - op = "read" - case "wbytes": - op = "write" - } - - value, err := strconv.ParseUint(d[1], 10, 0) - if err != nil { - return err - } - - entry := cgroups.BlkioStatEntry{ - Op: op, - Major: major, - Minor: minor, - Value: value, - } - ioServiceBytesRecursive = append(ioServiceBytesRecursive, entry) - } - } - } else { - BlkioRoot := ctr.getCgroupv1Path(Blkio) 
- - p := filepath.Join(BlkioRoot, "blkio.throttle.io_service_bytes_recursive") - f, err := os.Open(p) + major, err := strconv.ParseUint(d[1], 10, 0) if err != nil { - if errors.Is(err, os.ErrNotExist) { - return nil - } - return fmt.Errorf("open %s: %w", p, err) + return err } - defer f.Close() - scanner := bufio.NewScanner(f) - for scanner.Scan() { - line := scanner.Text() - parts := strings.Fields(line) - if len(parts) < 3 { - continue - } - d := strings.Split(parts[0], ":") + for _, item := range v { + d := strings.Split(item, "=") if len(d) != 2 { continue } - minor, err := strconv.ParseUint(d[0], 10, 0) - if err != nil { - return err + op := d[0] + + // Accommodate the cgroup v1 naming + switch op { + case "rbytes": + op = "read" + case "wbytes": + op = "write" } - major, err := strconv.ParseUint(d[1], 10, 0) - if err != nil { - return err - } - - op := parts[1] - value, err := strconv.ParseUint(parts[2], 10, 0) + value, err := strconv.ParseUint(d[1], 10, 0) if err != nil { return err } + entry := cgroups.BlkioStatEntry{ Op: op, Major: major, @@ -150,9 +80,6 @@ func (c *linuxBlkioHandler) Stat(ctr *CgroupControl, m *cgroups.Stats) error { } ioServiceBytesRecursive = append(ioServiceBytesRecursive, entry) } - if err := scanner.Err(); err != nil { - return fmt.Errorf("parse %s: %w", p, err) - } } m.BlkioStats.IoServiceBytesRecursive = ioServiceBytesRecursive return nil diff --git a/common/pkg/cgroups/cgroups_linux.go b/common/pkg/cgroups/cgroups_linux.go index 1c66a8d9cc..4d7c6c2e84 100644 --- a/common/pkg/cgroups/cgroups_linux.go +++ b/common/pkg/cgroups/cgroups_linux.go @@ -8,11 +8,9 @@ import ( "context" "errors" "fmt" - "maps" "math" "os" "path/filepath" - "slices" "strconv" "strings" "sync" @@ -22,8 +20,6 @@ import ( systemdDbus "github.com/coreos/go-systemd/v22/dbus" "github.com/godbus/dbus/v5" "github.com/opencontainers/cgroups" - "github.com/opencontainers/cgroups/fs2" - "github.com/sirupsen/logrus" "go.podman.io/storage/pkg/fileutils" 
"go.podman.io/storage/pkg/unshare" "golang.org/x/sys/unix" @@ -32,9 +28,7 @@ import ( var ( // ErrCgroupDeleted means the cgroup was deleted. ErrCgroupDeleted = errors.New("cgroup deleted") - // ErrCgroupV1Rootless means the cgroup v1 were attempted to be used in rootless environment. - ErrCgroupV1Rootless = errors.New("no support for CGroups V1 in rootless environments") - ErrStatCgroup = errors.New("no cgroup available for gathering user statistics") + ErrStatCgroup = errors.New("no cgroup available for gathering user statistics") isUnifiedOnce sync.Once isUnified bool @@ -43,23 +37,12 @@ var ( // CgroupControl controls a cgroup hierarchy. type CgroupControl struct { - cgroup2 bool config *cgroups.Cgroup systemd bool - // List of additional cgroup subsystems joined that - // do not have a custom handler. - additionalControllers []controller -} - -type controller struct { - name string - symlink bool } type controllerHandler interface { - Create(*CgroupControl) (bool, error) Apply(*CgroupControl, *cgroups.Resources) error - Destroy(*CgroupControl) error Stat(*CgroupControl, *cgroups.Stats) error } @@ -92,97 +75,33 @@ func init() { } // getAvailableControllers get the available controllers. 
-func getAvailableControllers(exclude map[string]controllerHandler, cgroup2 bool) ([]controller, error) { - if cgroup2 { - controllers := []controller{} - controllersFile := filepath.Join(cgroupRoot, "cgroup.controllers") - - // rootless cgroupv2: check available controllers for current user, systemd or servicescope will inherit - if unshare.IsRootless() { - userSlice, err := getCgroupPathForCurrentProcess() - if err != nil { - return controllers, err - } - // userSlice already contains '/' so not adding here - basePath := cgroupRoot + userSlice - controllersFile = filepath.Join(basePath, "cgroup.controllers") - } - controllersFileBytes, err := os.ReadFile(controllersFile) - if err != nil { - return nil, fmt.Errorf("failed while reading controllers for cgroup v2: %w", err) - } - for controllerName := range strings.FieldsSeq(string(controllersFileBytes)) { - c := controller{ - name: controllerName, - symlink: false, - } - controllers = append(controllers, c) - } - return controllers, nil - } +func getAvailableControllers() ([]string, error) { + controllers := []string{} + controllersFile := filepath.Join(cgroupRoot, "cgroup.controllers") - subsystems, _ := cgroupV1GetAllSubsystems() - controllers := []controller{} - // cgroupv1 and rootless: No subsystem is available: delegation is unsafe. 
+ // rootless cgroupv2: check available controllers for current user, systemd or servicescope will inherit if unshare.IsRootless() { - return controllers, nil - } - - for _, name := range subsystems { - if _, found := exclude[name]; found { - continue - } - fileInfo, err := os.Stat(cgroupRoot + "/" + name) + userSlice, err := getCgroupPathForCurrentProcess() if err != nil { - continue - } - c := controller{ - name: name, - symlink: !fileInfo.IsDir(), + return controllers, err } - controllers = append(controllers, c) + // userSlice already contains '/' so not adding here + basePath := cgroupRoot + userSlice + controllersFile = filepath.Join(basePath, "cgroup.controllers") } - - return controllers, nil -} - -// AvailableControllers get string:bool map of all the available controllers. -func AvailableControllers(exclude map[string]controllerHandler, cgroup2 bool) ([]string, error) { - availableControllers, err := getAvailableControllers(exclude, cgroup2) + controllersFileBytes, err := os.ReadFile(controllersFile) if err != nil { - return nil, err + return nil, fmt.Errorf("failed while reading controllers for cgroup v2: %w", err) } - controllerList := []string{} - for _, controller := range availableControllers { - controllerList = append(controllerList, controller.name) + for controllerName := range strings.FieldsSeq(string(controllersFileBytes)) { + controllers = append(controllers, controllerName) } - - return controllerList, nil + return controllers, nil } -func cgroupV1GetAllSubsystems() ([]string, error) { - f, err := os.Open("/proc/cgroups") - if err != nil { - return nil, err - } - defer f.Close() - - subsystems := []string{} - - s := bufio.NewScanner(f) - for s.Scan() { - text := s.Text() - if text[0] != '#' { - parts := strings.Fields(text) - if len(parts) >= 4 && parts[3] != "0" { - subsystems = append(subsystems, parts[0]) - } - } - } - if err := s.Err(); err != nil { - return nil, err - } - return subsystems, nil +// AvailableControllers get string:bool 
map of all the available controllers. +func AvailableControllers(exclude map[string]controllerHandler) ([]string, error) { + return getAvailableControllers() } func getCgroupPathForCurrentProcess() (string, error) { @@ -208,51 +127,11 @@ func getCgroupPathForCurrentProcess() (string, error) { return cgroupPath, nil } -// getCgroupv1Path is a helper function to get the cgroup v1 path. -func (c *CgroupControl) getCgroupv1Path(name string) string { - return filepath.Join(cgroupRoot, name, c.config.Path) -} - // initialize initializes the specified hierarchy. func (c *CgroupControl) initialize() (err error) { - createdSoFar := map[string]controllerHandler{} - defer func() { - if err != nil { - for name, ctr := range createdSoFar { - if err := ctr.Destroy(c); err != nil { - logrus.Warningf("error cleaning up controller %s for %s", name, c.config.Path) - } - } - } - }() - if c.cgroup2 { - if err := createCgroupv2Path(filepath.Join(cgroupRoot, c.config.Path)); err != nil { - return fmt.Errorf("creating cgroup path %s: %w", c.config.Path, err) - } - } - for name, handler := range handlers { - created, err := handler.Create(c) - if err != nil { - return err - } - if created { - createdSoFar[name] = handler - } + if err := createCgroupv2Path(filepath.Join(cgroupRoot, c.config.Path)); err != nil { + return fmt.Errorf("creating cgroup path %s: %w", c.config.Path, err) } - - if !c.cgroup2 { - // We won't need to do this for cgroup v2 - for _, ctr := range c.additionalControllers { - if ctr.symlink { - continue - } - path := c.getCgroupv1Path(ctr.name) - if err := os.MkdirAll(path, 0o755); err != nil { - return fmt.Errorf("creating cgroup path for %s: %w", ctr.name, err) - } - } - } - return nil } @@ -297,26 +176,17 @@ func readFileByKeyAsUint64(path, key string) (uint64, error) { // New creates a new cgroup control. 
func New(path string, resources *cgroups.Resources) (*CgroupControl, error) { - cgroup2, err := IsCgroup2UnifiedMode() + _, err := IsCgroup2UnifiedMode() if err != nil { return nil, err } control := &CgroupControl{ - cgroup2: cgroup2, config: &cgroups.Cgroup{ Path: path, Resources: resources, }, } - if !cgroup2 { - controllers, err := getAvailableControllers(handlers, false) - if err != nil { - return nil, err - } - control.additionalControllers = controllers - } - if err := control.initialize(); err != nil { return nil, err } @@ -326,12 +196,11 @@ func New(path string, resources *cgroups.Resources) (*CgroupControl, error) { // NewSystemd creates a new cgroup control. func NewSystemd(path string, resources *cgroups.Resources) (*CgroupControl, error) { - cgroup2, err := IsCgroup2UnifiedMode() + _, err := IsCgroup2UnifiedMode() if err != nil { return nil, err } control := &CgroupControl{ - cgroup2: cgroup2, systemd: true, config: &cgroups.Cgroup{ Path: path, @@ -345,45 +214,16 @@ func NewSystemd(path string, resources *cgroups.Resources) (*CgroupControl, erro // Load loads an existing cgroup control. 
func Load(path string) (*CgroupControl, error) { - cgroup2, err := IsCgroup2UnifiedMode() + _, err := IsCgroup2UnifiedMode() if err != nil { return nil, err } control := &CgroupControl{ - cgroup2: cgroup2, systemd: false, config: &cgroups.Cgroup{ Path: path, }, } - if !cgroup2 { - controllers, err := getAvailableControllers(handlers, false) - if err != nil { - return nil, err - } - control.additionalControllers = controllers - } - if !cgroup2 { - oneExists := false - // check that the cgroup exists at least under one controller - for name := range handlers { - p := control.getCgroupv1Path(name) - if err := fileutils.Exists(p); err == nil { - oneExists = true - break - } - } - - // if there is no controller at all, raise an error - if !oneExists { - if unshare.IsRootless() { - return nil, ErrCgroupV1Rootless - } - // compatible with the error code - // used by containerd/cgroups - return nil, ErrCgroupDeleted - } - } return control, nil } @@ -448,26 +288,7 @@ func (c *CgroupControl) DeleteByPathConn(path string, conn *systemdDbus.Conn) er if c.systemd { return systemdDestroyConn(path, conn) } - if c.cgroup2 { - return rmDirRecursively(filepath.Join(cgroupRoot, c.config.Path)) - } - var lastError error - for _, h := range handlers { - if err := h.Destroy(c); err != nil { - lastError = err - } - } - - for _, ctr := range c.additionalControllers { - if ctr.symlink { - continue - } - p := c.getCgroupv1Path(ctr.name) - if err := rmDirRecursively(p); err != nil { - lastError = fmt.Errorf("remove %s: %w", p, err) - } - } - return lastError + return rmDirRecursively(filepath.Join(cgroupRoot, c.config.Path)) } // DeleteByPath deletes the specified cgroup path. @@ -493,36 +314,6 @@ func (c *CgroupControl) Update(resources *cgroups.Resources) error { return nil } -// AddPid moves the specified pid to the cgroup. 
-func (c *CgroupControl) AddPid(pid int) error { - pidString := []byte(fmt.Sprintf("%d\n", pid)) - - if c.cgroup2 { - path := filepath.Join(cgroupRoot, c.config.Path) - return fs2.CreateCgroupPath(path, c.config) - } - - names := slices.Collect(maps.Keys(handlers)) - - for _, c := range c.additionalControllers { - if !c.symlink { - names = append(names, c.name) - } - } - - for _, n := range names { - // If we aren't using cgroup2, we won't write correctly to unified hierarchy - if !c.cgroup2 && n == "unified" { - continue - } - p := filepath.Join(c.getCgroupv1Path(n), "tasks") - if err := os.WriteFile(p, pidString, 0o644); err != nil { - return fmt.Errorf("write %s: %w", p, err) - } - } - return nil -} - // Stat returns usage statistics for the cgroup. func (c *CgroupControl) Stat() (*cgroups.Stats, error) { m := cgroups.Stats{} @@ -573,23 +364,6 @@ func readCgroup2MapFile(ctr *CgroupControl, name string) (map[string][]string, e return readCgroupMapPath(p) } -func (c *CgroupControl) createCgroupDirectory(controller string) (bool, error) { - cPath := c.getCgroupv1Path(controller) - err := fileutils.Exists(cPath) - if err == nil { - return false, nil - } - - if !errors.Is(err, os.ErrNotExist) { - return false, err - } - - if err := os.MkdirAll(cPath, 0o755); err != nil { - return false, fmt.Errorf("creating cgroup for %s: %w", controller, err) - } - return true, nil -} - var TestMode bool func createCgroupv2Path(path string) (deferredError error) { @@ -671,32 +445,6 @@ func cleanString(s string) string { return strings.Trim(s, "\n") } -func readAcct(ctr *CgroupControl, name string) (uint64, error) { - p := filepath.Join(ctr.getCgroupv1Path(CPUAcct), name) - return readFileAsUint64(p) -} - -func readAcctList(ctr *CgroupControl, name string) ([]uint64, error) { - p := filepath.Join(ctr.getCgroupv1Path(CPUAcct), name) - data, err := os.ReadFile(p) - if err != nil { - return nil, err - } - r := []uint64{} - for s := range strings.SplitSeq(string(data), " ") { - s = 
cleanString(s) - if s == "" { - break - } - v, err := strconv.ParseUint(s, 10, 64) - if err != nil { - return nil, fmt.Errorf("parsing %s: %w", s, err) - } - r = append(r, v) - } - return r, nil -} - func cpusetCopyFromParent(path string, cgroupv2 bool) error { for _, file := range []string{"cpuset.cpus", "cpuset.mems"} { if _, err := cpusetCopyFileFromParent(path, file, cgroupv2); err != nil { @@ -739,15 +487,10 @@ func cpusetCopyFileFromParent(dir, file string, cgroupv2 bool) ([]byte, error) { // SystemCPUUsage returns the system usage for all the cgroups. func SystemCPUUsage() (uint64, error) { - cgroupv2, err := IsCgroup2UnifiedMode() + _, err := IsCgroup2UnifiedMode() if err != nil { return 0, err } - if !cgroupv2 { - p := filepath.Join(cgroupRoot, CPUAcct, "cpuacct.usage") - return readFileAsUint64(p) - } - files, err := os.ReadDir(cgroupRoot) if err != nil { return 0, err @@ -800,7 +543,7 @@ func UserConnection(uid int) (*systemdDbus.Conn, error) { func UserOwnsCurrentSystemdCgroup() (bool, error) { uid := os.Geteuid() - cgroup2, err := IsCgroup2UnifiedMode() + _, err := IsCgroup2UnifiedMode() if err != nil { return false, err } @@ -822,20 +565,11 @@ func UserOwnsCurrentSystemdCgroup() (bool, error) { // If we are on a cgroup v2 system and there are cgroup v1 controllers // mounted, ignore them when the current process is at the root cgroup. 
- if cgroup2 && parts[1] != "" && parts[2] == "/" { + if parts[1] != "" && parts[2] == "/" { continue } - var cgroupPath string - - if cgroup2 { - cgroupPath = filepath.Join(cgroupRoot, parts[2]) - } else { - if parts[1] != "name=systemd" { - continue - } - cgroupPath = filepath.Join(cgroupRoot, "systemd", parts[2]) - } + cgroupPath := filepath.Join(cgroupRoot, parts[2]) st, err := os.Stat(cgroupPath) if err != nil { diff --git a/common/pkg/cgroups/cgroups_linux_test.go b/common/pkg/cgroups/cgroups_linux_test.go index 2a415acea4..aaf800940e 100644 --- a/common/pkg/cgroups/cgroups_linux_test.go +++ b/common/pkg/cgroups/cgroups_linux_test.go @@ -89,7 +89,7 @@ func TestResources(t *testing.T) { } // test CPU Quota adjustment. - u, _, b, _, _, _ := resourcesToProps(&resources, true) + u, _, b, _, _, _ := resourcesToProps(&resources) val, ok := u["CPUQuotaPerSecUSec"] if !ok { diff --git a/common/pkg/cgroups/cpu_linux.go b/common/pkg/cgroups/cpu_linux.go index 899a86d5d3..f89bac87c2 100644 --- a/common/pkg/cgroups/cpu_linux.go +++ b/common/pkg/cgroups/cpu_linux.go @@ -3,8 +3,6 @@ package cgroups import ( - "errors" - "os" "path/filepath" "strconv" @@ -23,75 +21,33 @@ func getCPUHandler() *linuxCPUHandler { // Apply set the specified constraints. func (c *linuxCPUHandler) Apply(ctr *CgroupControl, res *cgroups.Resources) error { - if ctr.cgroup2 { - man, err := fs2.NewManager(ctr.config, filepath.Join(cgroupRoot, ctr.config.Path)) - if err != nil { - return err - } - return man.Set(res) + man, err := fs2.NewManager(ctr.config, filepath.Join(cgroupRoot, ctr.config.Path)) + if err != nil { + return err } - path := filepath.Join(cgroupRoot, CPU, ctr.config.Path) - return c.CPU.Set(path, res) -} - -// Create the cgroup. -func (c *linuxCPUHandler) Create(ctr *CgroupControl) (bool, error) { - if ctr.cgroup2 { - return false, nil - } - return ctr.createCgroupDirectory(CPU) -} - -// Destroy the cgroup. 
-func (c *linuxCPUHandler) Destroy(ctr *CgroupControl) error { - return rmDirRecursively(ctr.getCgroupv1Path(CPU)) + return man.Set(res) } // Stat fills a metrics structure with usage stats for the controller. func (c *linuxCPUHandler) Stat(ctr *CgroupControl, m *cgroups.Stats) error { - var err error cpu := cgroups.CpuStats{} - if ctr.cgroup2 { - values, err := readCgroup2MapFile(ctr, "cpu.stat") + values, err := readCgroup2MapFile(ctr, "cpu.stat") + if err != nil { + return err + } + if val, found := values["usage_usec"]; found { + cpu.CpuUsage.TotalUsage, err = strconv.ParseUint(cleanString(val[0]), 10, 64) if err != nil { return err } - if val, found := values["usage_usec"]; found { - cpu.CpuUsage.TotalUsage, err = strconv.ParseUint(cleanString(val[0]), 10, 64) - if err != nil { - return err - } - cpu.CpuUsage.UsageInKernelmode *= 1000 - } - if val, found := values["system_usec"]; found { - cpu.CpuUsage.UsageInKernelmode, err = strconv.ParseUint(cleanString(val[0]), 10, 64) - if err != nil { - return err - } - cpu.CpuUsage.TotalUsage *= 1000 - } - } else { - cpu.CpuUsage.TotalUsage, err = readAcct(ctr, "cpuacct.usage") - if err != nil { - if !errors.Is(err, os.ErrNotExist) { - return err - } - cpu.CpuUsage.TotalUsage = 0 - } - cpu.CpuUsage.UsageInKernelmode, err = readAcct(ctr, "cpuacct.usage_sys") - if err != nil { - if !errors.Is(err, os.ErrNotExist) { - return err - } - cpu.CpuUsage.UsageInKernelmode = 0 - } - cpu.CpuUsage.PercpuUsage, err = readAcctList(ctr, "cpuacct.usage_percpu") + cpu.CpuUsage.TotalUsage *= 1000 + } + if val, found := values["system_usec"]; found { + cpu.CpuUsage.UsageInKernelmode, err = strconv.ParseUint(cleanString(val[0]), 10, 64) if err != nil { - if !errors.Is(err, os.ErrNotExist) { - return err - } - cpu.CpuUsage.PercpuUsage = nil + return err } + cpu.CpuUsage.UsageInKernelmode *= 1000 } m.CpuStats = cpu return nil diff --git a/common/pkg/cgroups/cpuset_linux.go b/common/pkg/cgroups/cpuset_linux.go index 10b2298e12..c03a73623c 
100644 --- a/common/pkg/cgroups/cpuset_linux.go +++ b/common/pkg/cgroups/cpuset_linux.go @@ -20,33 +20,17 @@ func getCpusetHandler() *linuxCpusetHandler { // Apply set the specified constraints. func (c *linuxCpusetHandler) Apply(ctr *CgroupControl, res *cgroups.Resources) error { - if ctr.cgroup2 { - man, err := fs2.NewManager(ctr.config, filepath.Join(cgroupRoot, ctr.config.Path)) - if err != nil { - return err - } - return man.Set(res) + man, err := fs2.NewManager(ctr.config, filepath.Join(cgroupRoot, ctr.config.Path)) + if err != nil { + return err } - path := filepath.Join(cgroupRoot, CPUset, ctr.config.Path) - return c.CPUSet.Set(path, res) + return man.Set(res) } // Create the cgroup. func (c *linuxCpusetHandler) Create(ctr *CgroupControl) (bool, error) { - if ctr.cgroup2 { - path := filepath.Join(cgroupRoot, ctr.config.Path) - return true, cpusetCopyFromParent(path, true) - } - created, err := ctr.createCgroupDirectory(CPUset) - if !created || err != nil { - return created, err - } - return true, cpusetCopyFromParent(ctr.getCgroupv1Path(CPUset), false) -} - -// Destroy the cgroup. -func (c *linuxCpusetHandler) Destroy(ctr *CgroupControl) error { - return rmDirRecursively(ctr.getCgroupv1Path(CPUset)) + path := filepath.Join(cgroupRoot, ctr.config.Path) + return true, cpusetCopyFromParent(path, true) } // Stat fills a metrics structure with usage stats for the controller. diff --git a/common/pkg/cgroups/memory_linux.go b/common/pkg/cgroups/memory_linux.go index 7f61900308..5a06d902ee 100644 --- a/common/pkg/cgroups/memory_linux.go +++ b/common/pkg/cgroups/memory_linux.go @@ -20,28 +20,11 @@ func getMemoryHandler() *linuxMemHandler { // Apply set the specified constraints. 
func (c *linuxMemHandler) Apply(ctr *CgroupControl, res *cgroups.Resources) error { - if ctr.cgroup2 { - man, err := fs2.NewManager(ctr.config, filepath.Join(cgroupRoot, ctr.config.Path)) - if err != nil { - return err - } - return man.Set(res) - } - path := filepath.Join(cgroupRoot, Memory, ctr.config.Path) - return c.Mem.Set(path, res) -} - -// Create the cgroup. -func (c *linuxMemHandler) Create(ctr *CgroupControl) (bool, error) { - if ctr.cgroup2 { - return false, nil + man, err := fs2.NewManager(ctr.config, filepath.Join(cgroupRoot, ctr.config.Path)) + if err != nil { + return err } - return ctr.createCgroupDirectory(Memory) -} - -// Destroy the cgroup. -func (c *linuxMemHandler) Destroy(ctr *CgroupControl) error { - return rmDirRecursively(ctr.getCgroupv1Path(Memory)) + return man.Set(res) } // Stat fills a metrics structure with usage stats for the controller. @@ -52,48 +35,25 @@ func (c *linuxMemHandler) Stat(ctr *CgroupControl, m *cgroups.Stats) error { var memoryRoot string var limitFilename string - if ctr.cgroup2 { - memoryRoot = filepath.Join(cgroupRoot, ctr.config.Path) - limitFilename = "memory.max" - - // Read memory.current - current, err := readFileAsUint64(filepath.Join(memoryRoot, "memory.current")) - if err != nil { - return err - } + memoryRoot = filepath.Join(cgroupRoot, ctr.config.Path) + limitFilename = "memory.max" - // Read inactive_file from memory.stat - inactiveFile, err := readFileByKeyAsUint64(filepath.Join(memoryRoot, "memory.stat"), "inactive_file") - if err != nil { - return err - } - - // Docker calculation: memory.current - memory.stat['inactive_file'] - memUsage.Usage.Usage = 0 - if inactiveFile < current { - memUsage.Usage.Usage = current - inactiveFile - } - } else { - memoryRoot = ctr.getCgroupv1Path(Memory) - limitFilename = "memory.limit_in_bytes" - - // Read memory.usage_in_bytes - usageInBytes, err := readFileAsUint64(filepath.Join(memoryRoot, "memory.usage_in_bytes")) - if err != nil { - return err - } + // Read 
memory.current + current, err := readFileAsUint64(filepath.Join(memoryRoot, "memory.current")) + if err != nil { + return err + } - // Read total_inactive_file from memory.stat - totalInactiveFile, err := readFileByKeyAsUint64(filepath.Join(memoryRoot, "memory.stat"), "total_inactive_file") - if err != nil { - return err - } + // Read inactive_file from memory.stat + inactiveFile, err := readFileByKeyAsUint64(filepath.Join(memoryRoot, "memory.stat"), "inactive_file") + if err != nil { + return err + } - // Docker calculation: memory.usage_in_bytes - memory.stat['total_inactive_file'] - memUsage.Usage.Usage = 0 - if totalInactiveFile < usageInBytes { - memUsage.Usage.Usage = usageInBytes - totalInactiveFile - } + // Docker calculation: memory.current - memory.stat['inactive_file'] + memUsage.Usage.Usage = 0 + if inactiveFile < current { + memUsage.Usage.Usage = current - inactiveFile } memUsage.Usage.Limit, err = readFileAsUint64(filepath.Join(memoryRoot, limitFilename)) diff --git a/common/pkg/cgroups/pids_linux.go b/common/pkg/cgroups/pids_linux.go index 82202830e0..f74d80a9bf 100644 --- a/common/pkg/cgroups/pids_linux.go +++ b/common/pkg/cgroups/pids_linux.go @@ -20,29 +20,11 @@ func getPidsHandler() *linuxPidHandler { // Apply set the specified constraints. func (c *linuxPidHandler) Apply(ctr *CgroupControl, res *cgroups.Resources) error { - if ctr.cgroup2 { - man, err := fs2.NewManager(ctr.config, filepath.Join(cgroupRoot, ctr.config.Path)) - if err != nil { - return err - } - return man.Set(res) - } - - path := filepath.Join(cgroupRoot, Pids, ctr.config.Path) - return c.Pid.Set(path, res) -} - -// Create the cgroup. -func (c *linuxPidHandler) Create(ctr *CgroupControl) (bool, error) { - if ctr.cgroup2 { - return false, nil + man, err := fs2.NewManager(ctr.config, filepath.Join(cgroupRoot, ctr.config.Path)) + if err != nil { + return err } - return ctr.createCgroupDirectory(Pids) -} - -// Destroy the cgroup. 
-func (c *linuxPidHandler) Destroy(ctr *CgroupControl) error { - return rmDirRecursively(ctr.getCgroupv1Path(Pids)) + return man.Set(res) } // Stat fills a metrics structure with usage stats for the controller. @@ -52,12 +34,7 @@ func (c *linuxPidHandler) Stat(ctr *CgroupControl, m *cgroups.Stats) error { return nil } - var PIDRoot string - if ctr.cgroup2 { - PIDRoot = filepath.Join(cgroupRoot, ctr.config.Path) - } else { - PIDRoot = ctr.getCgroupv1Path(Pids) - } + PIDRoot := filepath.Join(cgroupRoot, ctr.config.Path) current, err := readFileAsUint64(filepath.Join(PIDRoot, "pids.current")) if err != nil { diff --git a/common/pkg/cgroups/systemd_linux.go b/common/pkg/cgroups/systemd_linux.go index c0bc6d9d38..e59a007615 100644 --- a/common/pkg/cgroups/systemd_linux.go +++ b/common/pkg/cgroups/systemd_linux.go @@ -32,18 +32,11 @@ func systemdCreate(resources *cgroups.Resources, path string, c *systemdDbus.Con systemdDbus.PropDescription("cgroup " + name), systemdDbus.PropWants(slice), } - var ioString string - v2, _ := IsCgroup2UnifiedMode() - if v2 { - ioString = "IOAccounting" - } else { - ioString = "BlockIOAccounting" - } pMap := map[string]bool{ "DefaultDependencies": false, "MemoryAccounting": true, "CPUAccounting": true, - ioString: true, + "IOAccounting": true, } if i == 0 { pMap["Delegate"] = true @@ -57,7 +50,7 @@ func systemdCreate(resources *cgroups.Resources, path string, c *systemdDbus.Con properties = append(properties, p) } - uMap, sMap, bMap, iMap, structMap, err := resourcesToProps(resources, v2) + uMap, sMap, bMap, iMap, structMap, err := resourcesToProps(resources) if err != nil { lastError = err continue @@ -150,7 +143,7 @@ func systemdDestroyConn(path string, c *systemdDbus.Conn) error { return nil } -func resourcesToProps(res *cgroups.Resources, v2 bool) (map[string]uint64, map[string]string, map[string][]byte, map[string]int64, map[string][]BlkioDev, error) { +func resourcesToProps(res *cgroups.Resources) (map[string]uint64, map[string]string, 
map[string][]byte, map[string]int64, map[string][]BlkioDev, error) { bMap := make(map[string][]byte) // this array is not used but will be once more resource limits are added sMap := make(map[string]string) @@ -176,13 +169,8 @@ func resourcesToProps(res *cgroups.Resources, v2 bool) (map[string]uint64, map[s if res.CpuShares != 0 { // convert from shares to weight. weight only supports 1-10000 - v2, _ := IsCgroup2UnifiedMode() - if v2 { - wt := (1 + ((res.CpuShares-2)*9999)/262142) - uMap["CPUWeight"] = wt - } else { - uMap["CPUShares"] = res.CpuShares - } + wt := (1 + ((res.CpuShares-2)*9999)/262142) + uMap["CPUWeight"] = wt } // CPUSet @@ -212,21 +200,15 @@ func resourcesToProps(res *cgroups.Resources, v2 bool) (map[string]uint64, map[s case res.Memory == -1 || res.MemorySwap == -1: swap := -1 uMap["MemorySwapMax"] = uint64(swap) - case v2: + default: // swap max = swap (limit + swap limit) - limit uMap["MemorySwapMax"] = uint64(res.MemorySwap - res.Memory) - default: - uMap["MemorySwapMax"] = uint64(res.MemorySwap) } } // Blkio if res.BlkioWeight > 0 { - if v2 { - uMap["IOWeight"] = uint64(res.BlkioWeight) - } else { - uMap["BlockIOWeight"] = uint64(res.BlkioWeight) - } + uMap["IOWeight"] = uint64(res.BlkioWeight) } // systemd requires the paths to be in the form /dev/{block, char}/major:minor @@ -238,11 +220,7 @@ func resourcesToProps(res *cgroups.Resources, v2 bool) (map[string]uint64, map[s Device: fmt.Sprintf("/dev/block/%d:%d", entry.Major, entry.Minor), Bytes: entry.Rate, } - if v2 { - structMap["IOReadBandwidthMax"] = append(structMap["IOReadBandwidthMax"], newThrottle) - } else { - structMap["BlockIOReadBandwidth"] = append(structMap["BlockIOReadBandwidth"], newThrottle) - } + structMap["IOReadBandwidthMax"] = append(structMap["IOReadBandwidthMax"], newThrottle) } } @@ -252,11 +230,7 @@ func resourcesToProps(res *cgroups.Resources, v2 bool) (map[string]uint64, map[s Device: fmt.Sprintf("/dev/block/%d:%d", entry.Major, entry.Minor), Bytes: entry.Rate, } - 
if v2 { - structMap["IOWriteBandwidthMax"] = append(structMap["IOWriteBandwidthMax"], newThrottle) - } else { - structMap["BlockIOWriteBandwidth"] = append(structMap["BlockIOWriteBandwidth"], newThrottle) - } + structMap["IOWriteBandwidthMax"] = append(structMap["IOWriteBandwidthMax"], newThrottle) } } @@ -266,11 +240,7 @@ func resourcesToProps(res *cgroups.Resources, v2 bool) (map[string]uint64, map[s Device: fmt.Sprintf("/dev/block/%d:%d", entry.Major, entry.Minor), Bytes: uint64(entry.Weight), } - if v2 { - structMap["IODeviceWeight"] = append(structMap["IODeviceWeight"], newWeight) - } else { - structMap["BlockIODeviceWeight"] = append(structMap["BlockIODeviceWeight"], newWeight) - } + structMap["IODeviceWeight"] = append(structMap["IODeviceWeight"], newWeight) } } diff --git a/common/pkg/cgroups/utils_linux.go b/common/pkg/cgroups/utils_linux.go index a1b18a9695..b1ee60a294 100644 --- a/common/pkg/cgroups/utils_linux.go +++ b/common/pkg/cgroups/utils_linux.go @@ -15,7 +15,6 @@ import ( "github.com/opencontainers/cgroups" "github.com/sirupsen/logrus" - "go.podman.io/storage/pkg/fileutils" "golang.org/x/sys/unix" ) @@ -207,7 +206,7 @@ func MoveUnderCgroup(cgroup, subtree string, processes []uint32) error { } defer f.Close() - unifiedMode, err := IsCgroup2UnifiedMode() + _, err = IsCgroup2UnifiedMode() if err != nil { return err } @@ -221,24 +220,12 @@ func MoveUnderCgroup(cgroup, subtree string, processes []uint32) error { } // root cgroup, skip it - if parts[2] == "/" && (!unifiedMode || parts[1] != "") { + if parts[2] == "/" && parts[1] != "" { continue } cgroupRoot := "/sys/fs/cgroup" - // Special case the unified mount on hybrid cgroup and named hierarchies. - // This works on Fedora 31, but we should really parse the mounts to see - // where the cgroup hierarchy is mounted. 
- if parts[1] == "" && !unifiedMode { - // If it is not using unified mode, the cgroup v2 hierarchy is - // usually mounted under /sys/fs/cgroup/unified - cgroupRoot = filepath.Join(cgroupRoot, "unified") - - // Ignore the unified mount if it doesn't exist - if err := fileutils.Exists(cgroupRoot); err != nil && os.IsNotExist(err) { - continue - } - } else if parts[1] != "" { + if parts[1] != "" { // Assume the controller is mounted at /sys/fs/cgroup/$CONTROLLER. controller := strings.TrimPrefix(parts[1], "name=") cgroupRoot = filepath.Join(cgroupRoot, controller) @@ -292,15 +279,11 @@ var ( // it is running in the root cgroup on a system that uses cgroupv2. func MaybeMoveToSubCgroup() error { maybeMoveToSubCgroupSync.Do(func() { - unifiedMode, err := IsCgroup2UnifiedMode() + _, err := IsCgroup2UnifiedMode() if err != nil { maybeMoveToSubCgroupSyncErr = err return } - if !unifiedMode { - maybeMoveToSubCgroupSyncErr = nil - return - } cgroup, err := GetOwnCgroup() if err != nil { maybeMoveToSubCgroupSyncErr = err diff --git a/common/pkg/cgroupv2/cgroups_linux.go b/common/pkg/cgroupv2/cgroups_linux.go deleted file mode 100644 index b7e1e6aeac..0000000000 --- a/common/pkg/cgroupv2/cgroups_linux.go +++ /dev/null @@ -1,27 +0,0 @@ -package cgroupv2 - -import ( - "sync" - "syscall" - - "golang.org/x/sys/unix" -) - -var ( - isCgroupV2Once sync.Once - isCgroupV2 bool - isCgroupV2Err error -) - -// Enabled returns whether we are running on cgroup v2. 
-func Enabled() (bool, error) { - isCgroupV2Once.Do(func() { - var st syscall.Statfs_t - if err := syscall.Statfs("/sys/fs/cgroup", &st); err != nil { - isCgroupV2, isCgroupV2Err = false, err - } else { - isCgroupV2, isCgroupV2Err = st.Type == unix.CGROUP2_SUPER_MAGIC, nil - } - }) - return isCgroupV2, isCgroupV2Err -} diff --git a/common/pkg/cgroupv2/cgroups_unsupported.go b/common/pkg/cgroupv2/cgroups_unsupported.go deleted file mode 100644 index 8de8e60d80..0000000000 --- a/common/pkg/cgroupv2/cgroups_unsupported.go +++ /dev/null @@ -1,8 +0,0 @@ -//go:build !linux - -package cgroupv2 - -// Enabled returns whether we are running on cgroup v2. -func Enabled() (bool, error) { - return false, nil -} diff --git a/common/pkg/config/default.go b/common/pkg/config/default.go index 54402d1712..ee4dd63791 100644 --- a/common/pkg/config/default.go +++ b/common/pkg/config/default.go @@ -15,7 +15,6 @@ import ( "go.podman.io/common/internal/attributedstring" nettypes "go.podman.io/common/libnetwork/types" "go.podman.io/common/pkg/apparmor" - "go.podman.io/common/pkg/cgroupv2" "go.podman.io/storage/pkg/fileutils" "go.podman.io/storage/pkg/homedir" "go.podman.io/storage/pkg/unshare" @@ -231,17 +230,12 @@ func defaultConfig() (*Config, error) { } } - cgroupNS := "host" - if cgroup2, _ := cgroupv2.Enabled(); cgroup2 { - cgroupNS = "private" - } - return &Config{ Containers: ContainersConfig{ Annotations: attributedstring.Slice{}, ApparmorProfile: DefaultApparmorProfile, BaseHostsFile: "", - CgroupNS: cgroupNS, + CgroupNS: "private", Cgroups: getDefaultCgroupsMode(), DNSOptions: attributedstring.Slice{}, DNSSearches: attributedstring.Slice{}, @@ -650,12 +644,7 @@ func (c *Config) PidsLimit() int64 { if c.Engine.CgroupManager != SystemdCgroupsManager { return 0 } - cgroup2, _ := cgroupv2.Enabled() - if !cgroup2 { - return 0 - } } - return c.Containers.PidsLimit } diff --git a/common/pkg/config/systemd.go b/common/pkg/config/systemd.go index e7c15b5909..f8e84acece 100644 --- 
a/common/pkg/config/systemd.go +++ b/common/pkg/config/systemd.go @@ -7,9 +7,7 @@ import ( "path/filepath" "sync" - "go.podman.io/common/pkg/cgroupv2" "go.podman.io/common/pkg/systemd" - "go.podman.io/storage/pkg/unshare" ) var ( @@ -26,11 +24,6 @@ func defaultCgroupManager() string { if !useSystemd() { return CgroupfsCgroupsManager } - enabled, err := cgroupv2.Enabled() - if err == nil && !enabled && unshare.IsRootless() { - return CgroupfsCgroupsManager - } - return SystemdCgroupsManager } diff --git a/common/pkg/sysinfo/sysinfo_linux.go b/common/pkg/sysinfo/sysinfo_linux.go index ea98d49481..752b1bc120 100644 --- a/common/pkg/sysinfo/sysinfo_linux.go +++ b/common/pkg/sysinfo/sysinfo_linux.go @@ -9,7 +9,7 @@ import ( "github.com/opencontainers/cgroups" "github.com/sirupsen/logrus" - "go.podman.io/common/pkg/cgroupv2" + cgroupv2 "go.podman.io/common/pkg/cgroups" "go.podman.io/storage/pkg/fileutils" "golang.org/x/sys/unix" ) @@ -41,7 +41,7 @@ func New(quiet bool) *SysInfo { sysInfo.cgroupCPUInfo = checkCgroupCPU(cgMounts, quiet) sysInfo.cgroupBlkioInfo = checkCgroupBlkioInfo(cgMounts, quiet) sysInfo.cgroupCpusetInfo = checkCgroupCpusetInfo(cgMounts, quiet) - sysInfo.cgroupPids = checkCgroupPids(cgMounts, quiet) + sysInfo.cgroupPids = checkCgroupPids() } _, ok := cgMounts["devices"] @@ -228,22 +228,12 @@ func checkCgroupCpusetInfo(cgMounts map[string]string, quiet bool) cgroupCpusetI } // checkCgroupPids reads the pids information from the pids cgroup mount point. 
-func checkCgroupPids(cgMounts map[string]string, quiet bool) cgroupPids { - cgroup2, err := cgroupv2.Enabled() +func checkCgroupPids() cgroupPids { + _, err := cgroupv2.IsCgroup2UnifiedMode() if err != nil { logrus.Errorf("Failed to check cgroups version: %v", err) return cgroupPids{} } - if !cgroup2 { - _, ok := cgMounts["pids"] - if !ok { - if !quiet { - logrus.Warn("Unable to find pids cgroup in mounts") - } - return cgroupPids{} - } - } - return cgroupPids{ PidsLimit: true, } diff --git a/common/pkg/systemd/systemd_linux.go b/common/pkg/systemd/systemd_linux.go index a189cfbe05..1d839636aa 100644 --- a/common/pkg/systemd/systemd_linux.go +++ b/common/pkg/systemd/systemd_linux.go @@ -88,14 +88,12 @@ func MovePauseProcessToScope(pausePidPath string) { } if err != nil { - unified, err2 := cgroups.IsCgroup2UnifiedMode() + _, err2 := cgroups.IsCgroup2UnifiedMode() if err2 != nil { logrus.Warnf("Failed to detect if running with cgroup unified: %v", err) } - if RunsOnSystemd() && unified { + if RunsOnSystemd() { logrus.Warnf("Failed to add pause process to systemd sandbox cgroup: %v", err) - } else { - logrus.Debugf("Failed to add pause process to systemd sandbox cgroup: %v", err) } } } From ddcc4e042428de3a4cd961d6f9d47f123f8f0209 Mon Sep 17 00:00:00 2001 From: Lokesh Mandvekar Date: Thu, 13 Nov 2025 14:19:47 -0500 Subject: [PATCH 2/9] common/pkg/cgroups: Remove linuxCpusetHandler.Create and cpusetCopyFromParent The manual cpusetCopyFromParent function was needed for cgroups v1 to populate cpuset.cpus and cpuset.mems from parent cgroups. In cgroups v2, the kernel automatically handles cpuset inheritance through the cpuset.cpus.effective and cpuset.mems.effective interfaces. When fs2.NewManager().Set() is called with cpuset resources, it directly writes the values to cpuset.cpus and cpuset.mems if provided. If not provided, the kernel ensures child cgroups have valid effective cpusets by automatically inheriting from their parent. 
Signed-off-by: Lokesh Mandvekar --- common/pkg/cgroups/cgroups_linux.go | 40 ----------------------------- common/pkg/cgroups/cpuset_linux.go | 6 ----- 2 files changed, 46 deletions(-) diff --git a/common/pkg/cgroups/cgroups_linux.go b/common/pkg/cgroups/cgroups_linux.go index 4d7c6c2e84..414ac1563a 100644 --- a/common/pkg/cgroups/cgroups_linux.go +++ b/common/pkg/cgroups/cgroups_linux.go @@ -445,46 +445,6 @@ func cleanString(s string) string { return strings.Trim(s, "\n") } -func cpusetCopyFromParent(path string, cgroupv2 bool) error { - for _, file := range []string{"cpuset.cpus", "cpuset.mems"} { - if _, err := cpusetCopyFileFromParent(path, file, cgroupv2); err != nil { - return err - } - } - return nil -} - -func cpusetCopyFileFromParent(dir, file string, cgroupv2 bool) ([]byte, error) { - if dir == cgroupRoot { - return nil, fmt.Errorf("could not find parent to initialize cpuset %s", file) - } - path := filepath.Join(dir, file) - parentPath := path - if cgroupv2 { - parentPath += ".effective" - } - data, err := os.ReadFile(parentPath) - if err != nil { - // if the file doesn't exist, it is likely that the cpuset controller - // is not enabled in the kernel. - if os.IsNotExist(err) { - return nil, nil - } - return nil, err - } - if strings.Trim(string(data), "\n") != "" { - return data, nil - } - data, err = cpusetCopyFileFromParent(filepath.Dir(dir), file, cgroupv2) - if err != nil { - return nil, err - } - if err := os.WriteFile(path, data, 0o644); err != nil { - return nil, fmt.Errorf("write %s: %w", path, err) - } - return data, nil -} - // SystemCPUUsage returns the system usage for all the cgroups. 
func SystemCPUUsage() (uint64, error) { _, err := IsCgroup2UnifiedMode() diff --git a/common/pkg/cgroups/cpuset_linux.go b/common/pkg/cgroups/cpuset_linux.go index c03a73623c..d95079abb9 100644 --- a/common/pkg/cgroups/cpuset_linux.go +++ b/common/pkg/cgroups/cpuset_linux.go @@ -27,12 +27,6 @@ func (c *linuxCpusetHandler) Apply(ctr *CgroupControl, res *cgroups.Resources) e return man.Set(res) } -// Create the cgroup. -func (c *linuxCpusetHandler) Create(ctr *CgroupControl) (bool, error) { - path := filepath.Join(cgroupRoot, ctr.config.Path) - return true, cpusetCopyFromParent(path, true) -} - // Stat fills a metrics structure with usage stats for the controller. func (c *linuxCpusetHandler) Stat(_ *CgroupControl, _ *cgroups.Stats) error { return nil From 2a7fc75f3cbab2eef4bebac38b3a112465e8419c Mon Sep 17 00:00:00 2001 From: Lokesh Mandvekar Date: Fri, 21 Nov 2025 10:41:44 -0500 Subject: [PATCH 3/9] common: controllerHandler.Apply replaced by fs2.NewManager The Update() function previously iterated through handlers calling their Apply() methods. With cgroups v1 removed, all handlers' Apply() implementations were identical - they all called fs2.NewManager().Set(). This commit removes the redundant handler Apply() methods and has Update() call fs2.NewManager().Set() directly. The Update() method now performs the same function more efficiently without the handler indirection. 
Signed-off-by: Lokesh Mandvekar --- common/pkg/cgroups/blkio_linux.go | 11 ----------- common/pkg/cgroups/cgroups_linux.go | 11 +++++------ common/pkg/cgroups/cpu_linux.go | 11 ----------- common/pkg/cgroups/cpuset_linux.go | 12 ------------ common/pkg/cgroups/memory_linux.go | 10 ---------- common/pkg/cgroups/pids_linux.go | 10 ---------- 6 files changed, 5 insertions(+), 60 deletions(-) diff --git a/common/pkg/cgroups/blkio_linux.go b/common/pkg/cgroups/blkio_linux.go index c5f085ecd7..98914df015 100644 --- a/common/pkg/cgroups/blkio_linux.go +++ b/common/pkg/cgroups/blkio_linux.go @@ -3,13 +3,11 @@ package cgroups import ( - "path/filepath" "strconv" "strings" "github.com/opencontainers/cgroups" "github.com/opencontainers/cgroups/fs" - "github.com/opencontainers/cgroups/fs2" ) type linuxBlkioHandler struct { @@ -20,15 +18,6 @@ func getBlkioHandler() *linuxBlkioHandler { return &linuxBlkioHandler{} } -// Apply set the specified constraints. -func (c *linuxBlkioHandler) Apply(ctr *CgroupControl, res *cgroups.Resources) error { - man, err := fs2.NewManager(ctr.config, filepath.Join(cgroupRoot, ctr.config.Path)) - if err != nil { - return err - } - return man.Set(res) -} - // Stat fills a metrics structure with usage stats for the controller. 
func (c *linuxBlkioHandler) Stat(ctr *CgroupControl, m *cgroups.Stats) error { var ioServiceBytesRecursive []cgroups.BlkioStatEntry diff --git a/common/pkg/cgroups/cgroups_linux.go b/common/pkg/cgroups/cgroups_linux.go index 414ac1563a..3954d54b6d 100644 --- a/common/pkg/cgroups/cgroups_linux.go +++ b/common/pkg/cgroups/cgroups_linux.go @@ -20,6 +20,7 @@ import ( systemdDbus "github.com/coreos/go-systemd/v22/dbus" "github.com/godbus/dbus/v5" "github.com/opencontainers/cgroups" + "github.com/opencontainers/cgroups/fs2" "go.podman.io/storage/pkg/fileutils" "go.podman.io/storage/pkg/unshare" "golang.org/x/sys/unix" @@ -42,7 +43,6 @@ type CgroupControl struct { } type controllerHandler interface { - Apply(*CgroupControl, *cgroups.Resources) error Stat(*CgroupControl, *cgroups.Stats) error } @@ -306,12 +306,11 @@ func (c *CgroupControl) DeleteByPath(path string) error { // Update updates the cgroups. func (c *CgroupControl) Update(resources *cgroups.Resources) error { - for _, h := range handlers { - if err := h.Apply(c, resources); err != nil { - return err - } + man, err := fs2.NewManager(c.config, filepath.Join(cgroupRoot, c.config.Path)) + if err != nil { + return err } - return nil + return man.Set(resources) } // Stat returns usage statistics for the cgroup. diff --git a/common/pkg/cgroups/cpu_linux.go b/common/pkg/cgroups/cpu_linux.go index f89bac87c2..c916e44407 100644 --- a/common/pkg/cgroups/cpu_linux.go +++ b/common/pkg/cgroups/cpu_linux.go @@ -3,12 +3,10 @@ package cgroups import ( - "path/filepath" "strconv" "github.com/opencontainers/cgroups" "github.com/opencontainers/cgroups/fs" - "github.com/opencontainers/cgroups/fs2" ) type linuxCPUHandler struct { @@ -19,15 +17,6 @@ func getCPUHandler() *linuxCPUHandler { return &linuxCPUHandler{} } -// Apply set the specified constraints. 
-func (c *linuxCPUHandler) Apply(ctr *CgroupControl, res *cgroups.Resources) error { - man, err := fs2.NewManager(ctr.config, filepath.Join(cgroupRoot, ctr.config.Path)) - if err != nil { - return err - } - return man.Set(res) -} - // Stat fills a metrics structure with usage stats for the controller. func (c *linuxCPUHandler) Stat(ctr *CgroupControl, m *cgroups.Stats) error { cpu := cgroups.CpuStats{} diff --git a/common/pkg/cgroups/cpuset_linux.go b/common/pkg/cgroups/cpuset_linux.go index d95079abb9..e202118f44 100644 --- a/common/pkg/cgroups/cpuset_linux.go +++ b/common/pkg/cgroups/cpuset_linux.go @@ -3,11 +3,8 @@ package cgroups import ( - "path/filepath" - "github.com/opencontainers/cgroups" "github.com/opencontainers/cgroups/fs" - "github.com/opencontainers/cgroups/fs2" ) type linuxCpusetHandler struct { @@ -18,15 +15,6 @@ func getCpusetHandler() *linuxCpusetHandler { return &linuxCpusetHandler{} } -// Apply set the specified constraints. -func (c *linuxCpusetHandler) Apply(ctr *CgroupControl, res *cgroups.Resources) error { - man, err := fs2.NewManager(ctr.config, filepath.Join(cgroupRoot, ctr.config.Path)) - if err != nil { - return err - } - return man.Set(res) -} - // Stat fills a metrics structure with usage stats for the controller. func (c *linuxCpusetHandler) Stat(_ *CgroupControl, _ *cgroups.Stats) error { return nil diff --git a/common/pkg/cgroups/memory_linux.go b/common/pkg/cgroups/memory_linux.go index 5a06d902ee..2376d29b81 100644 --- a/common/pkg/cgroups/memory_linux.go +++ b/common/pkg/cgroups/memory_linux.go @@ -7,7 +7,6 @@ import ( "github.com/opencontainers/cgroups" "github.com/opencontainers/cgroups/fs" - "github.com/opencontainers/cgroups/fs2" ) type linuxMemHandler struct { @@ -18,15 +17,6 @@ func getMemoryHandler() *linuxMemHandler { return &linuxMemHandler{} } -// Apply set the specified constraints. 
-func (c *linuxMemHandler) Apply(ctr *CgroupControl, res *cgroups.Resources) error { - man, err := fs2.NewManager(ctr.config, filepath.Join(cgroupRoot, ctr.config.Path)) - if err != nil { - return err - } - return man.Set(res) -} - // Stat fills a metrics structure with usage stats for the controller. func (c *linuxMemHandler) Stat(ctr *CgroupControl, m *cgroups.Stats) error { var err error diff --git a/common/pkg/cgroups/pids_linux.go b/common/pkg/cgroups/pids_linux.go index f74d80a9bf..d7c7b37e07 100644 --- a/common/pkg/cgroups/pids_linux.go +++ b/common/pkg/cgroups/pids_linux.go @@ -7,7 +7,6 @@ import ( "github.com/opencontainers/cgroups" "github.com/opencontainers/cgroups/fs" - "github.com/opencontainers/cgroups/fs2" ) type linuxPidHandler struct { @@ -18,15 +17,6 @@ func getPidsHandler() *linuxPidHandler { return &linuxPidHandler{} } -// Apply set the specified constraints. -func (c *linuxPidHandler) Apply(ctr *CgroupControl, res *cgroups.Resources) error { - man, err := fs2.NewManager(ctr.config, filepath.Join(cgroupRoot, ctr.config.Path)) - if err != nil { - return err - } - return man.Set(res) -} - // Stat fills a metrics structure with usage stats for the controller. func (c *linuxPidHandler) Stat(ctr *CgroupControl, m *cgroups.Stats) error { if ctr.config.Path == "" { From 2b70717e711b3a277ce431c8c824f4c884c3f570 Mon Sep 17 00:00:00 2001 From: Lokesh Mandvekar Date: Mon, 24 Nov 2025 10:37:30 -0500 Subject: [PATCH 4/9] common/pkg/cgroups: Remove unused fs imports and handler fields The github.com/opencontainers/cgroups/fs import and associated struct fields (Blkio, CPU, CPUSet, Mem, Pid) were never used in any of the handler implementations. These were remnants from the cgroups v1 removal. The empty handler structs are retained as they serve as method receivers for implementing the controllerHandler interface. 
Signed-off-by: Lokesh Mandvekar --- common/pkg/cgroups/blkio_linux.go | 2 -- common/pkg/cgroups/cpu_linux.go | 2 -- common/pkg/cgroups/cpuset_linux.go | 2 -- common/pkg/cgroups/memory_linux.go | 2 -- common/pkg/cgroups/pids_linux.go | 2 -- 5 files changed, 10 deletions(-) diff --git a/common/pkg/cgroups/blkio_linux.go b/common/pkg/cgroups/blkio_linux.go index 98914df015..82725e33af 100644 --- a/common/pkg/cgroups/blkio_linux.go +++ b/common/pkg/cgroups/blkio_linux.go @@ -7,11 +7,9 @@ import ( "strings" "github.com/opencontainers/cgroups" - "github.com/opencontainers/cgroups/fs" ) type linuxBlkioHandler struct { - Blkio fs.BlkioGroup } func getBlkioHandler() *linuxBlkioHandler { diff --git a/common/pkg/cgroups/cpu_linux.go b/common/pkg/cgroups/cpu_linux.go index c916e44407..5ea41b64ee 100644 --- a/common/pkg/cgroups/cpu_linux.go +++ b/common/pkg/cgroups/cpu_linux.go @@ -6,11 +6,9 @@ import ( "strconv" "github.com/opencontainers/cgroups" - "github.com/opencontainers/cgroups/fs" ) type linuxCPUHandler struct { - CPU fs.CpuGroup } func getCPUHandler() *linuxCPUHandler { diff --git a/common/pkg/cgroups/cpuset_linux.go b/common/pkg/cgroups/cpuset_linux.go index e202118f44..15689891ab 100644 --- a/common/pkg/cgroups/cpuset_linux.go +++ b/common/pkg/cgroups/cpuset_linux.go @@ -4,11 +4,9 @@ package cgroups import ( "github.com/opencontainers/cgroups" - "github.com/opencontainers/cgroups/fs" ) type linuxCpusetHandler struct { - CPUSet fs.CpusetGroup } func getCpusetHandler() *linuxCpusetHandler { diff --git a/common/pkg/cgroups/memory_linux.go b/common/pkg/cgroups/memory_linux.go index 2376d29b81..a49726c5df 100644 --- a/common/pkg/cgroups/memory_linux.go +++ b/common/pkg/cgroups/memory_linux.go @@ -6,11 +6,9 @@ import ( "path/filepath" "github.com/opencontainers/cgroups" - "github.com/opencontainers/cgroups/fs" ) type linuxMemHandler struct { - Mem fs.MemoryGroup } func getMemoryHandler() *linuxMemHandler { diff --git a/common/pkg/cgroups/pids_linux.go 
b/common/pkg/cgroups/pids_linux.go index d7c7b37e07..8a9ee508d7 100644 --- a/common/pkg/cgroups/pids_linux.go +++ b/common/pkg/cgroups/pids_linux.go @@ -6,11 +6,9 @@ import ( "path/filepath" "github.com/opencontainers/cgroups" - "github.com/opencontainers/cgroups/fs" ) type linuxPidHandler struct { - Pid fs.PidsGroup } func getPidsHandler() *linuxPidHandler { From 40da35874247339e5627c1d7daa450ac96f22857 Mon Sep 17 00:00:00 2001 From: Lokesh Mandvekar Date: Mon, 24 Nov 2025 10:40:49 -0500 Subject: [PATCH 5/9] common/pkg/cgroups: Remove unused cpuset handler The cpuset handler only had an empty Stat method that returned nil, providing no functionality. Remove the handler entirely along with the unused CPUset and CPUAcct controller constants which were remnants from cgroups v1. Signed-off-by: Lokesh Mandvekar --- common/pkg/cgroups/cgroups_linux.go | 5 ----- common/pkg/cgroups/cpuset_linux.go | 19 ------------------- 2 files changed, 24 deletions(-) delete mode 100644 common/pkg/cgroups/cpuset_linux.go diff --git a/common/pkg/cgroups/cgroups_linux.go b/common/pkg/cgroups/cgroups_linux.go index 3954d54b6d..fa5e7d85d8 100644 --- a/common/pkg/cgroups/cgroups_linux.go +++ b/common/pkg/cgroups/cgroups_linux.go @@ -50,10 +50,6 @@ const ( cgroupRoot = "/sys/fs/cgroup" // CPU is the cpu controller. CPU = "cpu" - // CPUAcct is the cpuacct controller. - CPUAcct = "cpuacct" - // CPUset is the cpuset controller. - CPUset = "cpuset" // Memory is the memory controller. Memory = "memory" // Pids is the pids controller. 
@@ -67,7 +63,6 @@ var handlers map[string]controllerHandler func init() { handlers = map[string]controllerHandler{ CPU: getCPUHandler(), - CPUset: getCpusetHandler(), Memory: getMemoryHandler(), Pids: getPidsHandler(), Blkio: getBlkioHandler(), diff --git a/common/pkg/cgroups/cpuset_linux.go b/common/pkg/cgroups/cpuset_linux.go deleted file mode 100644 index 15689891ab..0000000000 --- a/common/pkg/cgroups/cpuset_linux.go +++ /dev/null @@ -1,19 +0,0 @@ -//go:build linux - -package cgroups - -import ( - "github.com/opencontainers/cgroups" -) - -type linuxCpusetHandler struct { -} - -func getCpusetHandler() *linuxCpusetHandler { - return &linuxCpusetHandler{} -} - -// Stat fills a metrics structure with usage stats for the controller. -func (c *linuxCpusetHandler) Stat(_ *CgroupControl, _ *cgroups.Stats) error { - return nil -} From 284865bb97d0904a8d8d1674500abdacbf47fb85 Mon Sep 17 00:00:00 2001 From: Lokesh Mandvekar Date: Mon, 24 Nov 2025 14:06:45 -0500 Subject: [PATCH 6/9] common/pkg/cgroups: Replace handler interface with stat functions Replace the controllerHandler interface with a simple statFunc type and convert all handler structs to plain functions. This eliminates unnecessary abstraction since all handlers only implemented a single Stat method. Changes: - Replace controllerHandler interface with statFunc type - Convert linuxBlkioHandler, linuxCPUHandler, linuxMemHandler, and linuxPidHandler structs to blkioStat, cpuStat, memoryStat, and pidsStat functions - Remove init() function and factory methods (getBlkioHandler, etc.) 
- Simplify handlers map to direct function references - Update Stat() method to call functions directly - Update AvailableControllers signature to use statFunc type Signed-off-by: Lokesh Mandvekar --- common/pkg/cgroups/blkio_linux.go | 11 ++--------- common/pkg/cgroups/cgroups_linux.go | 25 ++++++++++--------------- common/pkg/cgroups/cpu_linux.go | 11 ++--------- common/pkg/cgroups/memory_linux.go | 11 ++--------- common/pkg/cgroups/pids_linux.go | 11 ++--------- 5 files changed, 18 insertions(+), 51 deletions(-) diff --git a/common/pkg/cgroups/blkio_linux.go b/common/pkg/cgroups/blkio_linux.go index 82725e33af..c12bb09580 100644 --- a/common/pkg/cgroups/blkio_linux.go +++ b/common/pkg/cgroups/blkio_linux.go @@ -9,15 +9,8 @@ import ( "github.com/opencontainers/cgroups" ) -type linuxBlkioHandler struct { -} - -func getBlkioHandler() *linuxBlkioHandler { - return &linuxBlkioHandler{} -} - -// Stat fills a metrics structure with usage stats for the controller. -func (c *linuxBlkioHandler) Stat(ctr *CgroupControl, m *cgroups.Stats) error { +// blkioStat fills a metrics structure with usage stats for the blkio controller. +func blkioStat(ctr *CgroupControl, m *cgroups.Stats) error { var ioServiceBytesRecursive []cgroups.BlkioStatEntry // more details on the io.stat file format:X https://facebookmicrosites.github.io/cgroup2/docs/io-controller.html diff --git a/common/pkg/cgroups/cgroups_linux.go b/common/pkg/cgroups/cgroups_linux.go index fa5e7d85d8..b444aed2d7 100644 --- a/common/pkg/cgroups/cgroups_linux.go +++ b/common/pkg/cgroups/cgroups_linux.go @@ -42,9 +42,8 @@ type CgroupControl struct { systemd bool } -type controllerHandler interface { - Stat(*CgroupControl, *cgroups.Stats) error -} +// statFunc is a function that gathers statistics for a cgroup controller. 
+type statFunc func(*CgroupControl, *cgroups.Stats) error const ( cgroupRoot = "/sys/fs/cgroup" @@ -58,15 +57,11 @@ const ( Blkio = "blkio" ) -var handlers map[string]controllerHandler - -func init() { - handlers = map[string]controllerHandler{ - CPU: getCPUHandler(), - Memory: getMemoryHandler(), - Pids: getPidsHandler(), - Blkio: getBlkioHandler(), - } +var handlers = map[string]statFunc{ + CPU: cpuStat, + Memory: memoryStat, + Pids: pidsStat, + Blkio: blkioStat, } // getAvailableControllers get the available controllers. @@ -95,7 +90,7 @@ func getAvailableControllers() ([]string, error) { } // AvailableControllers get string:bool map of all the available controllers. -func AvailableControllers(exclude map[string]controllerHandler) ([]string, error) { +func AvailableControllers(exclude map[string]statFunc) ([]string, error) { return getAvailableControllers() } @@ -312,8 +307,8 @@ func (c *CgroupControl) Update(resources *cgroups.Resources) error { func (c *CgroupControl) Stat() (*cgroups.Stats, error) { m := cgroups.Stats{} found := false - for _, h := range handlers { - if err := h.Stat(c, &m); err != nil { + for _, statFunc := range handlers { + if err := statFunc(c, &m); err != nil { if !errors.Is(err, os.ErrNotExist) { return nil, err } diff --git a/common/pkg/cgroups/cpu_linux.go b/common/pkg/cgroups/cpu_linux.go index 5ea41b64ee..ced7239987 100644 --- a/common/pkg/cgroups/cpu_linux.go +++ b/common/pkg/cgroups/cpu_linux.go @@ -8,15 +8,8 @@ import ( "github.com/opencontainers/cgroups" ) -type linuxCPUHandler struct { -} - -func getCPUHandler() *linuxCPUHandler { - return &linuxCPUHandler{} -} - -// Stat fills a metrics structure with usage stats for the controller. -func (c *linuxCPUHandler) Stat(ctr *CgroupControl, m *cgroups.Stats) error { +// cpuStat fills a metrics structure with usage stats for the cpu controller. 
+func cpuStat(ctr *CgroupControl, m *cgroups.Stats) error { cpu := cgroups.CpuStats{} values, err := readCgroup2MapFile(ctr, "cpu.stat") if err != nil { diff --git a/common/pkg/cgroups/memory_linux.go b/common/pkg/cgroups/memory_linux.go index a49726c5df..52cf606efe 100644 --- a/common/pkg/cgroups/memory_linux.go +++ b/common/pkg/cgroups/memory_linux.go @@ -8,15 +8,8 @@ import ( "github.com/opencontainers/cgroups" ) -type linuxMemHandler struct { -} - -func getMemoryHandler() *linuxMemHandler { - return &linuxMemHandler{} -} - -// Stat fills a metrics structure with usage stats for the controller. -func (c *linuxMemHandler) Stat(ctr *CgroupControl, m *cgroups.Stats) error { +// memoryStat fills a metrics structure with usage stats for the memory controller. +func memoryStat(ctr *CgroupControl, m *cgroups.Stats) error { var err error memUsage := cgroups.MemoryStats{} diff --git a/common/pkg/cgroups/pids_linux.go b/common/pkg/cgroups/pids_linux.go index 8a9ee508d7..4b29a34ea9 100644 --- a/common/pkg/cgroups/pids_linux.go +++ b/common/pkg/cgroups/pids_linux.go @@ -8,15 +8,8 @@ import ( "github.com/opencontainers/cgroups" ) -type linuxPidHandler struct { -} - -func getPidsHandler() *linuxPidHandler { - return &linuxPidHandler{} -} - -// Stat fills a metrics structure with usage stats for the controller. -func (c *linuxPidHandler) Stat(ctr *CgroupControl, m *cgroups.Stats) error { +// pidsStat fills a metrics structure with usage stats for the pids controller. 
+func pidsStat(ctr *CgroupControl, m *cgroups.Stats) error { if ctr.config.Path == "" { // nothing we can do to retrieve the pids.current path return nil From ae262c8ce0a53a4ae7027178841c86135d7a8e4c Mon Sep 17 00:00:00 2001 From: Lokesh Mandvekar Date: Mon, 24 Nov 2025 14:11:43 -0500 Subject: [PATCH 7/9] vendor: remove unused files from github.com/opencontainers/cgroups Signed-off-by: Lokesh Mandvekar --- .../opencontainers/cgroups/fs/blkio.go | 310 --------------- .../opencontainers/cgroups/fs/cpu.go | 181 --------- .../opencontainers/cgroups/fs/cpuacct.go | 162 -------- .../opencontainers/cgroups/fs/cpuset.go | 276 -------------- .../opencontainers/cgroups/fs/devices.go | 38 -- .../opencontainers/cgroups/fs/error.go | 15 - .../opencontainers/cgroups/fs/freezer.go | 157 -------- .../opencontainers/cgroups/fs/fs.go | 294 --------------- .../opencontainers/cgroups/fs/hugetlb.go | 83 ---- .../opencontainers/cgroups/fs/memory.go | 356 ------------------ .../opencontainers/cgroups/fs/name.go | 30 -- .../opencontainers/cgroups/fs/net_cls.go | 31 -- .../opencontainers/cgroups/fs/net_prio.go | 29 -- .../opencontainers/cgroups/fs/paths.go | 169 --------- .../opencontainers/cgroups/fs/perf_event.go | 23 -- .../opencontainers/cgroups/fs/pids.go | 66 ---- .../opencontainers/cgroups/fs/rdma.go | 24 -- vendor/modules.txt | 1 - 18 files changed, 2245 deletions(-) delete mode 100644 vendor/github.com/opencontainers/cgroups/fs/blkio.go delete mode 100644 vendor/github.com/opencontainers/cgroups/fs/cpu.go delete mode 100644 vendor/github.com/opencontainers/cgroups/fs/cpuacct.go delete mode 100644 vendor/github.com/opencontainers/cgroups/fs/cpuset.go delete mode 100644 vendor/github.com/opencontainers/cgroups/fs/devices.go delete mode 100644 vendor/github.com/opencontainers/cgroups/fs/error.go delete mode 100644 vendor/github.com/opencontainers/cgroups/fs/freezer.go delete mode 100644 vendor/github.com/opencontainers/cgroups/fs/fs.go delete mode 100644 
vendor/github.com/opencontainers/cgroups/fs/hugetlb.go delete mode 100644 vendor/github.com/opencontainers/cgroups/fs/memory.go delete mode 100644 vendor/github.com/opencontainers/cgroups/fs/name.go delete mode 100644 vendor/github.com/opencontainers/cgroups/fs/net_cls.go delete mode 100644 vendor/github.com/opencontainers/cgroups/fs/net_prio.go delete mode 100644 vendor/github.com/opencontainers/cgroups/fs/paths.go delete mode 100644 vendor/github.com/opencontainers/cgroups/fs/perf_event.go delete mode 100644 vendor/github.com/opencontainers/cgroups/fs/pids.go delete mode 100644 vendor/github.com/opencontainers/cgroups/fs/rdma.go diff --git a/vendor/github.com/opencontainers/cgroups/fs/blkio.go b/vendor/github.com/opencontainers/cgroups/fs/blkio.go deleted file mode 100644 index f3c4c5cf81..0000000000 --- a/vendor/github.com/opencontainers/cgroups/fs/blkio.go +++ /dev/null @@ -1,310 +0,0 @@ -package fs - -import ( - "bufio" - "os" - "path/filepath" - "strconv" - "strings" - - "github.com/opencontainers/cgroups" -) - -type BlkioGroup struct { - weightFilename string - weightDeviceFilename string -} - -func (s *BlkioGroup) Name() string { - return "blkio" -} - -func (s *BlkioGroup) Apply(path string, _ *cgroups.Resources, pid int) error { - return apply(path, pid) -} - -func (s *BlkioGroup) Set(path string, r *cgroups.Resources) error { - s.detectWeightFilenames(path) - if r.BlkioWeight != 0 { - if err := cgroups.WriteFile(path, s.weightFilename, strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil { - return err - } - } - - if r.BlkioLeafWeight != 0 { - if err := cgroups.WriteFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(r.BlkioLeafWeight), 10)); err != nil { - return err - } - } - for _, wd := range r.BlkioWeightDevice { - if wd.Weight != 0 { - if err := cgroups.WriteFile(path, s.weightDeviceFilename, wd.WeightString()); err != nil { - return err - } - } - if wd.LeafWeight != 0 { - if err := cgroups.WriteFile(path, "blkio.leaf_weight_device", 
wd.LeafWeightString()); err != nil { - return err - } - } - } - for _, td := range r.BlkioThrottleReadBpsDevice { - if err := cgroups.WriteFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil { - return err - } - } - for _, td := range r.BlkioThrottleWriteBpsDevice { - if err := cgroups.WriteFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil { - return err - } - } - for _, td := range r.BlkioThrottleReadIOPSDevice { - if err := cgroups.WriteFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil { - return err - } - } - for _, td := range r.BlkioThrottleWriteIOPSDevice { - if err := cgroups.WriteFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil { - return err - } - } - - return nil -} - -/* -examples: - - blkio.sectors - 8:0 6792 - - blkio.io_service_bytes - 8:0 Read 1282048 - 8:0 Write 2195456 - 8:0 Sync 2195456 - 8:0 Async 1282048 - 8:0 Total 3477504 - Total 3477504 - - blkio.io_serviced - 8:0 Read 124 - 8:0 Write 104 - 8:0 Sync 104 - 8:0 Async 124 - 8:0 Total 228 - Total 228 - - blkio.io_queued - 8:0 Read 0 - 8:0 Write 0 - 8:0 Sync 0 - 8:0 Async 0 - 8:0 Total 0 - Total 0 -*/ - -func splitBlkioStatLine(r rune) bool { - return r == ' ' || r == ':' -} - -func getBlkioStat(dir, file string) ([]cgroups.BlkioStatEntry, error) { - var blkioStats []cgroups.BlkioStatEntry - f, err := cgroups.OpenFile(dir, file, os.O_RDONLY) - if err != nil { - if os.IsNotExist(err) { - return blkioStats, nil - } - return nil, err - } - defer f.Close() - - sc := bufio.NewScanner(f) - for sc.Scan() { - // format: dev type amount - fields := strings.FieldsFunc(sc.Text(), splitBlkioStatLine) - if len(fields) < 3 { - if len(fields) == 2 && fields[0] == "Total" { - // skip total line - continue - } else { - return nil, malformedLine(dir, file, sc.Text()) - } - } - - v, err := strconv.ParseUint(fields[0], 10, 64) - if err != nil { - return nil, &parseError{Path: dir, File: file, Err: err} - } - major := v - - v, err = 
strconv.ParseUint(fields[1], 10, 64) - if err != nil { - return nil, &parseError{Path: dir, File: file, Err: err} - } - minor := v - - op := "" - valueField := 2 - if len(fields) == 4 { - op = fields[2] - valueField = 3 - } - v, err = strconv.ParseUint(fields[valueField], 10, 64) - if err != nil { - return nil, &parseError{Path: dir, File: file, Err: err} - } - blkioStats = append(blkioStats, cgroups.BlkioStatEntry{Major: major, Minor: minor, Op: op, Value: v}) - } - if err := sc.Err(); err != nil { - return nil, &parseError{Path: dir, File: file, Err: err} - } - - return blkioStats, nil -} - -func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error { - type blkioStatInfo struct { - filename string - blkioStatEntriesPtr *[]cgroups.BlkioStatEntry - } - bfqDebugStats := []blkioStatInfo{ - { - filename: "blkio.bfq.sectors_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive, - }, - { - filename: "blkio.bfq.io_service_time_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoServiceTimeRecursive, - }, - { - filename: "blkio.bfq.io_wait_time_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoWaitTimeRecursive, - }, - { - filename: "blkio.bfq.io_merged_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoMergedRecursive, - }, - { - filename: "blkio.bfq.io_queued_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoQueuedRecursive, - }, - { - filename: "blkio.bfq.time_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoTimeRecursive, - }, - { - filename: "blkio.bfq.io_serviced_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive, - }, - { - filename: "blkio.bfq.io_service_bytes_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive, - }, - } - bfqStats := []blkioStatInfo{ - { - filename: "blkio.bfq.io_serviced_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive, - }, - { - filename: "blkio.bfq.io_service_bytes_recursive", - blkioStatEntriesPtr: 
&stats.BlkioStats.IoServiceBytesRecursive, - }, - } - cfqStats := []blkioStatInfo{ - { - filename: "blkio.sectors_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive, - }, - { - filename: "blkio.io_service_time_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoServiceTimeRecursive, - }, - { - filename: "blkio.io_wait_time_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoWaitTimeRecursive, - }, - { - filename: "blkio.io_merged_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoMergedRecursive, - }, - { - filename: "blkio.io_queued_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoQueuedRecursive, - }, - { - filename: "blkio.time_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoTimeRecursive, - }, - { - filename: "blkio.io_serviced_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive, - }, - { - filename: "blkio.io_service_bytes_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive, - }, - } - throttleRecursiveStats := []blkioStatInfo{ - { - filename: "blkio.throttle.io_serviced_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive, - }, - { - filename: "blkio.throttle.io_service_bytes_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive, - }, - } - baseStats := []blkioStatInfo{ - { - filename: "blkio.throttle.io_serviced", - blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive, - }, - { - filename: "blkio.throttle.io_service_bytes", - blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive, - }, - } - orderedStats := [][]blkioStatInfo{ - bfqDebugStats, - bfqStats, - cfqStats, - throttleRecursiveStats, - baseStats, - } - - var blkioStats []cgroups.BlkioStatEntry - var err error - - for _, statGroup := range orderedStats { - for i, statInfo := range statGroup { - if blkioStats, err = getBlkioStat(path, statInfo.filename); err != nil || blkioStats == nil { - // if error occurs on first file, move to next group - if i == 0 { - 
break - } - return err - } - *statInfo.blkioStatEntriesPtr = blkioStats - // finish if all stats are gathered - if i == len(statGroup)-1 { - return nil - } - } - } - return nil -} - -func (s *BlkioGroup) detectWeightFilenames(path string) { - if s.weightFilename != "" { - // Already detected. - return - } - if cgroups.PathExists(filepath.Join(path, "blkio.weight")) { - s.weightFilename = "blkio.weight" - s.weightDeviceFilename = "blkio.weight_device" - } else { - s.weightFilename = "blkio.bfq.weight" - s.weightDeviceFilename = "blkio.bfq.weight_device" - } -} diff --git a/vendor/github.com/opencontainers/cgroups/fs/cpu.go b/vendor/github.com/opencontainers/cgroups/fs/cpu.go deleted file mode 100644 index 3e05788a3f..0000000000 --- a/vendor/github.com/opencontainers/cgroups/fs/cpu.go +++ /dev/null @@ -1,181 +0,0 @@ -package fs - -import ( - "bufio" - "errors" - "fmt" - "os" - "strconv" - - "github.com/opencontainers/cgroups" - "github.com/opencontainers/cgroups/fscommon" - "golang.org/x/sys/unix" -) - -type CpuGroup struct{} - -func (s *CpuGroup) Name() string { - return "cpu" -} - -func (s *CpuGroup) Apply(path string, r *cgroups.Resources, pid int) error { - if err := os.MkdirAll(path, 0o755); err != nil { - return err - } - // We should set the real-Time group scheduling settings before moving - // in the process because if the process is already in SCHED_RR mode - // and no RT bandwidth is set, adding it will fail. - if err := s.SetRtSched(path, r); err != nil { - return err - } - // Since we are not using apply(), we need to place the pid - // into the procs file. 
- return cgroups.WriteCgroupProc(path, pid) -} - -func (s *CpuGroup) SetRtSched(path string, r *cgroups.Resources) error { - var period string - if r.CpuRtPeriod != 0 { - period = strconv.FormatUint(r.CpuRtPeriod, 10) - if err := cgroups.WriteFile(path, "cpu.rt_period_us", period); err != nil { - // The values of cpu.rt_period_us and cpu.rt_runtime_us - // are inter-dependent and need to be set in a proper order. - // If the kernel rejects the new period value with EINVAL - // and the new runtime value is also being set, let's - // ignore the error for now and retry later. - if !errors.Is(err, unix.EINVAL) || r.CpuRtRuntime == 0 { - return err - } - } else { - period = "" - } - } - if r.CpuRtRuntime != 0 { - if err := cgroups.WriteFile(path, "cpu.rt_runtime_us", strconv.FormatInt(r.CpuRtRuntime, 10)); err != nil { - return err - } - if period != "" { - if err := cgroups.WriteFile(path, "cpu.rt_period_us", period); err != nil { - return err - } - } - } - return nil -} - -func (s *CpuGroup) Set(path string, r *cgroups.Resources) error { - if r.CpuShares != 0 { - shares := r.CpuShares - if err := cgroups.WriteFile(path, "cpu.shares", strconv.FormatUint(shares, 10)); err != nil { - return err - } - // read it back - sharesRead, err := fscommon.GetCgroupParamUint(path, "cpu.shares") - if err != nil { - return err - } - // ... and check - if shares > sharesRead { - return fmt.Errorf("the maximum allowed cpu-shares is %d", sharesRead) - } else if shares < sharesRead { - return fmt.Errorf("the minimum allowed cpu-shares is %d", sharesRead) - } - } - - var period string - if r.CpuPeriod != 0 { - period = strconv.FormatUint(r.CpuPeriod, 10) - if err := cgroups.WriteFile(path, "cpu.cfs_period_us", period); err != nil { - // Sometimes when the period to be set is smaller - // than the current one, it is rejected by the kernel - // (EINVAL) as old_quota/new_period exceeds the parent - // cgroup quota limit. 
If this happens and the quota is - // going to be set, ignore the error for now and retry - // after setting the quota. - if !errors.Is(err, unix.EINVAL) || r.CpuQuota == 0 { - return err - } - } else { - period = "" - } - } - - var burst string - if r.CpuBurst != nil { - burst = strconv.FormatUint(*r.CpuBurst, 10) - if err := cgroups.WriteFile(path, "cpu.cfs_burst_us", burst); err != nil { - if errors.Is(err, unix.ENOENT) { - // If CPU burst knob is not available (e.g. - // older kernel), ignore it. - burst = "" - } else { - // Sometimes when the burst to be set is larger - // than the current one, it is rejected by the kernel - // (EINVAL) as old_quota/new_burst exceeds the parent - // cgroup quota limit. If this happens and the quota is - // going to be set, ignore the error for now and retry - // after setting the quota. - if !errors.Is(err, unix.EINVAL) || r.CpuQuota == 0 { - return err - } - } - } else { - burst = "" - } - } - if r.CpuQuota != 0 { - if err := cgroups.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(r.CpuQuota, 10)); err != nil { - return err - } - if period != "" { - if err := cgroups.WriteFile(path, "cpu.cfs_period_us", period); err != nil { - return err - } - } - if burst != "" { - if err := cgroups.WriteFile(path, "cpu.cfs_burst_us", burst); err != nil { - return err - } - } - } - - if r.CPUIdle != nil { - idle := strconv.FormatInt(*r.CPUIdle, 10) - if err := cgroups.WriteFile(path, "cpu.idle", idle); err != nil { - return err - } - } - - return s.SetRtSched(path, r) -} - -func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error { - const file = "cpu.stat" - f, err := cgroups.OpenFile(path, file, os.O_RDONLY) - if err != nil { - if os.IsNotExist(err) { - return nil - } - return err - } - defer f.Close() - - sc := bufio.NewScanner(f) - for sc.Scan() { - t, v, err := fscommon.ParseKeyValue(sc.Text()) - if err != nil { - return &parseError{Path: path, File: file, Err: err} - } - switch t { - case "nr_periods": - 
stats.CpuStats.ThrottlingData.Periods = v - - case "nr_throttled": - stats.CpuStats.ThrottlingData.ThrottledPeriods = v - - case "throttled_time": - stats.CpuStats.ThrottlingData.ThrottledTime = v - } - } - return nil -} diff --git a/vendor/github.com/opencontainers/cgroups/fs/cpuacct.go b/vendor/github.com/opencontainers/cgroups/fs/cpuacct.go deleted file mode 100644 index bde25b0759..0000000000 --- a/vendor/github.com/opencontainers/cgroups/fs/cpuacct.go +++ /dev/null @@ -1,162 +0,0 @@ -package fs - -import ( - "bufio" - "os" - "strconv" - "strings" - - "github.com/opencontainers/cgroups" - "github.com/opencontainers/cgroups/fscommon" -) - -const ( - nsInSec = 1000000000 - - // The value comes from `C.sysconf(C._SC_CLK_TCK)`, and - // on Linux it's a constant which is safe to be hard coded, - // so we can avoid using cgo here. For details, see: - // https://github.com/containerd/cgroups/pull/12 - clockTicks uint64 = 100 -) - -type CpuacctGroup struct{} - -func (s *CpuacctGroup) Name() string { - return "cpuacct" -} - -func (s *CpuacctGroup) Apply(path string, _ *cgroups.Resources, pid int) error { - return apply(path, pid) -} - -func (s *CpuacctGroup) Set(_ string, _ *cgroups.Resources) error { - return nil -} - -func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error { - if !cgroups.PathExists(path) { - return nil - } - userModeUsage, kernelModeUsage, err := getCpuUsageBreakdown(path) - if err != nil { - return err - } - - totalUsage, err := fscommon.GetCgroupParamUint(path, "cpuacct.usage") - if err != nil { - return err - } - - percpuUsage, err := getPercpuUsage(path) - if err != nil { - return err - } - - percpuUsageInKernelmode, percpuUsageInUsermode, err := getPercpuUsageInModes(path) - if err != nil { - return err - } - - stats.CpuStats.CpuUsage.TotalUsage = totalUsage - stats.CpuStats.CpuUsage.PercpuUsage = percpuUsage - stats.CpuStats.CpuUsage.PercpuUsageInKernelmode = percpuUsageInKernelmode - 
stats.CpuStats.CpuUsage.PercpuUsageInUsermode = percpuUsageInUsermode - stats.CpuStats.CpuUsage.UsageInUsermode = userModeUsage - stats.CpuStats.CpuUsage.UsageInKernelmode = kernelModeUsage - return nil -} - -// Returns user and kernel usage breakdown in nanoseconds. -func getCpuUsageBreakdown(path string) (uint64, uint64, error) { - var userModeUsage, kernelModeUsage uint64 - const ( - userField = "user" - systemField = "system" - file = "cpuacct.stat" - ) - - // Expected format: - // user - // system - data, err := cgroups.ReadFile(path, file) - if err != nil { - return 0, 0, err - } - - fields := strings.Fields(data) - if len(fields) < 4 || fields[0] != userField || fields[2] != systemField { - return 0, 0, malformedLine(path, file, data) - } - if userModeUsage, err = strconv.ParseUint(fields[1], 10, 64); err != nil { - return 0, 0, &parseError{Path: path, File: file, Err: err} - } - if kernelModeUsage, err = strconv.ParseUint(fields[3], 10, 64); err != nil { - return 0, 0, &parseError{Path: path, File: file, Err: err} - } - - return (userModeUsage * nsInSec) / clockTicks, (kernelModeUsage * nsInSec) / clockTicks, nil -} - -func getPercpuUsage(path string) ([]uint64, error) { - const file = "cpuacct.usage_percpu" - percpuUsage := []uint64{} - data, err := cgroups.ReadFile(path, file) - if err != nil { - return percpuUsage, err - } - for _, value := range strings.Fields(data) { - value, err := strconv.ParseUint(value, 10, 64) - if err != nil { - return percpuUsage, &parseError{Path: path, File: file, Err: err} - } - percpuUsage = append(percpuUsage, value) - } - return percpuUsage, nil -} - -func getPercpuUsageInModes(path string) ([]uint64, []uint64, error) { - usageKernelMode := []uint64{} - usageUserMode := []uint64{} - const file = "cpuacct.usage_all" - - fd, err := cgroups.OpenFile(path, file, os.O_RDONLY) - if os.IsNotExist(err) { - return usageKernelMode, usageUserMode, nil - } else if err != nil { - return nil, nil, err - } - defer fd.Close() - - scanner 
:= bufio.NewScanner(fd) - scanner.Scan() // Read header line. - const want = "cpu user system" - if hdr := scanner.Text(); !strings.HasPrefix(hdr, want) { - return nil, nil, malformedLine(path, file, hdr) - } - - for scanner.Scan() { - // Each line is: cpu user system. Keep N at 4 to ignore extra fields. - fields := strings.SplitN(scanner.Text(), " ", 4) - if len(fields) < 3 { - continue - } - - user, err := strconv.ParseUint(fields[1], 10, 64) - if err != nil { - return nil, nil, &parseError{Path: path, File: file, Err: err} - } - usageUserMode = append(usageUserMode, user) - - kernel, err := strconv.ParseUint(fields[2], 10, 64) - if err != nil { - return nil, nil, &parseError{Path: path, File: file, Err: err} - } - usageKernelMode = append(usageKernelMode, kernel) - } - if err := scanner.Err(); err != nil { - return nil, nil, &parseError{Path: path, File: file, Err: err} - } - - return usageKernelMode, usageUserMode, nil -} diff --git a/vendor/github.com/opencontainers/cgroups/fs/cpuset.go b/vendor/github.com/opencontainers/cgroups/fs/cpuset.go deleted file mode 100644 index ef6ff7da30..0000000000 --- a/vendor/github.com/opencontainers/cgroups/fs/cpuset.go +++ /dev/null @@ -1,276 +0,0 @@ -package fs - -import ( - "errors" - "os" - "path/filepath" - "strconv" - "strings" - "sync" - - "golang.org/x/sys/unix" - - "github.com/opencontainers/cgroups" - "github.com/opencontainers/cgroups/fscommon" -) - -var ( - cpusetLock sync.Mutex - cpusetPrefix = "cpuset." - cpusetFastPath bool -) - -func cpusetFile(path string, name string) string { - cpusetLock.Lock() - defer cpusetLock.Unlock() - - // Only the v1 cpuset cgroup is allowed to mount with noprefix. - // See kernel source: https://github.com/torvalds/linux/blob/2e1b3cc9d7f790145a80cb705b168f05dab65df2/kernel/cgroup/cgroup-v1.c#L1070 - // Cpuset cannot be mounted with and without prefix simultaneously. - // Commonly used in Android environments. 
- - if cpusetFastPath { - return cpusetPrefix + name - } - - err := unix.Access(filepath.Join(path, cpusetPrefix+name), unix.F_OK) - if err == nil { - // Use the fast path only if we can access one type of mount for cpuset already - cpusetFastPath = true - } else { - err = unix.Access(filepath.Join(path, name), unix.F_OK) - if err == nil { - cpusetPrefix = "" - cpusetFastPath = true - } - } - - return cpusetPrefix + name -} - -type CpusetGroup struct{} - -func (s *CpusetGroup) Name() string { - return "cpuset" -} - -func (s *CpusetGroup) Apply(path string, r *cgroups.Resources, pid int) error { - return s.ApplyDir(path, r, pid) -} - -func (s *CpusetGroup) Set(path string, r *cgroups.Resources) error { - if r.CpusetCpus != "" { - if err := cgroups.WriteFile(path, cpusetFile(path, "cpus"), r.CpusetCpus); err != nil { - return err - } - } - if r.CpusetMems != "" { - if err := cgroups.WriteFile(path, cpusetFile(path, "mems"), r.CpusetMems); err != nil { - return err - } - } - return nil -} - -func getCpusetStat(path string, file string) ([]uint16, error) { - var extracted []uint16 - fileContent, err := fscommon.GetCgroupParamString(path, file) - if err != nil { - return extracted, err - } - if len(fileContent) == 0 { - return extracted, &parseError{Path: path, File: file, Err: errors.New("empty file")} - } - - for _, s := range strings.Split(fileContent, ",") { - fromStr, toStr, ok := strings.Cut(s, "-") - if ok { - from, err := strconv.ParseUint(fromStr, 10, 16) - if err != nil { - return extracted, &parseError{Path: path, File: file, Err: err} - } - to, err := strconv.ParseUint(toStr, 10, 16) - if err != nil { - return extracted, &parseError{Path: path, File: file, Err: err} - } - if from > to { - return extracted, &parseError{Path: path, File: file, Err: errors.New("invalid values, from > to")} - } - for i := from; i <= to; i++ { - extracted = append(extracted, uint16(i)) - } - } else { - value, err := strconv.ParseUint(s, 10, 16) - if err != nil { - return 
extracted, &parseError{Path: path, File: file, Err: err} - } - extracted = append(extracted, uint16(value)) - } - } - - return extracted, nil -} - -func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error { - var err error - - stats.CPUSetStats.CPUs, err = getCpusetStat(path, cpusetFile(path, "cpus")) - if err != nil && !errors.Is(err, os.ErrNotExist) { - return err - } - - stats.CPUSetStats.CPUExclusive, err = fscommon.GetCgroupParamUint(path, cpusetFile(path, "cpu_exclusive")) - if err != nil && !errors.Is(err, os.ErrNotExist) { - return err - } - - stats.CPUSetStats.Mems, err = getCpusetStat(path, cpusetFile(path, "mems")) - if err != nil && !errors.Is(err, os.ErrNotExist) { - return err - } - - stats.CPUSetStats.MemHardwall, err = fscommon.GetCgroupParamUint(path, cpusetFile(path, "mem_hardwall")) - if err != nil && !errors.Is(err, os.ErrNotExist) { - return err - } - - stats.CPUSetStats.MemExclusive, err = fscommon.GetCgroupParamUint(path, cpusetFile(path, "mem_exclusive")) - if err != nil && !errors.Is(err, os.ErrNotExist) { - return err - } - - stats.CPUSetStats.MemoryMigrate, err = fscommon.GetCgroupParamUint(path, cpusetFile(path, "memory_migrate")) - if err != nil && !errors.Is(err, os.ErrNotExist) { - return err - } - - stats.CPUSetStats.MemorySpreadPage, err = fscommon.GetCgroupParamUint(path, cpusetFile(path, "memory_spread_page")) - if err != nil && !errors.Is(err, os.ErrNotExist) { - return err - } - - stats.CPUSetStats.MemorySpreadSlab, err = fscommon.GetCgroupParamUint(path, cpusetFile(path, "memory_spread_slab")) - if err != nil && !errors.Is(err, os.ErrNotExist) { - return err - } - - stats.CPUSetStats.MemoryPressure, err = fscommon.GetCgroupParamUint(path, cpusetFile(path, "memory_pressure")) - if err != nil && !errors.Is(err, os.ErrNotExist) { - return err - } - - stats.CPUSetStats.SchedLoadBalance, err = fscommon.GetCgroupParamUint(path, cpusetFile(path, "sched_load_balance")) - if err != nil && !errors.Is(err, os.ErrNotExist) { 
- return err - } - - stats.CPUSetStats.SchedRelaxDomainLevel, err = fscommon.GetCgroupParamInt(path, cpusetFile(path, "sched_relax_domain_level")) - if err != nil && !errors.Is(err, os.ErrNotExist) { - return err - } - - return nil -} - -func (s *CpusetGroup) ApplyDir(dir string, r *cgroups.Resources, pid int) error { - // This might happen if we have no cpuset cgroup mounted. - // Just do nothing and don't fail. - if dir == "" { - return nil - } - // 'ensureParent' start with parent because we don't want to - // explicitly inherit from parent, it could conflict with - // 'cpuset.cpu_exclusive'. - if err := cpusetEnsureParent(filepath.Dir(dir)); err != nil { - return err - } - if err := os.Mkdir(dir, 0o755); err != nil && !os.IsExist(err) { - return err - } - // We didn't inherit cpuset configs from parent, but we have - // to ensure cpuset configs are set before moving task into the - // cgroup. - // The logic is, if user specified cpuset configs, use these - // specified configs, otherwise, inherit from parent. This makes - // cpuset configs work correctly with 'cpuset.cpu_exclusive', and - // keep backward compatibility. - if err := s.ensureCpusAndMems(dir, r); err != nil { - return err - } - // Since we are not using apply(), we need to place the pid - // into the procs file. - return cgroups.WriteCgroupProc(dir, pid) -} - -func getCpusetSubsystemSettings(parent string) (cpus, mems string, err error) { - if cpus, err = cgroups.ReadFile(parent, cpusetFile(parent, "cpus")); err != nil { - return - } - if mems, err = cgroups.ReadFile(parent, cpusetFile(parent, "mems")); err != nil { - return - } - return cpus, mems, nil -} - -// cpusetEnsureParent makes sure that the parent directories of current -// are created and populated with the proper cpus and mems files copied -// from their respective parent. It does that recursively, starting from -// the top of the cpuset hierarchy (i.e. cpuset cgroup mount point). 
-func cpusetEnsureParent(current string) error { - var st unix.Statfs_t - - parent := filepath.Dir(current) - err := unix.Statfs(parent, &st) - if err == nil && st.Type != unix.CGROUP_SUPER_MAGIC { - return nil - } - // Treat non-existing directory as cgroupfs as it will be created, - // and the root cpuset directory obviously exists. - if err != nil && err != unix.ENOENT { - return &os.PathError{Op: "statfs", Path: parent, Err: err} - } - - if err := cpusetEnsureParent(parent); err != nil { - return err - } - if err := os.Mkdir(current, 0o755); err != nil && !os.IsExist(err) { - return err - } - return cpusetCopyIfNeeded(current, parent) -} - -// cpusetCopyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent -// directory to the current directory if the file's contents are 0 -func cpusetCopyIfNeeded(current, parent string) error { - currentCpus, currentMems, err := getCpusetSubsystemSettings(current) - if err != nil { - return err - } - parentCpus, parentMems, err := getCpusetSubsystemSettings(parent) - if err != nil { - return err - } - - if isEmptyCpuset(currentCpus) { - if err := cgroups.WriteFile(current, cpusetFile(current, "cpus"), parentCpus); err != nil { - return err - } - } - if isEmptyCpuset(currentMems) { - if err := cgroups.WriteFile(current, cpusetFile(current, "mems"), parentMems); err != nil { - return err - } - } - return nil -} - -func isEmptyCpuset(str string) bool { - return str == "" || str == "\n" -} - -func (s *CpusetGroup) ensureCpusAndMems(path string, r *cgroups.Resources) error { - if err := s.Set(path, r); err != nil { - return err - } - return cpusetCopyIfNeeded(path, filepath.Dir(path)) -} diff --git a/vendor/github.com/opencontainers/cgroups/fs/devices.go b/vendor/github.com/opencontainers/cgroups/fs/devices.go deleted file mode 100644 index 26483ecb7d..0000000000 --- a/vendor/github.com/opencontainers/cgroups/fs/devices.go +++ /dev/null @@ -1,38 +0,0 @@ -package fs - -import ( - "github.com/opencontainers/cgroups" -) - 
-type DevicesGroup struct{} - -func (s *DevicesGroup) Name() string { - return "devices" -} - -func (s *DevicesGroup) Apply(path string, r *cgroups.Resources, pid int) error { - if r.SkipDevices { - return nil - } - if path == "" { - // Return error here, since devices cgroup - // is a hard requirement for container's security. - return errSubsystemDoesNotExist - } - - return apply(path, pid) -} - -func (s *DevicesGroup) Set(path string, r *cgroups.Resources) error { - if cgroups.DevicesSetV1 == nil { - if len(r.Devices) == 0 { - return nil - } - return cgroups.ErrDevicesUnsupported - } - return cgroups.DevicesSetV1(path, r) -} - -func (s *DevicesGroup) GetStats(path string, stats *cgroups.Stats) error { - return nil -} diff --git a/vendor/github.com/opencontainers/cgroups/fs/error.go b/vendor/github.com/opencontainers/cgroups/fs/error.go deleted file mode 100644 index f13033e3d8..0000000000 --- a/vendor/github.com/opencontainers/cgroups/fs/error.go +++ /dev/null @@ -1,15 +0,0 @@ -package fs - -import ( - "fmt" - - "github.com/opencontainers/cgroups/fscommon" -) - -type parseError = fscommon.ParseError - -// malformedLine is used by all cgroupfs file parsers that expect a line -// in a particular format but get some garbage instead. 
-func malformedLine(path, file, line string) error { - return &parseError{Path: path, File: file, Err: fmt.Errorf("malformed line: %s", line)} -} diff --git a/vendor/github.com/opencontainers/cgroups/fs/freezer.go b/vendor/github.com/opencontainers/cgroups/fs/freezer.go deleted file mode 100644 index fe0f0dde48..0000000000 --- a/vendor/github.com/opencontainers/cgroups/fs/freezer.go +++ /dev/null @@ -1,157 +0,0 @@ -package fs - -import ( - "errors" - "fmt" - "os" - "strings" - "time" - - "github.com/opencontainers/cgroups" - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" -) - -type FreezerGroup struct{} - -func (s *FreezerGroup) Name() string { - return "freezer" -} - -func (s *FreezerGroup) Apply(path string, _ *cgroups.Resources, pid int) error { - return apply(path, pid) -} - -func (s *FreezerGroup) Set(path string, r *cgroups.Resources) (Err error) { - switch r.Freezer { - case cgroups.Frozen: - defer func() { - if Err != nil { - // Freezing failed, and it is bad and dangerous - // to leave the cgroup in FROZEN or FREEZING - // state, so (try to) thaw it back. - _ = cgroups.WriteFile(path, "freezer.state", string(cgroups.Thawed)) - } - }() - - // As per older kernel docs (freezer-subsystem.txt before - // kernel commit ef9fe980c6fcc1821), if FREEZING is seen, - // userspace should either retry or thaw. While current - // kernel cgroup v1 docs no longer mention a need to retry, - // even a recent kernel (v5.4, Ubuntu 20.04) can't reliably - // freeze a cgroup v1 while new processes keep appearing in it - // (either via fork/clone or by writing new PIDs to - // cgroup.procs). - // - // The numbers below are empirically chosen to have a decent - // chance to succeed in various scenarios ("runc pause/unpause - // with parallel runc exec" and "bare freeze/unfreeze on a very - // slow system"), tested on RHEL7 and Ubuntu 20.04 kernels. 
- // - // Adding any amount of sleep in between retries did not - // increase the chances of successful freeze in "pause/unpause - // with parallel exec" reproducer. OTOH, adding an occasional - // sleep helped for the case where the system is extremely slow - // (CentOS 7 VM on GHA CI). - // - // Alas, this is still a game of chances, since the real fix - // belong to the kernel (cgroup v2 do not have this bug). - - for i := range 1000 { - if i%50 == 49 { - // Occasional thaw and sleep improves - // the chances to succeed in freezing - // in case new processes keep appearing - // in the cgroup. - _ = cgroups.WriteFile(path, "freezer.state", string(cgroups.Thawed)) - time.Sleep(10 * time.Millisecond) - } - - if err := cgroups.WriteFile(path, "freezer.state", string(cgroups.Frozen)); err != nil { - return err - } - - if i%25 == 24 { - // Occasional short sleep before reading - // the state back also improves the chances to - // succeed in freezing in case of a very slow - // system. - time.Sleep(10 * time.Microsecond) - } - state, err := cgroups.ReadFile(path, "freezer.state") - if err != nil { - return err - } - state = strings.TrimSpace(state) - switch state { - case "FREEZING": - continue - case string(cgroups.Frozen): - if i > 1 { - logrus.Debugf("frozen after %d retries", i) - } - return nil - default: - // should never happen - return fmt.Errorf("unexpected state %s while freezing", strings.TrimSpace(state)) - } - } - // Despite our best efforts, it got stuck in FREEZING. 
- return errors.New("unable to freeze") - case cgroups.Thawed: - return cgroups.WriteFile(path, "freezer.state", string(cgroups.Thawed)) - case cgroups.Undefined: - return nil - default: - return fmt.Errorf("Invalid argument '%s' to freezer.state", string(r.Freezer)) - } -} - -func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error { - return nil -} - -func (s *FreezerGroup) GetState(path string) (cgroups.FreezerState, error) { - for { - state, err := cgroups.ReadFile(path, "freezer.state") - if err != nil { - // If the kernel is too old, then we just treat the freezer as - // being in an "undefined" state. - if os.IsNotExist(err) || errors.Is(err, unix.ENODEV) { - err = nil - } - return cgroups.Undefined, err - } - switch strings.TrimSpace(state) { - case "THAWED": - return cgroups.Thawed, nil - case "FROZEN": - // Find out whether the cgroup is frozen directly, - // or indirectly via an ancestor. - self, err := cgroups.ReadFile(path, "freezer.self_freezing") - if err != nil { - // If the kernel is too old, then we just treat - // it as being frozen. - if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.ENODEV) { - err = nil - } - return cgroups.Frozen, err - } - switch self { - case "0\n": - return cgroups.Thawed, nil - case "1\n": - return cgroups.Frozen, nil - default: - return cgroups.Undefined, fmt.Errorf(`unknown "freezer.self_freezing" state: %q`, self) - } - case "FREEZING": - // Make sure we get a stable freezer state, so retry if the cgroup - // is still undergoing freezing. This should be a temporary delay. 
- time.Sleep(1 * time.Millisecond) - continue - default: - return cgroups.Undefined, fmt.Errorf("unknown freezer.state %q", state) - } - } -} diff --git a/vendor/github.com/opencontainers/cgroups/fs/fs.go b/vendor/github.com/opencontainers/cgroups/fs/fs.go deleted file mode 100644 index 625931193e..0000000000 --- a/vendor/github.com/opencontainers/cgroups/fs/fs.go +++ /dev/null @@ -1,294 +0,0 @@ -package fs - -import ( - "errors" - "fmt" - "os" - "path" - "strings" - "sync" - - "golang.org/x/sys/unix" - - "github.com/opencontainers/cgroups" - "github.com/opencontainers/cgroups/fscommon" -) - -var subsystems = []subsystem{ - &CpusetGroup{}, - &DevicesGroup{}, - &MemoryGroup{}, - &CpuGroup{}, - &CpuacctGroup{}, - &PidsGroup{}, - &BlkioGroup{}, - &HugetlbGroup{}, - &NetClsGroup{}, - &NetPrioGroup{}, - &PerfEventGroup{}, - &FreezerGroup{}, - &RdmaGroup{}, - &NameGroup{GroupName: "name=systemd", Join: true}, - &NameGroup{GroupName: "misc", Join: true}, -} - -var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist") - -func init() { - // If using cgroups-hybrid mode then add a "" controller indicating - // it should join the cgroups v2. - if cgroups.IsCgroup2HybridMode() { - subsystems = append(subsystems, &NameGroup{GroupName: "", Join: true}) - } -} - -type subsystem interface { - // Name returns the name of the subsystem. - Name() string - // GetStats fills in the stats for the subsystem. - GetStats(path string, stats *cgroups.Stats) error - // Apply creates and joins a cgroup, adding pid into it. Some - // subsystems use resources to pre-configure the cgroup parents - // before creating or joining it. - Apply(path string, r *cgroups.Resources, pid int) error - // Set sets the cgroup resources. 
- Set(path string, r *cgroups.Resources) error -} - -type Manager struct { - mu sync.Mutex - cgroups *cgroups.Cgroup - paths map[string]string -} - -func NewManager(cg *cgroups.Cgroup, paths map[string]string) (*Manager, error) { - // Some v1 controllers (cpu, cpuset, and devices) expect - // cgroups.Resources to not be nil in Apply. - if cg.Resources == nil { - return nil, errors.New("cgroup v1 manager needs cgroups.Resources to be set during manager creation") - } - if cg.Resources.Unified != nil { - return nil, cgroups.ErrV1NoUnified - } - - if paths == nil { - var err error - paths, err = initPaths(cg) - if err != nil { - return nil, err - } - } - - return &Manager{ - cgroups: cg, - paths: paths, - }, nil -} - -// isIgnorableError returns whether err is a permission error (in the loose -// sense of the word). This includes EROFS (which for an unprivileged user is -// basically a permission error) and EACCES (for similar reasons) as well as -// the normal EPERM. -func isIgnorableError(rootless bool, err error) bool { - // We do not ignore errors if we are root. - if !rootless { - return false - } - // Is it an ordinary EPERM? - if errors.Is(err, os.ErrPermission) { - return true - } - // Handle some specific syscall errors. - var errno unix.Errno - if errors.As(err, &errno) { - return errno == unix.EROFS || errno == unix.EPERM || errno == unix.EACCES - } - return false -} - -func (m *Manager) Apply(pid int) (retErr error) { - m.mu.Lock() - defer m.mu.Unlock() - - c := m.cgroups - - for _, sys := range subsystems { - name := sys.Name() - p, ok := m.paths[name] - if !ok { - continue - } - - if err := sys.Apply(p, c.Resources, pid); err != nil { - // In the case of rootless (including euid=0 in userns), where an - // explicit cgroup path hasn't been set, we don't bail on error in - // case of permission problems here, but do delete the path from - // the m.paths map, since it is either non-existent and could not - // be created, or the pid could not be added to it. 
- // - // Cases where limits for the subsystem have been set are handled - // later by Set, which fails with a friendly error (see - // if path == "" in Set). - if isIgnorableError(c.Rootless, err) && c.Path == "" { - retErr = cgroups.ErrRootless - delete(m.paths, name) - continue - } - return err - } - - } - return retErr -} - -// AddPid adds a process with a given pid to an existing cgroup. -// The subcgroup argument is either empty, or a path relative to -// a cgroup under under the manager's cgroup. -func (m *Manager) AddPid(subcgroup string, pid int) (retErr error) { - m.mu.Lock() - defer m.mu.Unlock() - - c := m.cgroups - - for _, dir := range m.paths { - path := path.Join(dir, subcgroup) - if !strings.HasPrefix(path, dir) { - return fmt.Errorf("bad sub cgroup path: %s", subcgroup) - } - - if err := cgroups.WriteCgroupProc(path, pid); err != nil { - if isIgnorableError(c.Rootless, err) && c.Path == "" { - retErr = cgroups.ErrRootless - continue - } - return err - } - } - - return retErr -} - -func (m *Manager) Destroy() error { - m.mu.Lock() - defer m.mu.Unlock() - return cgroups.RemovePaths(m.paths) -} - -func (m *Manager) Path(subsys string) string { - m.mu.Lock() - defer m.mu.Unlock() - return m.paths[subsys] -} - -func (m *Manager) GetStats() (*cgroups.Stats, error) { - m.mu.Lock() - defer m.mu.Unlock() - stats := cgroups.NewStats() - for _, sys := range subsystems { - path := m.paths[sys.Name()] - if path == "" { - continue - } - if err := sys.GetStats(path, stats); err != nil { - return nil, err - } - } - return stats, nil -} - -func (m *Manager) Set(r *cgroups.Resources) error { - if r == nil { - return nil - } - - if r.Unified != nil { - return cgroups.ErrV1NoUnified - } - - m.mu.Lock() - defer m.mu.Unlock() - for _, sys := range subsystems { - path := m.paths[sys.Name()] - if err := sys.Set(path, r); err != nil { - // When rootless is true, errors from the device subsystem - // are ignored, as it is really not expected to work. 
- if m.cgroups.Rootless && sys.Name() == "devices" && !errors.Is(err, cgroups.ErrDevicesUnsupported) { - continue - } - // However, errors from other subsystems are not ignored. - // see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error" - if path == "" { - // We never created a path for this cgroup, so we cannot set - // limits for it (though we have already tried at this point). - return fmt.Errorf("cannot set %s limit: container could not join or create cgroup", sys.Name()) - } - return err - } - } - - return nil -} - -// Freeze toggles the container's freezer cgroup depending on the state -// provided -func (m *Manager) Freeze(state cgroups.FreezerState) error { - path := m.Path("freezer") - if path == "" { - return errors.New("cannot toggle freezer: cgroups not configured for container") - } - - prevState := m.cgroups.Resources.Freezer - m.cgroups.Resources.Freezer = state - freezer := &FreezerGroup{} - if err := freezer.Set(path, m.cgroups.Resources); err != nil { - m.cgroups.Resources.Freezer = prevState - return err - } - return nil -} - -func (m *Manager) GetPids() ([]int, error) { - return cgroups.GetPids(m.Path("devices")) -} - -func (m *Manager) GetAllPids() ([]int, error) { - return cgroups.GetAllPids(m.Path("devices")) -} - -func (m *Manager) GetPaths() map[string]string { - m.mu.Lock() - defer m.mu.Unlock() - return m.paths -} - -func (m *Manager) GetCgroups() (*cgroups.Cgroup, error) { - return m.cgroups, nil -} - -func (m *Manager) GetFreezerState() (cgroups.FreezerState, error) { - dir := m.Path("freezer") - // If the container doesn't have the freezer cgroup, say it's undefined. 
- if dir == "" { - return cgroups.Undefined, nil - } - freezer := &FreezerGroup{} - return freezer.GetState(dir) -} - -func (m *Manager) Exists() bool { - return cgroups.PathExists(m.Path("devices")) -} - -func OOMKillCount(path string) (uint64, error) { - return fscommon.GetValueByKey(path, "memory.oom_control", "oom_kill") -} - -func (m *Manager) OOMKillCount() (uint64, error) { - c, err := OOMKillCount(m.Path("memory")) - // Ignore ENOENT when rootless as it couldn't create cgroup. - if err != nil && m.cgroups.Rootless && os.IsNotExist(err) { - err = nil - } - - return c, err -} diff --git a/vendor/github.com/opencontainers/cgroups/fs/hugetlb.go b/vendor/github.com/opencontainers/cgroups/fs/hugetlb.go deleted file mode 100644 index 698fd691e1..0000000000 --- a/vendor/github.com/opencontainers/cgroups/fs/hugetlb.go +++ /dev/null @@ -1,83 +0,0 @@ -package fs - -import ( - "errors" - "os" - "strconv" - - "github.com/opencontainers/cgroups" - "github.com/opencontainers/cgroups/fscommon" -) - -type HugetlbGroup struct{} - -func (s *HugetlbGroup) Name() string { - return "hugetlb" -} - -func (s *HugetlbGroup) Apply(path string, _ *cgroups.Resources, pid int) error { - return apply(path, pid) -} - -func (s *HugetlbGroup) Set(path string, r *cgroups.Resources) error { - const suffix = ".limit_in_bytes" - skipRsvd := false - - for _, hugetlb := range r.HugetlbLimit { - prefix := "hugetlb." 
+ hugetlb.Pagesize - val := strconv.FormatUint(hugetlb.Limit, 10) - if err := cgroups.WriteFile(path, prefix+suffix, val); err != nil { - return err - } - if skipRsvd { - continue - } - if err := cgroups.WriteFile(path, prefix+".rsvd"+suffix, val); err != nil { - if errors.Is(err, os.ErrNotExist) { - skipRsvd = true - continue - } - return err - } - } - - return nil -} - -func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error { - if !cgroups.PathExists(path) { - return nil - } - rsvd := ".rsvd" - hugetlbStats := cgroups.HugetlbStats{} - for _, pageSize := range cgroups.HugePageSizes() { - again: - prefix := "hugetlb." + pageSize + rsvd - - value, err := fscommon.GetCgroupParamUint(path, prefix+".usage_in_bytes") - if err != nil { - if rsvd != "" && errors.Is(err, os.ErrNotExist) { - rsvd = "" - goto again - } - return err - } - hugetlbStats.Usage = value - - value, err = fscommon.GetCgroupParamUint(path, prefix+".max_usage_in_bytes") - if err != nil { - return err - } - hugetlbStats.MaxUsage = value - - value, err = fscommon.GetCgroupParamUint(path, prefix+".failcnt") - if err != nil { - return err - } - hugetlbStats.Failcnt = value - - stats.HugetlbStats[pageSize] = hugetlbStats - } - - return nil -} diff --git a/vendor/github.com/opencontainers/cgroups/fs/memory.go b/vendor/github.com/opencontainers/cgroups/fs/memory.go deleted file mode 100644 index d92f2322be..0000000000 --- a/vendor/github.com/opencontainers/cgroups/fs/memory.go +++ /dev/null @@ -1,356 +0,0 @@ -package fs - -import ( - "bufio" - "errors" - "fmt" - "math" - "os" - "path/filepath" - "strconv" - "strings" - - "golang.org/x/sys/unix" - - "github.com/opencontainers/cgroups" - "github.com/opencontainers/cgroups/fscommon" -) - -const ( - cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes" - cgroupMemoryLimit = "memory.limit_in_bytes" - cgroupMemoryUsage = "memory.usage_in_bytes" - cgroupMemoryMaxUsage = "memory.max_usage_in_bytes" -) - -type MemoryGroup struct{} - -func (s 
*MemoryGroup) Name() string { - return "memory" -} - -func (s *MemoryGroup) Apply(path string, _ *cgroups.Resources, pid int) error { - return apply(path, pid) -} - -func setMemory(path string, val int64) error { - if val == 0 { - return nil - } - - err := cgroups.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(val, 10)) - if !errors.Is(err, unix.EBUSY) { - return err - } - - // EBUSY means the kernel can't set new limit as it's too low - // (lower than the current usage). Return more specific error. - usage, err := fscommon.GetCgroupParamUint(path, cgroupMemoryUsage) - if err != nil { - return err - } - max, err := fscommon.GetCgroupParamUint(path, cgroupMemoryMaxUsage) - if err != nil { - return err - } - - return fmt.Errorf("unable to set memory limit to %d (current usage: %d, peak usage: %d)", val, usage, max) -} - -func setSwap(path string, val int64) error { - if val == 0 { - return nil - } - - return cgroups.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(val, 10)) -} - -func setMemoryAndSwap(path string, r *cgroups.Resources) error { - // If the memory update is set to -1 and the swap is not explicitly - // set, we should also set swap to -1, it means unlimited memory. - if r.Memory == -1 && r.MemorySwap == 0 { - // Only set swap if it's enabled in kernel - if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) { - r.MemorySwap = -1 - } - } - - // When memory and swap memory are both set, we need to handle the cases - // for updating container. - if r.Memory != 0 && r.MemorySwap != 0 { - curLimit, err := fscommon.GetCgroupParamUint(path, cgroupMemoryLimit) - if err != nil { - return err - } - - // When update memory limit, we should adapt the write sequence - // for memory and swap memory, so it won't fail because the new - // value and the old value don't fit kernel's validation. 
- if r.MemorySwap == -1 || curLimit < uint64(r.MemorySwap) { - if err := setSwap(path, r.MemorySwap); err != nil { - return err - } - if err := setMemory(path, r.Memory); err != nil { - return err - } - return nil - } - } - - if err := setMemory(path, r.Memory); err != nil { - return err - } - if err := setSwap(path, r.MemorySwap); err != nil { - return err - } - - return nil -} - -func (s *MemoryGroup) Set(path string, r *cgroups.Resources) error { - if err := setMemoryAndSwap(path, r); err != nil { - return err - } - - // ignore KernelMemory and KernelMemoryTCP - - if r.MemoryReservation != 0 { - if err := cgroups.WriteFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(r.MemoryReservation, 10)); err != nil { - return err - } - } - - if r.OomKillDisable { - if err := cgroups.WriteFile(path, "memory.oom_control", "1"); err != nil { - return err - } - } - if r.MemorySwappiness == nil || int64(*r.MemorySwappiness) == -1 { - return nil - } else if *r.MemorySwappiness <= 100 { - if err := cgroups.WriteFile(path, "memory.swappiness", strconv.FormatUint(*r.MemorySwappiness, 10)); err != nil { - return err - } - } else { - return fmt.Errorf("invalid memory swappiness value: %d (valid range is 0-100)", *r.MemorySwappiness) - } - - return nil -} - -func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error { - const file = "memory.stat" - statsFile, err := cgroups.OpenFile(path, file, os.O_RDONLY) - if err != nil { - if os.IsNotExist(err) { - return nil - } - return err - } - defer statsFile.Close() - - sc := bufio.NewScanner(statsFile) - for sc.Scan() { - t, v, err := fscommon.ParseKeyValue(sc.Text()) - if err != nil { - return &parseError{Path: path, File: file, Err: err} - } - stats.MemoryStats.Stats[t] = v - } - stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"] - - memoryUsage, err := getMemoryData(path, "") - if err != nil { - return err - } - stats.MemoryStats.Usage = memoryUsage - swapUsage, err := getMemoryData(path, "memsw") - if err 
!= nil { - return err - } - stats.MemoryStats.SwapUsage = swapUsage - stats.MemoryStats.SwapOnlyUsage = cgroups.MemoryData{ - Usage: swapUsage.Usage - memoryUsage.Usage, - Failcnt: swapUsage.Failcnt - memoryUsage.Failcnt, - } - kernelUsage, err := getMemoryData(path, "kmem") - if err != nil { - return err - } - stats.MemoryStats.KernelUsage = kernelUsage - kernelTCPUsage, err := getMemoryData(path, "kmem.tcp") - if err != nil { - return err - } - stats.MemoryStats.KernelTCPUsage = kernelTCPUsage - - value, err := fscommon.GetCgroupParamUint(path, "memory.use_hierarchy") - if err != nil { - return err - } - if value == 1 { - stats.MemoryStats.UseHierarchy = true - } - - pagesByNUMA, err := getPageUsageByNUMA(path) - if err != nil { - return err - } - stats.MemoryStats.PageUsageByNUMA = pagesByNUMA - - return nil -} - -func getMemoryData(path, name string) (cgroups.MemoryData, error) { - memoryData := cgroups.MemoryData{} - - moduleName := "memory" - if name != "" { - moduleName = "memory." + name - } - var ( - usage = moduleName + ".usage_in_bytes" - maxUsage = moduleName + ".max_usage_in_bytes" - failcnt = moduleName + ".failcnt" - limit = moduleName + ".limit_in_bytes" - ) - - value, err := fscommon.GetCgroupParamUint(path, usage) - if err != nil { - if name != "" && os.IsNotExist(err) { - // Ignore ENOENT as swap and kmem controllers - // are optional in the kernel. - return cgroups.MemoryData{}, nil - } - return cgroups.MemoryData{}, err - } - memoryData.Usage = value - value, err = fscommon.GetCgroupParamUint(path, maxUsage) - if err != nil { - return cgroups.MemoryData{}, err - } - memoryData.MaxUsage = value - value, err = fscommon.GetCgroupParamUint(path, failcnt) - if err != nil { - return cgroups.MemoryData{}, err - } - memoryData.Failcnt = value - value, err = fscommon.GetCgroupParamUint(path, limit) - if err != nil { - if name == "kmem" && os.IsNotExist(err) { - // Ignore ENOENT as kmem.limit_in_bytes has - // been removed in newer kernels. 
- return memoryData, nil - } - - return cgroups.MemoryData{}, err - } - memoryData.Limit = value - - return memoryData, nil -} - -func getPageUsageByNUMA(path string) (cgroups.PageUsageByNUMA, error) { - const ( - maxColumns = math.MaxUint8 + 1 - file = "memory.numa_stat" - ) - stats := cgroups.PageUsageByNUMA{} - - fd, err := cgroups.OpenFile(path, file, os.O_RDONLY) - if os.IsNotExist(err) { - return stats, nil - } else if err != nil { - return stats, err - } - defer fd.Close() - - // File format is documented in linux/Documentation/cgroup-v1/memory.txt - // and it looks like this: - // - // total= N0= N1= ... - // file= N0= N1= ... - // anon= N0= N1= ... - // unevictable= N0= N1= ... - // hierarchical_= N0= N1= ... - - scanner := bufio.NewScanner(fd) - for scanner.Scan() { - var field *cgroups.PageStats - - line := scanner.Text() - columns := strings.SplitN(line, " ", maxColumns) - for i, column := range columns { - key, val, ok := strings.Cut(column, "=") - // Some custom kernels have non-standard fields, like - // numa_locality 0 0 0 0 0 0 0 0 0 0 - // numa_exectime 0 - if !ok { - if i == 0 { - // Ignore/skip those. - break - } else { - // The first column was already validated, - // so be strict to the rest. - return stats, malformedLine(path, file, line) - } - } - if i == 0 { // First column: key is name, val is total. - field = getNUMAField(&stats, key) - if field == nil { // unknown field (new kernel?) - break - } - field.Total, err = strconv.ParseUint(val, 0, 64) - if err != nil { - return stats, &parseError{Path: path, File: file, Err: err} - } - field.Nodes = map[uint8]uint64{} - } else { // Subsequent columns: key is N, val is usage. - if len(key) < 2 || key[0] != 'N' { - // This is definitely an error. 
- return stats, malformedLine(path, file, line) - } - - n, err := strconv.ParseUint(key[1:], 10, 8) - if err != nil { - return stats, &parseError{Path: path, File: file, Err: err} - } - - usage, err := strconv.ParseUint(val, 10, 64) - if err != nil { - return stats, &parseError{Path: path, File: file, Err: err} - } - - field.Nodes[uint8(n)] = usage - } - - } - } - if err := scanner.Err(); err != nil { - return cgroups.PageUsageByNUMA{}, &parseError{Path: path, File: file, Err: err} - } - - return stats, nil -} - -func getNUMAField(stats *cgroups.PageUsageByNUMA, name string) *cgroups.PageStats { - switch name { - case "total": - return &stats.Total - case "file": - return &stats.File - case "anon": - return &stats.Anon - case "unevictable": - return &stats.Unevictable - case "hierarchical_total": - return &stats.Hierarchical.Total - case "hierarchical_file": - return &stats.Hierarchical.File - case "hierarchical_anon": - return &stats.Hierarchical.Anon - case "hierarchical_unevictable": - return &stats.Hierarchical.Unevictable - } - return nil -} diff --git a/vendor/github.com/opencontainers/cgroups/fs/name.go b/vendor/github.com/opencontainers/cgroups/fs/name.go deleted file mode 100644 index 28643519b5..0000000000 --- a/vendor/github.com/opencontainers/cgroups/fs/name.go +++ /dev/null @@ -1,30 +0,0 @@ -package fs - -import ( - "github.com/opencontainers/cgroups" -) - -type NameGroup struct { - GroupName string - Join bool -} - -func (s *NameGroup) Name() string { - return s.GroupName -} - -func (s *NameGroup) Apply(path string, _ *cgroups.Resources, pid int) error { - if s.Join { - // Ignore errors if the named cgroup does not exist. 
- _ = apply(path, pid) - } - return nil -} - -func (s *NameGroup) Set(_ string, _ *cgroups.Resources) error { - return nil -} - -func (s *NameGroup) GetStats(path string, stats *cgroups.Stats) error { - return nil -} diff --git a/vendor/github.com/opencontainers/cgroups/fs/net_cls.go b/vendor/github.com/opencontainers/cgroups/fs/net_cls.go deleted file mode 100644 index 2bd6c5ab21..0000000000 --- a/vendor/github.com/opencontainers/cgroups/fs/net_cls.go +++ /dev/null @@ -1,31 +0,0 @@ -package fs - -import ( - "strconv" - - "github.com/opencontainers/cgroups" -) - -type NetClsGroup struct{} - -func (s *NetClsGroup) Name() string { - return "net_cls" -} - -func (s *NetClsGroup) Apply(path string, _ *cgroups.Resources, pid int) error { - return apply(path, pid) -} - -func (s *NetClsGroup) Set(path string, r *cgroups.Resources) error { - if r.NetClsClassid != 0 { - if err := cgroups.WriteFile(path, "net_cls.classid", strconv.FormatUint(uint64(r.NetClsClassid), 10)); err != nil { - return err - } - } - - return nil -} - -func (s *NetClsGroup) GetStats(path string, stats *cgroups.Stats) error { - return nil -} diff --git a/vendor/github.com/opencontainers/cgroups/fs/net_prio.go b/vendor/github.com/opencontainers/cgroups/fs/net_prio.go deleted file mode 100644 index b51682b6da..0000000000 --- a/vendor/github.com/opencontainers/cgroups/fs/net_prio.go +++ /dev/null @@ -1,29 +0,0 @@ -package fs - -import ( - "github.com/opencontainers/cgroups" -) - -type NetPrioGroup struct{} - -func (s *NetPrioGroup) Name() string { - return "net_prio" -} - -func (s *NetPrioGroup) Apply(path string, _ *cgroups.Resources, pid int) error { - return apply(path, pid) -} - -func (s *NetPrioGroup) Set(path string, r *cgroups.Resources) error { - for _, prioMap := range r.NetPrioIfpriomap { - if err := cgroups.WriteFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil { - return err - } - } - - return nil -} - -func (s *NetPrioGroup) GetStats(path string, stats *cgroups.Stats) error 
{ - return nil -} diff --git a/vendor/github.com/opencontainers/cgroups/fs/paths.go b/vendor/github.com/opencontainers/cgroups/fs/paths.go deleted file mode 100644 index edbe041ea8..0000000000 --- a/vendor/github.com/opencontainers/cgroups/fs/paths.go +++ /dev/null @@ -1,169 +0,0 @@ -package fs - -import ( - "errors" - "os" - "path/filepath" - "sync" - - "golang.org/x/sys/unix" - - "github.com/opencontainers/cgroups" - "github.com/opencontainers/cgroups/internal/path" -) - -// The absolute path to the root of the cgroup hierarchies. -var ( - cgroupRootLock sync.Mutex - cgroupRoot string -) - -const defaultCgroupRoot = "/sys/fs/cgroup" - -func initPaths(cg *cgroups.Cgroup) (map[string]string, error) { - root, err := rootPath() - if err != nil { - return nil, err - } - - inner, err := path.Inner(cg) - if err != nil { - return nil, err - } - - paths := make(map[string]string) - for _, sys := range subsystems { - name := sys.Name() - path, err := subsysPath(root, inner, name) - if err != nil { - // The non-presence of the devices subsystem - // is considered fatal for security reasons. - if cgroups.IsNotFound(err) && (cg.SkipDevices || name != "devices") { - continue - } - - return nil, err - } - paths[name] = path - } - - return paths, nil -} - -func tryDefaultCgroupRoot() string { - var st, pst unix.Stat_t - - // (1) it should be a directory... - err := unix.Lstat(defaultCgroupRoot, &st) - if err != nil || st.Mode&unix.S_IFDIR == 0 { - return "" - } - - // (2) ... and a mount point ... - err = unix.Lstat(filepath.Dir(defaultCgroupRoot), &pst) - if err != nil { - return "" - } - - if st.Dev == pst.Dev { - // parent dir has the same dev -- not a mount point - return "" - } - - // (3) ... of 'tmpfs' fs type. - var fst unix.Statfs_t - err = unix.Statfs(defaultCgroupRoot, &fst) - if err != nil || fst.Type != unix.TMPFS_MAGIC { - return "" - } - - // (4) it should have at least 1 entry ... 
- dir, err := os.Open(defaultCgroupRoot) - if err != nil { - return "" - } - defer dir.Close() - names, err := dir.Readdirnames(1) - if err != nil { - return "" - } - if len(names) < 1 { - return "" - } - // ... which is a cgroup mount point. - err = unix.Statfs(filepath.Join(defaultCgroupRoot, names[0]), &fst) - if err != nil || fst.Type != unix.CGROUP_SUPER_MAGIC { - return "" - } - - return defaultCgroupRoot -} - -// rootPath finds and returns path to the root of the cgroup hierarchies. -func rootPath() (string, error) { - cgroupRootLock.Lock() - defer cgroupRootLock.Unlock() - - if cgroupRoot != "" { - return cgroupRoot, nil - } - - // fast path - cgroupRoot = tryDefaultCgroupRoot() - if cgroupRoot != "" { - return cgroupRoot, nil - } - - // slow path: parse mountinfo - mi, err := cgroups.GetCgroupMounts(false) - if err != nil { - return "", err - } - if len(mi) < 1 { - return "", errors.New("no cgroup mount found in mountinfo") - } - - // Get the first cgroup mount (e.g. "/sys/fs/cgroup/memory"), - // use its parent directory. - root := filepath.Dir(mi[0].Mountpoint) - - if _, err := os.Stat(root); err != nil { - return "", err - } - - cgroupRoot = root - return cgroupRoot, nil -} - -func subsysPath(root, inner, subsystem string) (string, error) { - // If the cgroup name/path is absolute do not look relative to the cgroup of the init process. - if filepath.IsAbs(inner) { - mnt, err := cgroups.FindCgroupMountpoint(root, subsystem) - // If we didn't mount the subsystem, there is no point we make the path. - if err != nil { - return "", err - } - - // Sometimes subsystems can be mounted together as 'cpu,cpuacct'. - return filepath.Join(root, filepath.Base(mnt), inner), nil - } - - // Use GetOwnCgroupPath for dind-like cases, when cgroupns is not - // available. This is ugly. 
- parentPath, err := cgroups.GetOwnCgroupPath(subsystem) - if err != nil { - return "", err - } - - return filepath.Join(parentPath, inner), nil -} - -func apply(path string, pid int) error { - if path == "" { - return nil - } - if err := os.MkdirAll(path, 0o755); err != nil { - return err - } - return cgroups.WriteCgroupProc(path, pid) -} diff --git a/vendor/github.com/opencontainers/cgroups/fs/perf_event.go b/vendor/github.com/opencontainers/cgroups/fs/perf_event.go deleted file mode 100644 index 929c412a3a..0000000000 --- a/vendor/github.com/opencontainers/cgroups/fs/perf_event.go +++ /dev/null @@ -1,23 +0,0 @@ -package fs - -import ( - "github.com/opencontainers/cgroups" -) - -type PerfEventGroup struct{} - -func (s *PerfEventGroup) Name() string { - return "perf_event" -} - -func (s *PerfEventGroup) Apply(path string, _ *cgroups.Resources, pid int) error { - return apply(path, pid) -} - -func (s *PerfEventGroup) Set(_ string, _ *cgroups.Resources) error { - return nil -} - -func (s *PerfEventGroup) GetStats(path string, stats *cgroups.Stats) error { - return nil -} diff --git a/vendor/github.com/opencontainers/cgroups/fs/pids.go b/vendor/github.com/opencontainers/cgroups/fs/pids.go deleted file mode 100644 index 36bd339af8..0000000000 --- a/vendor/github.com/opencontainers/cgroups/fs/pids.go +++ /dev/null @@ -1,66 +0,0 @@ -package fs - -import ( - "math" - "strconv" - - "github.com/opencontainers/cgroups" - "github.com/opencontainers/cgroups/fscommon" -) - -type PidsGroup struct{} - -func (s *PidsGroup) Name() string { - return "pids" -} - -func (s *PidsGroup) Apply(path string, _ *cgroups.Resources, pid int) error { - return apply(path, pid) -} - -func (s *PidsGroup) Set(path string, r *cgroups.Resources) error { - if r.PidsLimit == nil { - return nil - } - - // "max" is the fallback value. 
- val := "max" - if limit := *r.PidsLimit; limit > 0 { - val = strconv.FormatInt(limit, 10) - } else if limit == 0 { - // systemd doesn't support setting pids.max to "0", so when setting - // TasksMax we need to remap it to "1". We do the same thing here to - // avoid flip-flop behaviour between the fs and systemd drivers. In - // practice, the pids cgroup behaviour is basically identical. - val = "1" - } - if err := cgroups.WriteFile(path, "pids.max", val); err != nil { - return err - } - return nil -} - -func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error { - if !cgroups.PathExists(path) { - return nil - } - current, err := fscommon.GetCgroupParamUint(path, "pids.current") - if err != nil { - return err - } - - max, err := fscommon.GetCgroupParamUint(path, "pids.max") - if err != nil { - return err - } - // If no limit is set, read from pids.max returns "max", which is - // converted to MaxUint64 by GetCgroupParamUint. Historically, we - // represent "no limit" for pids as 0, thus this conversion. 
- if max == math.MaxUint64 { - max = 0 - } - - stats.PidsStats.Current = current - stats.PidsStats.Limit = max - return nil -} diff --git a/vendor/github.com/opencontainers/cgroups/fs/rdma.go b/vendor/github.com/opencontainers/cgroups/fs/rdma.go deleted file mode 100644 index 4b175365f2..0000000000 --- a/vendor/github.com/opencontainers/cgroups/fs/rdma.go +++ /dev/null @@ -1,24 +0,0 @@ -package fs - -import ( - "github.com/opencontainers/cgroups" - "github.com/opencontainers/cgroups/fscommon" -) - -type RdmaGroup struct{} - -func (s *RdmaGroup) Name() string { - return "rdma" -} - -func (s *RdmaGroup) Apply(path string, _ *cgroups.Resources, pid int) error { - return apply(path, pid) -} - -func (s *RdmaGroup) Set(path string, r *cgroups.Resources) error { - return fscommon.RdmaSet(path, r) -} - -func (s *RdmaGroup) GetStats(path string, stats *cgroups.Stats) error { - return fscommon.RdmaGetStats(path, stats) -} diff --git a/vendor/modules.txt b/vendor/modules.txt index cb890f4829..f1834b3a73 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -358,7 +358,6 @@ github.com/onsi/gomega/types ## explicit; go 1.23.0 github.com/opencontainers/cgroups github.com/opencontainers/cgroups/devices/config -github.com/opencontainers/cgroups/fs github.com/opencontainers/cgroups/fs2 github.com/opencontainers/cgroups/fscommon github.com/opencontainers/cgroups/internal/path From e639dec1b677db39fd4f76d67055ca89ada739ac Mon Sep 17 00:00:00 2001 From: Lokesh Mandvekar Date: Mon, 24 Nov 2025 14:20:21 -0500 Subject: [PATCH 8/9] common/pkg/cgroups: replace getAvailableControllers by only caller Signed-off-by: Lokesh Mandvekar --- common/pkg/cgroups/cgroups_linux.go | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/common/pkg/cgroups/cgroups_linux.go b/common/pkg/cgroups/cgroups_linux.go index b444aed2d7..cc1599d64b 100644 --- a/common/pkg/cgroups/cgroups_linux.go +++ b/common/pkg/cgroups/cgroups_linux.go @@ -64,8 +64,8 @@ var handlers = 
map[string]statFunc{ Blkio: blkioStat, } -// getAvailableControllers get the available controllers. -func getAvailableControllers() ([]string, error) { +// AvailableControllers returns the names of all the available controllers. +func AvailableControllers() ([]string, error) { controllers := []string{} controllersFile := filepath.Join(cgroupRoot, "cgroup.controllers") @@ -89,11 +89,6 @@ func getAvailableControllers() ([]string, error) { return controllers, nil } -// AvailableControllers get string:bool map of all the available controllers. -func AvailableControllers(exclude map[string]statFunc) ([]string, error) { - return getAvailableControllers() -} - func getCgroupPathForCurrentProcess() (string, error) { path := fmt.Sprintf("/proc/%d/cgroup", os.Getpid()) f, err := os.Open(path) From 3ed278e8d0caf88fa8241297554fdcf7e7a85877 Mon Sep 17 00:00:00 2001 From: Lokesh Mandvekar Date: Tue, 25 Nov 2025 13:43:10 -0500 Subject: [PATCH 9/9] Add back cgroups.AddPid This reverts commit cce5ec8bd60149a1a52c5224da9120b4319cda54. Note: This has already gone through a revert and re-revert cycle in commits 52736854ef6fca4effac0162c6788e732c83d957 and 3fe402bb68056acd7551466d94964de68914b808, but that's wrong per: https://github.com/containers/podman/pull/27551#discussion_r2557592950 . Signed-off-by: Lokesh Mandvekar --- common/pkg/cgroups/cgroups_linux.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/common/pkg/cgroups/cgroups_linux.go b/common/pkg/cgroups/cgroups_linux.go index cc1599d64b..f7ac8ba12c 100644 --- a/common/pkg/cgroups/cgroups_linux.go +++ b/common/pkg/cgroups/cgroups_linux.go @@ -298,6 +298,12 @@ func (c *CgroupControl) Update(resources *cgroups.Resources) error { return man.Set(resources) } +// AddPid is meant to move the specified pid to the cgroup. NOTE(review): this body only calls fs2.CreateCgroupPath and never uses pid; confirm whether the pid should also be written to cgroup.procs. +func (c *CgroupControl) AddPid(pid int) error { + path := filepath.Join(cgroupRoot, c.config.Path) + return fs2.CreateCgroupPath(path, c.config) +} + // Stat returns usage statistics for the cgroup. 
func (c *CgroupControl) Stat() (*cgroups.Stats, error) { m := cgroups.Stats{}