Skip to content

Commit 5ceb477

Browse files
committed
Export libcontainer/cgroups from runc
This is mostly the result of the following command: git filter-repo --path libcontainer/cgroups/ --path-rename libcontainer/cgroups/: Signed-off-by: Kir Kolyshkin <[email protected]>
2 parents d39e48d + 9d86f4b commit 5ceb477

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

96 files changed

+15422
-0
lines changed

cgroups.go

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
package cgroups
2+
3+
import (
4+
"errors"
5+
)
6+
7+
var (
8+
// ErrDevicesUnsupported is an error returned when a cgroup manager
9+
// is not configured to set device rules.
10+
ErrDevicesUnsupported = errors.New("cgroup manager is not configured to set device rules")
11+
12+
// ErrRootless is returned by [Manager.Apply] when there is an error
13+
// creating the cgroup directory, and cgroup.Rootless is set. In general,
14+
// this error is to be ignored.
15+
ErrRootless = errors.New("cgroup manager can not access cgroup (rootless container)")
16+
17+
// DevicesSetV1 and DevicesSetV2 are functions to set devices for
18+
// cgroup v1 and v2, respectively. Unless the
19+
// [github.com/opencontainers/cgroups/devices]
20+
// package is imported, they are set to nil, so cgroup managers can't
21+
// manage devices.
22+
DevicesSetV1 func(path string, r *Resources) error
23+
DevicesSetV2 func(path string, r *Resources) error
24+
)
25+
26+
// Manager is the interface through which a single cgroup is created,
// configured, queried, frozen, and destroyed.
type Manager interface {
27+
// Apply creates a cgroup, if not yet created, and adds a process
28+
// with the specified pid into that cgroup. A special value of -1
29+
// can be used to merely create a cgroup.
30+
Apply(pid int) error
31+
32+
// GetPids returns the PIDs of all processes inside the cgroup.
33+
GetPids() ([]int, error)
34+
35+
// GetAllPids returns the PIDs of all processes inside the cgroup
36+
// and all its sub-cgroups.
37+
GetAllPids() ([]int, error)
38+
39+
// GetStats returns cgroups statistics.
40+
GetStats() (*Stats, error)
41+
42+
// Freeze sets the freezer cgroup to the specified state.
43+
Freeze(state FreezerState) error
44+
45+
// Destroy removes the cgroup.
46+
Destroy() error
47+
48+
// Path returns a cgroup path to the specified controller/subsystem.
49+
// For cgroupv2, the argument is unused and can be empty.
50+
Path(string) string
51+
52+
// Set sets cgroup resources parameters/limits. If the argument is nil,
53+
// the resources specified during Manager creation (or the previous call
54+
// to Set) are used.
55+
Set(r *Resources) error
56+
57+
// GetPaths returns cgroup path(s) to save in a state file in order to
58+
// restore later.
59+
//
60+
// For cgroup v1, a key is cgroup subsystem name, and the value is the
61+
// path to the cgroup for this subsystem.
62+
//
63+
// For cgroup v2 unified hierarchy, a key is "", and the value is the
64+
// unified path.
65+
GetPaths() map[string]string
66+
67+
// GetCgroups returns the cgroup data as configured.
68+
GetCgroups() (*Cgroup, error)
69+
70+
// GetFreezerState retrieves the current FreezerState of the cgroup.
71+
GetFreezerState() (FreezerState, error)
72+
73+
// Exists returns whether the cgroup path exists or not.
74+
Exists() bool
75+
76+
// OOMKillCount reports OOM kill count for the cgroup.
77+
OOMKillCount() (uint64, error)
78+
}

cgroups_test.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
package cgroups
2+
3+
import (
4+
"testing"
5+
)
6+
7+
func TestParseCgroups(t *testing.T) {
8+
// We don't need to use /proc/thread-self here because runc always runs
9+
// with every thread in the same cgroup. This lets us avoid having to do
10+
// runtime.LockOSThread.
11+
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
12+
if err != nil {
13+
t.Fatal(err)
14+
}
15+
if IsCgroup2UnifiedMode() {
16+
return
17+
}
18+
if _, ok := cgroups["cpu"]; !ok {
19+
t.Fail()
20+
}
21+
}

config_blkio_device.go

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
package cgroups
2+
3+
import "fmt"
4+
5+
// BlockIODevice identifies a block device by its major:minor numbers,
// the format supported in the blkio cgroup.
6+
type BlockIODevice struct {
7+
// Major is the device's major number.
8+
Major int64 `json:"major"`
9+
// Minor is the device's minor number.
10+
Minor int64 `json:"minor"`
11+
}
12+
13+
// WeightDevice holds a per-device weight and leaf weight. WeightString and
// LeafWeightString render it as a `major:minor weight` or
// `major:minor leaf_weight` pair, respectively.
14+
type WeightDevice struct {
15+
BlockIODevice
16+
// Weight is the bandwidth rate for the device; the range is from 10 to 1000.
17+
Weight uint16 `json:"weight"`
18+
// LeafWeight is the bandwidth rate for the device while competing with
// the cgroup's child cgroups; the range is from 10 to 1000 (cfq
// scheduler only).
19+
LeafWeight uint16 `json:"leafWeight"`
20+
}
21+
22+
// NewWeightDevice returns a configured WeightDevice pointer
23+
func NewWeightDevice(major, minor int64, weight, leafWeight uint16) *WeightDevice {
24+
wd := &WeightDevice{}
25+
wd.Major = major
26+
wd.Minor = minor
27+
wd.Weight = weight
28+
wd.LeafWeight = leafWeight
29+
return wd
30+
}
31+
32+
// WeightString formats the struct to be writable to the cgroup specific file
33+
func (wd *WeightDevice) WeightString() string {
34+
return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.Weight)
35+
}
36+
37+
// LeafWeightString formats the struct to be writable to the cgroup specific file
38+
func (wd *WeightDevice) LeafWeightString() string {
39+
return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.LeafWeight)
40+
}
41+
42+
// ThrottleDevice holds a per-device rate limit, rendered by String as a
// `major:minor rate_per_second` pair.
43+
type ThrottleDevice struct {
44+
BlockIODevice
45+
// Rate is the IO rate limit per cgroup per device.
46+
Rate uint64 `json:"rate"`
47+
}
48+
49+
// NewThrottleDevice returns a configured ThrottleDevice pointer
50+
func NewThrottleDevice(major, minor int64, rate uint64) *ThrottleDevice {
51+
td := &ThrottleDevice{}
52+
td.Major = major
53+
td.Minor = minor
54+
td.Rate = rate
55+
return td
56+
}
57+
58+
// String formats the struct to be writable to the cgroup specific file
59+
func (td *ThrottleDevice) String() string {
60+
return fmt.Sprintf("%d:%d %d", td.Major, td.Minor, td.Rate)
61+
}
62+
63+
// StringName formats the struct to be writable to the cgroup specific file
64+
func (td *ThrottleDevice) StringName(name string) string {
65+
return fmt.Sprintf("%d:%d %s=%d", td.Major, td.Minor, name, td.Rate)
66+
}

config_hugepages.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
package cgroups
2+
3+
// HugepageLimit is a usage limit for one type of hugepage.
type HugepageLimit struct {
4+
// Pagesize is the type of hugepage to limit.
5+
Pagesize string `json:"page_size"`
6+
7+
// Limit is the usage limit for the hugepage.
8+
Limit uint64 `json:"limit"`
9+
}

config_ifprio_map.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
package cgroups
2+
3+
import (
4+
"fmt"
5+
)
6+
7+
// IfPrioMap pairs an interface name with a priority. CgroupString renders
// it as "interface priority".
type IfPrioMap struct {
8+
// Interface is the interface name.
Interface string `json:"interface"`
9+
// Priority is the priority assigned to Interface.
Priority int64 `json:"priority"`
10+
}
11+
12+
func (i *IfPrioMap) CgroupString() string {
13+
return fmt.Sprintf("%s %d", i.Interface, i.Priority)
14+
}

config_linux.go

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
package cgroups
2+
3+
import (
4+
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
5+
devices "github.com/opencontainers/cgroups/devices/config"
6+
)
7+
8+
// FreezerState is the string form of a cgroup's freezer state.
type FreezerState string
9+
10+
// Known FreezerState values.
const (
11+
// Undefined is the zero value: no freezer state is specified.
Undefined FreezerState = ""
12+
// Frozen is the "FROZEN" freezer state.
Frozen FreezerState = "FROZEN"
13+
// Thawed is the "THAWED" freezer state.
Thawed FreezerState = "THAWED"
14+
)
15+
16+
// Cgroup holds properties of a cgroup on Linux.
17+
type Cgroup struct {
18+
// Name specifies the name of the cgroup.
19+
Name string `json:"name,omitempty"`
20+
21+
// Parent specifies the name of the parent cgroup or slice.
22+
Parent string `json:"parent,omitempty"`
23+
24+
// Path specifies the path to cgroups that are created and/or joined by the container.
25+
// The path is assumed to be relative to the host system cgroup mountpoint.
26+
Path string `json:"path"`
27+
28+
// ScopePrefix describes the prefix for the scope name.
29+
ScopePrefix string `json:"scope_prefix"`
30+
31+
// Resources contains various cgroups settings to apply.
32+
*Resources
33+
34+
// Systemd tells if systemd should be used to manage cgroups.
35+
Systemd bool
36+
37+
// SystemdProps are any additional properties for systemd,
38+
// derived from org.systemd.property.xxx annotations.
39+
// Ignored unless systemd is used for managing cgroups.
40+
SystemdProps []systemdDbus.Property `json:"-"`
41+
42+
// Rootless tells if rootless cgroups should be used.
43+
Rootless bool
44+
45+
// OwnerUID is the host UID that should own the cgroup, or nil to accept
46+
// the default ownership. This should only be set when the
47+
// cgroupfs is to be mounted read/write.
48+
// Not all cgroup manager implementations support changing
49+
// the ownership.
50+
OwnerUID *int `json:"owner_uid,omitempty"`
51+
}
52+
53+
// Resources holds the cgroup resource settings (limits, weights, device
// rules, and manager flags) to apply to a cgroup.
type Resources struct {
54+
// Devices is the set of access rules for devices in the container.
55+
Devices []*devices.Rule `json:"devices"`
56+
57+
// Memory is the memory limit (in bytes).
58+
Memory int64 `json:"memory"`
59+
60+
// MemoryReservation is the memory reservation or soft_limit (in bytes).
61+
MemoryReservation int64 `json:"memory_reservation"`
62+
63+
// MemorySwap is the total memory usage (memory + swap); set `-1` to enable unlimited swap.
64+
MemorySwap int64 `json:"memory_swap"`
65+
66+
// CpuShares is the CPU shares (relative weight vs. other containers).
67+
CpuShares uint64 `json:"cpu_shares"`
68+
69+
// CpuQuota is the CPU hardcap limit (in usecs): allowed cpu time in a given period.
70+
CpuQuota int64 `json:"cpu_quota"`
71+
72+
// CpuBurst is the CPU hardcap burst limit (in usecs): allowed accumulated cpu time additionally for burst in a given period.
73+
CpuBurst *uint64 `json:"cpu_burst"` //nolint:revive
74+
75+
// CpuPeriod is the CPU period to be used for hardcapping (in usecs). 0 to use system default.
76+
CpuPeriod uint64 `json:"cpu_period"`
77+
78+
// CpuRtRuntime is how much time the CPU may use for realtime scheduling (in usecs).
79+
CpuRtRuntime int64 `json:"cpu_rt_quota"`
80+
81+
// CpuRtPeriod is the CPU period to be used for realtime scheduling (in usecs).
82+
CpuRtPeriod uint64 `json:"cpu_rt_period"`
83+
84+
// CpusetCpus is the CPU to use.
85+
CpusetCpus string `json:"cpuset_cpus"`
86+
87+
// CpusetMems is the MEM to use.
88+
CpusetMems string `json:"cpuset_mems"`
89+
90+
// CPUIdle is the cgroup SCHED_IDLE setting.
91+
CPUIdle *int64 `json:"cpu_idle,omitempty"`
92+
93+
// PidsLimit is the process limit; set <= `0` to disable the limit.
94+
PidsLimit int64 `json:"pids_limit"`
95+
96+
// BlkioWeight specifies the per cgroup weight; the range is from 10 to 1000.
97+
BlkioWeight uint16 `json:"blkio_weight"`
98+
99+
// BlkioLeafWeight specifies tasks' weight in the given cgroup while
// competing with the cgroup's child cgroups; the range is from 10 to
// 1000 (cfq scheduler only).
100+
BlkioLeafWeight uint16 `json:"blkio_leaf_weight"`
101+
102+
// BlkioWeightDevice is the weight per cgroup per device; it can override BlkioWeight.
103+
BlkioWeightDevice []*WeightDevice `json:"blkio_weight_device"`
104+
105+
// BlkioThrottleReadBpsDevice is the IO read rate limit per cgroup per device, bytes per second.
106+
BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device"`
107+
108+
// BlkioThrottleWriteBpsDevice is the IO write rate limit per cgroup per device, bytes per second.
109+
BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device"`
110+
111+
// BlkioThrottleReadIOPSDevice is the IO read rate limit per cgroup per device, IO per second.
112+
BlkioThrottleReadIOPSDevice []*ThrottleDevice `json:"blkio_throttle_read_iops_device"`
113+
114+
// BlkioThrottleWriteIOPSDevice is the IO write rate limit per cgroup per device, IO per second.
115+
BlkioThrottleWriteIOPSDevice []*ThrottleDevice `json:"blkio_throttle_write_iops_device"`
116+
117+
// Freezer sets the freeze value for the process.
118+
Freezer FreezerState `json:"freezer"`
119+
120+
// HugetlbLimit is the hugetlb limit (in bytes).
121+
HugetlbLimit []*HugepageLimit `json:"hugetlb_limit"`
122+
123+
// OomKillDisable tells whether to disable the OOM Killer.
124+
OomKillDisable bool `json:"oom_kill_disable"`
125+
126+
// MemorySwappiness tunes the swappiness behaviour per cgroup.
127+
MemorySwappiness *uint64 `json:"memory_swappiness"`
128+
129+
// NetPrioIfpriomap sets the priority of network traffic for the container.
130+
NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"`
131+
132+
// NetClsClassid sets the class identifier for the container's network packets.
133+
NetClsClassid uint32 `json:"net_cls_classid_u"`
134+
135+
// Rdma is the rdma resource restriction configuration.
136+
Rdma map[string]LinuxRdma `json:"rdma"`
137+
138+
// Used on cgroups v2:
139+
140+
// CpuWeight sets a proportional bandwidth limit.
141+
CpuWeight uint64 `json:"cpu_weight"`
142+
143+
// Unified is cgroupv2-only key-value map.
144+
Unified map[string]string `json:"unified"`
145+
146+
// SkipDevices allows to skip configuring device permissions.
147+
// Used by e.g. kubelet while creating a parent cgroup (kubepods)
148+
// common for many containers, and by runc update.
149+
//
150+
// NOTE it is impossible to start a container which has this flag set.
151+
SkipDevices bool `json:"-"`
152+
153+
// SkipFreezeOnSet is a flag for cgroup manager to skip the cgroup
154+
// freeze when setting resources. Only applicable to systemd legacy
155+
// (i.e. cgroup v1) manager (which uses freeze by default to avoid
156+
// spurious permission errors caused by systemd inability to update
157+
// device rules in a non-disruptive manner).
158+
//
159+
// If not set, a few methods (such as looking into cgroup's
160+
// devices.list and querying the systemd unit properties) are used
161+
// during Set() to figure out whether the freeze is required. Those
162+
// methods may be relatively slow, thus this flag.
163+
SkipFreezeOnSet bool `json:"-"`
164+
165+
// MemoryCheckBeforeUpdate is a flag for cgroup v2 managers to check
166+
// if the new memory limits (Memory and MemorySwap) being set are lower
167+
// than the current memory usage, and reject if so.
168+
MemoryCheckBeforeUpdate bool `json:"memory_check_before_update"`
169+
}

config_rdma.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
package cgroups
2+
3+
// LinuxRdma holds limits for Linux cgroup 'rdma' resource management (Linux 4.11).
4+
type LinuxRdma struct {
5+
// HcaHandles is the maximum number of HCA handles that can be opened. Default is "no limit".
6+
HcaHandles *uint32 `json:"hca_handles,omitempty"`
7+
// HcaObjects is the maximum number of HCA objects that can be created. Default is "no limit".
8+
HcaObjects *uint32 `json:"hca_objects,omitempty"`
9+
}

config_unsupported.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
//go:build !linux
2+
3+
package cgroups
4+
5+
// Cgroup holds properties of a cgroup on Linux. In this file, built only
// for non-Linux targets, it is an empty placeholder type.
//
6+
// TODO Windows: This can ultimately be entirely factored out on Windows as
7+
// cgroups are a Unix-specific construct.
8+
type Cgroup struct{}

0 commit comments

Comments
 (0)