Skip to content

Commit 4752fd2

Browse files
committed
libcontainer: add support for Intel RDT/CAT in runc
About Intel RDT/CAT feature: Intel platforms with new Xeon CPUs support Intel Resource Director Technology (RDT). Cache Allocation Technology (CAT) is a sub-feature of RDT, which currently supports L3 cache resource allocation. This feature provides a way for the software to restrict cache allocation to a defined 'subset' of L3 cache which may be overlapping with other 'subsets'. The different subsets are identified by class of service (CLOS) and each CLOS has a capacity bitmask (CBM). More information about Intel RDT/CAT can be found in section 17.17 of the Intel Software Developer Manual. About Intel RDT/CAT kernel interface: In the Linux kernel, the interface is defined and exposed via the "resource control" filesystem, which is a "cgroup-like" interface. Compared with cgroups, it has a similar process management lifecycle and similar interfaces in a container. But unlike cgroups' hierarchy, it has a single-level filesystem layout. Intel RDT "resource control" filesystem hierarchy: mount -t resctrl resctrl /sys/fs/resctrl tree /sys/fs/resctrl /sys/fs/resctrl/ |-- info | |-- L3 | |-- cbm_mask | |-- min_cbm_bits | |-- num_closids |-- cpus |-- schemata |-- tasks |-- <container_id> |-- cpus |-- schemata |-- tasks For runc, we can make use of the `tasks` and `schemata` configuration for L3 cache resource constraints. The file `tasks` has a list of tasks that belong to this group (e.g., the "<container_id>" group). Tasks can be added to a group by writing the task ID to the "tasks" file (which will automatically remove them from the previous group to which they belonged). New tasks created by fork(2) and clone(2) are added to the same group as their parent. If a pid is not in any sub group, it is in the root group. The file `schemata` has allocation bitmasks/values for L3 cache on each socket, which contains L3 cache id and capacity bitmask (CBM). Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..." 
For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0` which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0. A valid L3 cache CBM is a *contiguous set of bits*, and the number of bits that can be set is less than the max bit. The max number of bits in the CBM varies among supported Intel Xeon platforms. In the Intel RDT "resource control" filesystem layout, the CBM in a group should be a subset of the CBM in root. The kernel will check if it is valid when writing. E.g., 0xfffff in root indicates the max bits of CBM is 20 bits, which maps to the entire L3 cache capacity. Some valid CBM values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00, etc. For more information about the Intel RDT/CAT kernel interface, see: https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt An example for runc: There are two L3 caches in the two-socket machine, the default CBM is 0xfffff and the max CBM length is 20 bits. This configuration assigns 4/5 of L3 cache id 0 and the whole L3 cache id 1 for the container: "linux": { "intelRdt": { "l3CacheSchema": "L3:0=ffff0;1=fffff" } } Signed-off-by: Xiaochen Shen <[email protected]>
1 parent c8486a7 commit 4752fd2

File tree

15 files changed

+894
-11
lines changed

15 files changed

+894
-11
lines changed

events.go

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,12 @@ type event struct {
2424

2525
// stats is the runc specific stats structure for stability when encoding and decoding stats.
2626
type stats struct {
27-
CPU cpu `json:"cpu"`
28-
Memory memory `json:"memory"`
29-
Pids pids `json:"pids"`
30-
Blkio blkio `json:"blkio"`
31-
Hugetlb map[string]hugetlb `json:"hugetlb"`
27+
CPU cpu `json:"cpu"`
28+
Memory memory `json:"memory"`
29+
Pids pids `json:"pids"`
30+
Blkio blkio `json:"blkio"`
31+
Hugetlb map[string]hugetlb `json:"hugetlb"`
32+
IntelRdt intelRdt `json:"intelRdt"`
3233
}
3334

3435
type hugetlb struct {
@@ -95,6 +96,12 @@ type memory struct {
9596
Raw map[string]uint64 `json:"raw,omitempty"`
9697
}
9798

99+
type intelRdt struct {
100+
// The read-only default "schema" in root, for reference
101+
L3CacheSchemaRoot string `json:"l3CacheSchemaRoot,omitempty"`
102+
L3CacheSchema string `json:"l3CacheSchema,omitempty"`
103+
}
104+
98105
var eventsCommand = cli.Command{
99106
Name: "events",
100107
Usage: "display container events such as OOM notifications, cpu, memory, and IO usage statistics",
@@ -226,6 +233,14 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *stats {
226233
for k, v := range cg.HugetlbStats {
227234
s.Hugetlb[k] = convertHugtlb(v)
228235
}
236+
237+
is := ls.IntelRdtStats
238+
if is == nil {
239+
return &s
240+
}
241+
s.IntelRdt.L3CacheSchemaRoot = is.IntelRdtRootStats.L3CacheSchema
242+
s.IntelRdt.L3CacheSchema = is.IntelRdtStats.L3CacheSchema
243+
229244
return &s
230245
}
231246

libcontainer/configs/config.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,10 @@ type Config struct {
186186

187187
// Rootless specifies whether the container is a rootless container.
188188
Rootless bool `json:"rootless"`
189+
190+
// IntelRdt specifies settings for the Intel RDT/CAT group that the container is
191+
// placed into to limit the resources (e.g., L3 cache) the container has available
192+
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
189193
}
190194

191195
type Hooks struct {

libcontainer/configs/intelrdt.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
package configs
2+
3+
type IntelRdt struct {
4+
// The schema for L3 cache id and capacity bitmask (CBM)
5+
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
6+
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
7+
}

libcontainer/container_linux.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"github.com/opencontainers/runc/libcontainer/cgroups"
2323
"github.com/opencontainers/runc/libcontainer/configs"
2424
"github.com/opencontainers/runc/libcontainer/criurpc"
25+
"github.com/opencontainers/runc/libcontainer/intelrdt"
2526
"github.com/opencontainers/runc/libcontainer/resourcemanager"
2627
"github.com/opencontainers/runc/libcontainer/system"
2728
"github.com/opencontainers/runc/libcontainer/utils"
@@ -65,6 +66,9 @@ type State struct {
6566

6667
// Container's standard descriptors (std{in,out,err}), needed for checkpoint and restore
6768
ExternalDescriptors []string `json:"external_descriptors,omitempty"`
69+
70+
// Intel RDT "resource control" filesystem path
71+
IntelRdtPath string `json:"intel_rdt_path"`
6872
}
6973

7074
// Container is a libcontainer container object.
@@ -163,6 +167,13 @@ func (c *linuxContainer) Stats() (*Stats, error) {
163167
if err != nil {
164168
return stats, newSystemErrorWithCause(err, "getting container stats from cgroups")
165169
}
170+
if intelRdtManager, ok := c.resourceManagers["intelrdt"]; ok == true {
171+
intelRdtStats, err := intelRdtManager.GetStats()
172+
if err != nil {
173+
return stats, newSystemErrorWithCause(err, "getting container's Intel RDT stats")
174+
}
175+
stats.IntelRdtStats = intelRdtStats.(*intelrdt.Stats)
176+
}
166177
for _, iface := range c.config.Networks {
167178
switch iface.Type {
168179
case "veth":
@@ -440,9 +451,14 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe,
440451
if err != nil {
441452
return nil, err
442453
}
454+
intelRdtPath, err := intelrdt.GetIntelRdtPath(c.ID())
455+
if err != nil {
456+
intelRdtPath = ""
457+
}
443458
return &setnsProcess{
444459
cmd: cmd,
445460
cgroupPaths: c.resourceManagers["cgroups"].GetPaths(),
461+
intelRdtPath: intelRdtPath,
446462
childPipe: childPipe,
447463
parentPipe: parentPipe,
448464
config: c.newInitConfig(p),
@@ -1341,6 +1357,10 @@ func (c *linuxContainer) currentState() (*State, error) {
13411357
startTime, _ = c.initProcess.startTime()
13421358
externalDescriptors = c.initProcess.externalDescriptors()
13431359
}
1360+
intelRdtPath, err := intelrdt.GetIntelRdtPath(c.ID())
1361+
if err != nil {
1362+
intelRdtPath = ""
1363+
}
13441364
state := &State{
13451365
BaseState: BaseState{
13461366
ID: c.ID(),
@@ -1351,6 +1371,7 @@ func (c *linuxContainer) currentState() (*State, error) {
13511371
},
13521372
Rootless: c.config.Rootless,
13531373
CgroupPaths: c.resourceManagers["cgroups"].GetPaths(),
1374+
IntelRdtPath: intelRdtPath,
13541375
NamespacePaths: make(map[configs.NamespaceType]string),
13551376
ExternalDescriptors: externalDescriptors,
13561377
}

libcontainer/container_linux_test.go

Lines changed: 80 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99

1010
"github.com/opencontainers/runc/libcontainer/cgroups"
1111
"github.com/opencontainers/runc/libcontainer/configs"
12+
"github.com/opencontainers/runc/libcontainer/intelrdt"
1213
"github.com/opencontainers/runc/libcontainer/resourcemanager"
1314
)
1415

@@ -19,6 +20,13 @@ type mockCgroupManager struct {
1920
paths map[string]string
2021
}
2122

23+
type mockIntelRdtManager struct {
24+
pids []int
25+
allPids []int
26+
stats *intelrdt.Stats
27+
path string
28+
}
29+
2230
func (m *mockCgroupManager) GetPids() ([]int, error) {
2331
return m.pids, nil
2432
}
@@ -51,6 +59,40 @@ func (m *mockCgroupManager) Freeze(state configs.FreezerState) error {
5159
return nil
5260
}
5361

62+
func (m *mockIntelRdtManager) GetPids() ([]int, error) {
63+
return m.pids, nil
64+
}
65+
66+
func (m *mockIntelRdtManager) GetAllPids() ([]int, error) {
67+
return m.allPids, nil
68+
}
69+
70+
func (m *mockIntelRdtManager) GetStats() (interface{}, error) {
71+
return m.stats, nil
72+
}
73+
74+
func (m *mockIntelRdtManager) Apply(pid int) error {
75+
return nil
76+
}
77+
78+
func (m *mockIntelRdtManager) Set(container *configs.Config) error {
79+
return nil
80+
}
81+
82+
func (m *mockIntelRdtManager) Destroy() error {
83+
return nil
84+
}
85+
86+
func (m *mockIntelRdtManager) GetPaths() map[string]string {
87+
paths := make(map[string]string)
88+
paths["intelrdt"] = m.path
89+
return paths
90+
}
91+
92+
func (m *mockIntelRdtManager) Freeze(state configs.FreezerState) error {
93+
return nil
94+
}
95+
5496
type mockProcess struct {
5597
_pid int
5698
started string
@@ -121,6 +163,14 @@ func TestGetContainerStats(t *testing.T) {
121163
},
122164
},
123165
}
166+
container.resourceManagers["intelrdt"] = &mockIntelRdtManager{
167+
pids: []int{1, 2, 3},
168+
stats: &intelrdt.Stats{
169+
IntelRdtStats: intelrdt.IntelRdtStats{
170+
L3CacheSchema: "L3:0=ffff0;1=fff00",
171+
},
172+
},
173+
}
124174
stats, err := container.Stats()
125175
if err != nil {
126176
t.Fatal(err)
@@ -131,13 +181,22 @@ func TestGetContainerStats(t *testing.T) {
131181
if stats.CgroupStats.MemoryStats.Usage.Usage != 1024 {
132182
t.Fatalf("expected memory usage 1024 but recevied %d", stats.CgroupStats.MemoryStats.Usage.Usage)
133183
}
184+
if intelrdt.IsIntelRdtEnabled() {
185+
if stats.IntelRdtStats == nil {
186+
t.Fatal("intel rdt stats are nil")
187+
}
188+
if stats.IntelRdtStats.IntelRdtStats.L3CacheSchema != "L3:0=ffff0;1=fff00" {
189+
t.Fatalf("expected L3CacheSchema L3:0=ffff0;1=fff00 but recevied %s", stats.IntelRdtStats.IntelRdtStats.L3CacheSchema)
190+
}
191+
}
134192
}
135193

136194
func TestGetContainerState(t *testing.T) {
137195
var (
138-
pid = os.Getpid()
139-
expectedMemoryPath = "/sys/fs/cgroup/memory/myid"
140-
expectedNetworkPath = "/networks/fd"
196+
pid = os.Getpid()
197+
expectedMemoryPath = "/sys/fs/cgroup/memory/myid"
198+
expectedNetworkPath = "/networks/fd"
199+
expectedIntelRdtPath = "/sys/fs/resctrl/myid"
141200
)
142201
container := &linuxContainer{
143202
id: "myid",
@@ -170,6 +229,15 @@ func TestGetContainerState(t *testing.T) {
170229
"memory": expectedMemoryPath,
171230
},
172231
}
232+
container.resourceManagers["intelrdt"] = &mockIntelRdtManager{
233+
pids: []int{1, 2, 3},
234+
stats: &intelrdt.Stats{
235+
IntelRdtStats: intelrdt.IntelRdtStats{
236+
L3CacheSchema: "L3:0=ffff0;1=fff00",
237+
},
238+
},
239+
path: expectedIntelRdtPath,
240+
}
173241
container.state = &createdState{c: container}
174242
state, err := container.State()
175243
if err != nil {
@@ -188,6 +256,15 @@ func TestGetContainerState(t *testing.T) {
188256
if memPath := paths["memory"]; memPath != expectedMemoryPath {
189257
t.Fatalf("expected memory path %q but received %q", expectedMemoryPath, memPath)
190258
}
259+
if intelrdt.IsIntelRdtEnabled() {
260+
path := state.IntelRdtPath
261+
if path == "" {
262+
t.Fatal("intel rdt path should not be empty")
263+
}
264+
if intelRdtPath := path; intelRdtPath != expectedIntelRdtPath {
265+
t.Fatalf("expected intel rdt path %q but received %q", expectedIntelRdtPath, intelRdtPath)
266+
}
267+
}
191268
for _, ns := range container.config.Namespaces {
192269
path := state.NamespacePaths[ns.Type]
193270
if path == "" {

libcontainer/factory_linux.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
2020
"github.com/opencontainers/runc/libcontainer/configs"
2121
"github.com/opencontainers/runc/libcontainer/configs/validate"
22+
"github.com/opencontainers/runc/libcontainer/intelrdt"
2223
"github.com/opencontainers/runc/libcontainer/resourcemanager"
2324
"github.com/opencontainers/runc/libcontainer/utils"
2425
)
@@ -89,6 +90,19 @@ func RootlessCgroups(l *LinuxFactory) error {
8990
return nil
9091
}
9192

93+
// IntelRdtFs is an option func to configure a LinuxFactory to return
94+
// containers that use the Intel RDT "resource control" filesystem to
95+
// create and manage Intel Xeon platform shared resources (e.g., L3 cache).
96+
func IntelRdtFs(l *LinuxFactory) error {
97+
l.NewIntelRdtManager = func(config *configs.Config, id string) intelrdt.Manager {
98+
return &intelrdt.IntelRdtManager{
99+
Config: config,
100+
Id: id,
101+
}
102+
}
103+
return nil
104+
}
105+
92106
// TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs.
93107
func TmpfsRoot(l *LinuxFactory) error {
94108
mounted, err := mount.Mounted(l.Root)
@@ -153,6 +167,9 @@ type LinuxFactory struct {
153167

154168
// NewCgroupsManager returns an initialized cgroups manager for a single container.
155169
NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager
170+
171+
// NewIntelRdtManager returns an initialized Intel RDT manager for a single container.
172+
NewIntelRdtManager func(config *configs.Config, id string) intelrdt.Manager
156173
}
157174

158175
func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
@@ -197,6 +214,9 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
197214
}
198215
resourceManagers := make(map[string]resourcemanager.ResourceManager)
199216
resourceManagers["cgroups"] = l.NewCgroupsManager(config.Cgroups, nil)
217+
if intelrdt.IsIntelRdtEnabled() {
218+
resourceManagers["intelrdt"] = l.NewIntelRdtManager(config, id)
219+
}
200220
c.resourceManagers = resourceManagers
201221
c.state = &stoppedState{c: c}
202222
return c, nil
@@ -232,6 +252,9 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
232252
}
233253
resourceManagers := make(map[string]resourcemanager.ResourceManager)
234254
resourceManagers["cgroups"] = l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths)
255+
if intelrdt.IsIntelRdtEnabled() {
256+
resourceManagers["intelrdt"] = l.NewIntelRdtManager(&state.Config, id)
257+
}
235258
c.resourceManagers = resourceManagers
236259
c.state = &loadedState{c: c}
237260
if err := c.refreshState(); err != nil {

libcontainer/factory_linux_test.go

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,32 @@ func TestFactoryNew(t *testing.T) {
4949
}
5050
}
5151

52+
func TestFactoryNewIntelRdt(t *testing.T) {
53+
root, rerr := newTestRoot()
54+
if rerr != nil {
55+
t.Fatal(rerr)
56+
}
57+
defer os.RemoveAll(root)
58+
factory, err := New(root, Cgroupfs, IntelRdtFs)
59+
if err != nil {
60+
t.Fatal(err)
61+
}
62+
if factory == nil {
63+
t.Fatal("factory should not be nil")
64+
}
65+
lfactory, ok := factory.(*LinuxFactory)
66+
if !ok {
67+
t.Fatal("expected linux factory returned on linux based systems")
68+
}
69+
if lfactory.Root != root {
70+
t.Fatalf("expected factory root to be %q but received %q", root, lfactory.Root)
71+
}
72+
73+
if factory.Type() != "libcontainer" {
74+
t.Fatalf("unexpected factory type: %q, expected %q", factory.Type(), "libcontainer")
75+
}
76+
}
77+
5278
func TestFactoryNewTmpfs(t *testing.T) {
5379
root, rerr := newTestRoot()
5480
if rerr != nil {
@@ -163,7 +189,7 @@ func TestFactoryLoadContainer(t *testing.T) {
163189
if err := marshal(filepath.Join(root, id, stateFilename), expectedState); err != nil {
164190
t.Fatal(err)
165191
}
166-
factory, err := New(root, Cgroupfs)
192+
factory, err := New(root, Cgroupfs, IntelRdtFs)
167193
if err != nil {
168194
t.Fatal(err)
169195
}

0 commit comments

Comments
 (0)