Skip to content

Commit 2d4d43f

Browse files
committed
libcontainer: add support for Intel RDT/CAT in runc
About Intel RDT/CAT feature: Intel platforms with new Xeon CPUs support Intel Resource Director Technology (RDT). Cache Allocation Technology (CAT) is a sub-feature of RDT, which currently supports L3 cache resource allocation. This feature provides a way for the software to restrict cache allocation to a defined 'subset' of L3 cache which may overlap with other 'subsets'. The different subsets are identified by class of service (CLOS) and each CLOS has a capacity bitmask (CBM). More information about Intel RDT/CAT can be found in section 17.17 of the Intel Software Developer Manual. About Intel RDT/CAT kernel interface: In the Linux kernel, the interface is defined and exposed via the "resource control" filesystem, which is a "cgroup-like" interface. Compared with cgroups, it has a similar process management lifecycle and similar interfaces in a container. But unlike cgroups' hierarchy, it has a single-level filesystem layout. Intel RDT "resource control" filesystem hierarchy: mount -t resctrl resctrl /sys/fs/resctrl tree /sys/fs/resctrl /sys/fs/resctrl/ |-- info | |-- L3 | |-- cbm_mask | |-- min_cbm_bits | |-- num_closids |-- cpus |-- schemata |-- tasks |-- <container_id> |-- cpus |-- schemata |-- tasks For runc, we can make use of the `tasks` and `schemata` configuration for L3 cache resource constraints. The file `tasks` has a list of tasks that belong to this group (e.g., the "<container_id>" group). Tasks can be added to a group by writing the task ID to the "tasks" file (which will automatically remove them from the previous group to which they belonged). New tasks created by fork(2) and clone(2) are added to the same group as their parent. If a pid is not in any sub group, it is in the root group. The file `schemata` has allocation bitmasks/values for L3 cache on each socket, which contains L3 cache id and capacity bitmask (CBM). Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0`, which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0. A valid L3 cache CBM is a *contiguous set of bits*, and the number of bits that can be set is less than the max bit. The max number of bits in the CBM varies among supported Intel Xeon platforms. In the Intel RDT "resource control" filesystem layout, the CBM in a group should be a subset of the CBM in root. The kernel will check whether it is valid when writing. E.g., 0xfffff in root indicates the max number of bits of the CBM is 20, which maps to the entire L3 cache capacity. Some valid CBM values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00, etc. For more information about the Intel RDT/CAT kernel interface: https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt An example for runc: There are two L3 caches in the two-socket machine, the default CBM is 0xfffff and the max CBM length is 20 bits. This configuration assigns 4/5 of L3 cache id 0 and the whole L3 cache id 1 for the container: "linux": { "resources": { "intelRdt": { "l3CacheSchema": "L3:0=ffff0;1=fffff" } } } Signed-off-by: Xiaochen Shen <[email protected]>
1 parent 4cc6759 commit 2d4d43f

File tree

14 files changed

+890
-11
lines changed

14 files changed

+890
-11
lines changed

events.go

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,12 @@ type event struct {
2424

2525
// stats is the runc specific stats structure for stability when encoding and decoding stats.
2626
type stats struct {
27-
CPU cpu `json:"cpu"`
28-
Memory memory `json:"memory"`
29-
Pids pids `json:"pids"`
30-
Blkio blkio `json:"blkio"`
31-
Hugetlb map[string]hugetlb `json:"hugetlb"`
27+
CPU cpu `json:"cpu"`
28+
Memory memory `json:"memory"`
29+
Pids pids `json:"pids"`
30+
Blkio blkio `json:"blkio"`
31+
Hugetlb map[string]hugetlb `json:"hugetlb"`
32+
IntelRdt intelRdt `json:"intelRdt"`
3233
}
3334

3435
type hugetlb struct {
@@ -95,6 +96,12 @@ type memory struct {
9596
Raw map[string]uint64 `json:"raw,omitempty"`
9697
}
9798

99+
// intelRdt is the Intel RDT section of the runc-specific stats structure.
// Both fields are omitted from the JSON encoding when empty.
type intelRdt struct {
100+
// The read-only default "schema" in root, for reference
101+
L3CacheSchemaRoot string `json:"l3CacheSchemaRoot,omitempty"`
102+
// The L3 cache schema copied from the container's own Intel RDT stats.
L3CacheSchema string `json:"l3CacheSchema,omitempty"`
103+
}
104+
98105
var eventsCommand = cli.Command{
99106
Name: "events",
100107
Usage: "display container events such as OOM notifications, cpu, memory, and IO usage statistics",
@@ -226,6 +233,14 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *stats {
226233
for k, v := range cg.HugetlbStats {
227234
s.Hugetlb[k] = convertHugtlb(v)
228235
}
236+
237+
is := ls.IntelRdtStats
238+
if is == nil {
239+
return &s
240+
}
241+
s.IntelRdt.L3CacheSchemaRoot = is.IntelRdtRootStats.L3CacheSchema
242+
s.IntelRdt.L3CacheSchema = is.IntelRdtStats.L3CacheSchema
243+
229244
return &s
230245
}
231246

libcontainer/configs/cgroup_unix.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,4 +121,8 @@ type Resources struct {
121121

122122
// Set class identifier for container's network packets
123123
NetClsClassid uint32 `json:"net_cls_classid_u"`
124+
125+
// Intel RDT: the schema for L3 cache id and capacity bitmask (CBM)
126+
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
127+
IntelRdtL3CacheSchema string `json:"intel_rdt_l3_cache_schema"`
124128
}

libcontainer/container_linux.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"github.com/opencontainers/runc/libcontainer/cgroups"
2323
"github.com/opencontainers/runc/libcontainer/configs"
2424
"github.com/opencontainers/runc/libcontainer/criurpc"
25+
"github.com/opencontainers/runc/libcontainer/intelrdt"
2526
"github.com/opencontainers/runc/libcontainer/resourcemanager"
2627
"github.com/opencontainers/runc/libcontainer/system"
2728
"github.com/opencontainers/runc/libcontainer/utils"
@@ -62,6 +63,9 @@ type State struct {
6263

6364
// Container's standard descriptors (std{in,out,err}), needed for checkpoint and restore
6465
ExternalDescriptors []string `json:"external_descriptors,omitempty"`
66+
67+
// Intel RDT "resource control" filesystem path
68+
IntelRdtPath string `json:"intel_rdt_path"`
6569
}
6670

6771
// Container is a libcontainer container object.
@@ -160,6 +164,13 @@ func (c *linuxContainer) Stats() (*Stats, error) {
160164
if err != nil {
161165
return stats, newSystemErrorWithCause(err, "getting container stats from cgroups")
162166
}
167+
if intelRdtManager, ok := c.resourceManagers["intelrdt"]; ok == true {
168+
intelRdtStats, err := intelRdtManager.GetStats()
169+
if err != nil {
170+
return stats, newSystemErrorWithCause(err, "getting container's Intel RDT stats")
171+
}
172+
stats.IntelRdtStats = intelRdtStats.(*intelrdt.Stats)
173+
}
163174
for _, iface := range c.config.Networks {
164175
switch iface.Type {
165176
case "veth":
@@ -430,11 +441,16 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe,
430441
if err != nil {
431442
return nil, err
432443
}
444+
intelRdtPath, err := intelrdt.GetIntelRdtPath(c.ID())
445+
if err != nil {
446+
intelRdtPath = ""
447+
}
433448
// TODO: set on container for process management
434449
p.consoleChan = make(chan *os.File, 1)
435450
return &setnsProcess{
436451
cmd: cmd,
437452
cgroupPaths: c.resourceManagers["cgroups"].GetPaths(),
453+
intelRdtPath: intelRdtPath,
438454
childPipe: childPipe,
439455
parentPipe: parentPipe,
440456
config: c.newInitConfig(p),
@@ -1328,6 +1344,10 @@ func (c *linuxContainer) currentState() (*State, error) {
13281344
startTime, _ = c.initProcess.startTime()
13291345
externalDescriptors = c.initProcess.externalDescriptors()
13301346
}
1347+
intelRdtPath, err := intelrdt.GetIntelRdtPath(c.ID())
1348+
if err != nil {
1349+
intelRdtPath = ""
1350+
}
13311351
state := &State{
13321352
BaseState: BaseState{
13331353
ID: c.ID(),
@@ -1337,6 +1357,7 @@ func (c *linuxContainer) currentState() (*State, error) {
13371357
Created: c.created,
13381358
},
13391359
CgroupPaths: c.resourceManagers["cgroups"].GetPaths(),
1360+
IntelRdtPath: intelRdtPath,
13401361
NamespacePaths: make(map[configs.NamespaceType]string),
13411362
ExternalDescriptors: externalDescriptors,
13421363
}

libcontainer/container_linux_test.go

Lines changed: 80 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99

1010
"github.com/opencontainers/runc/libcontainer/cgroups"
1111
"github.com/opencontainers/runc/libcontainer/configs"
12+
"github.com/opencontainers/runc/libcontainer/intelrdt"
1213
"github.com/opencontainers/runc/libcontainer/resourcemanager"
1314
)
1415

@@ -19,6 +20,13 @@ type mockCgroupManager struct {
1920
paths map[string]string
2021
}
2122

23+
// mockIntelRdtManager is a test double registered under the "intelrdt"
// resource manager key. Its fields are canned values that the mock's
// methods hand back unchanged.
type mockIntelRdtManager struct {
24+
// pids is returned by GetPids.
pids []int
25+
// allPids is returned by GetAllPids.
allPids []int
26+
// stats is returned by GetStats.
stats *intelrdt.Stats
27+
// path is exposed by GetPaths under the "intelrdt" key.
path string
28+
}
29+
2230
func (m *mockCgroupManager) GetPids() ([]int, error) {
2331
return m.pids, nil
2432
}
@@ -51,6 +59,40 @@ func (m *mockCgroupManager) Freeze(state configs.FreezerState) error {
5159
return nil
5260
}
5361

62+
// GetPids returns the canned pids slice and always a nil error.
func (m *mockIntelRdtManager) GetPids() ([]int, error) {
63+
return m.pids, nil
64+
}
65+
66+
// GetAllPids returns the canned allPids slice and always a nil error.
func (m *mockIntelRdtManager) GetAllPids() ([]int, error) {
67+
return m.allPids, nil
68+
}
69+
70+
// GetStats returns the canned *intelrdt.Stats wrapped in an interface{}
// value, and always a nil error.
func (m *mockIntelRdtManager) GetStats() (interface{}, error) {
71+
return m.stats, nil
72+
}
73+
74+
// Apply is a no-op in the mock; it ignores pid and reports success.
func (m *mockIntelRdtManager) Apply(pid int) error {
75+
return nil
76+
}
77+
78+
// Set is a no-op in the mock; it ignores the config and reports success.
func (m *mockIntelRdtManager) Set(container *configs.Config) error {
79+
return nil
80+
}
81+
82+
// Destroy is a no-op in the mock and always reports success.
func (m *mockIntelRdtManager) Destroy() error {
83+
return nil
84+
}
85+
86+
// GetPaths returns a newly allocated single-entry map that stores the
// mock's path under the "intelrdt" key.
func (m *mockIntelRdtManager) GetPaths() map[string]string {
87+
paths := make(map[string]string)
88+
paths["intelrdt"] = m.path
89+
return paths
90+
}
91+
92+
// Freeze is a no-op in the mock; it ignores state and reports success.
func (m *mockIntelRdtManager) Freeze(state configs.FreezerState) error {
93+
return nil
94+
}
95+
5496
type mockProcess struct {
5597
_pid int
5698
started string
@@ -121,6 +163,14 @@ func TestGetContainerStats(t *testing.T) {
121163
},
122164
},
123165
}
166+
container.resourceManagers["intelrdt"] = &mockIntelRdtManager{
167+
pids: []int{1, 2, 3},
168+
stats: &intelrdt.Stats{
169+
IntelRdtStats: intelrdt.IntelRdtStats{
170+
L3CacheSchema: "L3:0=ffff0;1=fff00",
171+
},
172+
},
173+
}
124174
stats, err := container.Stats()
125175
if err != nil {
126176
t.Fatal(err)
@@ -131,13 +181,22 @@ func TestGetContainerStats(t *testing.T) {
131181
if stats.CgroupStats.MemoryStats.Usage.Usage != 1024 {
132182
t.Fatalf("expected memory usage 1024 but recevied %d", stats.CgroupStats.MemoryStats.Usage.Usage)
133183
}
184+
if intelrdt.IsIntelRdtEnabled() {
185+
if stats.IntelRdtStats == nil {
186+
t.Fatal("intel rdt stats are nil")
187+
}
188+
if stats.IntelRdtStats.IntelRdtStats.L3CacheSchema != "L3:0=ffff0;1=fff00" {
189+
t.Fatalf("expected L3CacheSchema L3:0=ffff0;1=fff00 but recevied %s", stats.IntelRdtStats.IntelRdtStats.L3CacheSchema)
190+
}
191+
}
134192
}
135193

136194
func TestGetContainerState(t *testing.T) {
137195
var (
138-
pid = os.Getpid()
139-
expectedMemoryPath = "/sys/fs/cgroup/memory/myid"
140-
expectedNetworkPath = "/networks/fd"
196+
pid = os.Getpid()
197+
expectedMemoryPath = "/sys/fs/cgroup/memory/myid"
198+
expectedNetworkPath = "/networks/fd"
199+
expectedIntelRdtPath = "/sys/fs/resctrl/myid"
141200
)
142201
container := &linuxContainer{
143202
id: "myid",
@@ -170,6 +229,15 @@ func TestGetContainerState(t *testing.T) {
170229
"memory": expectedMemoryPath,
171230
},
172231
}
232+
container.resourceManagers["intelrdt"] = &mockIntelRdtManager{
233+
pids: []int{1, 2, 3},
234+
stats: &intelrdt.Stats{
235+
IntelRdtStats: intelrdt.IntelRdtStats{
236+
L3CacheSchema: "L3:0=ffff0;1=fff00",
237+
},
238+
},
239+
path: expectedIntelRdtPath,
240+
}
173241
container.state = &createdState{c: container}
174242
state, err := container.State()
175243
if err != nil {
@@ -188,6 +256,15 @@ func TestGetContainerState(t *testing.T) {
188256
if memPath := paths["memory"]; memPath != expectedMemoryPath {
189257
t.Fatalf("expected memory path %q but received %q", expectedMemoryPath, memPath)
190258
}
259+
if intelrdt.IsIntelRdtEnabled() {
260+
path := state.IntelRdtPath
261+
if path == "" {
262+
t.Fatal("intel rdt path should not be empty")
263+
}
264+
if intelRdtPath := path; intelRdtPath != expectedIntelRdtPath {
265+
t.Fatalf("expected intel rdt path %q but received %q", expectedIntelRdtPath, intelRdtPath)
266+
}
267+
}
191268
for _, ns := range container.config.Namespaces {
192269
path := state.NamespacePaths[ns.Type]
193270
if path == "" {

libcontainer/factory_linux.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
1919
"github.com/opencontainers/runc/libcontainer/configs"
2020
"github.com/opencontainers/runc/libcontainer/configs/validate"
21+
"github.com/opencontainers/runc/libcontainer/intelrdt"
2122
"github.com/opencontainers/runc/libcontainer/resourcemanager"
2223
"github.com/opencontainers/runc/libcontainer/utils"
2324
)
@@ -74,6 +75,19 @@ func Cgroupfs(l *LinuxFactory) error {
7475
return nil
7576
}
7677

78+
// IntelRdtFs is an option func to configure a LinuxFactory to return
79+
// containers that use the Intel RDT "resource control" filesystem to
80+
// create and manage Intel Xeon platform shared resources (e.g., L3 cache).
// It installs a NewIntelRdtManager constructor on the factory and always
// returns nil.
81+
func IntelRdtFs(l *LinuxFactory) error {
82+
l.NewIntelRdtManager = func(config *configs.Config, id string) intelrdt.Manager {
83+
return &intelrdt.IntelRdtManager{
84+
Config: config,
85+
Id: id,
86+
}
87+
}
88+
return nil
89+
}
90+
7791
// TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs.
7892
func TmpfsRoot(l *LinuxFactory) error {
7993
mounted, err := mount.Mounted(l.Root)
@@ -138,6 +152,9 @@ type LinuxFactory struct {
138152

139153
// NewCgroupsManager returns an initialized cgroups manager for a single container.
140154
NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager
155+
156+
// NewIntelRdtManager returns an initialized Intel RDT manager for a single container.
157+
NewIntelRdtManager func(config *configs.Config, id string) intelrdt.Manager
141158
}
142159

143160
func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
@@ -179,6 +196,9 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
179196
}
180197
resourceManagers := make(map[string]resourcemanager.ResourceManager)
181198
resourceManagers["cgroups"] = l.NewCgroupsManager(config.Cgroups, nil)
199+
if intelrdt.IsIntelRdtEnabled() {
200+
resourceManagers["intelrdt"] = l.NewIntelRdtManager(config, id)
201+
}
182202
c.resourceManagers = resourceManagers
183203
c.state = &stoppedState{c: c}
184204
return c, nil
@@ -210,6 +230,9 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
210230
}
211231
resourceManagers := make(map[string]resourcemanager.ResourceManager)
212232
resourceManagers["cgroups"] = l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths)
233+
if intelrdt.IsIntelRdtEnabled() {
234+
resourceManagers["intelrdt"] = l.NewIntelRdtManager(&state.Config, id)
235+
}
213236
c.resourceManagers = resourceManagers
214237
c.state = &loadedState{c: c}
215238
if err := c.refreshState(); err != nil {

libcontainer/factory_linux_test.go

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,32 @@ func TestFactoryNew(t *testing.T) {
4949
}
5050
}
5151

52+
// TestFactoryNewIntelRdt builds a factory with the Cgroupfs and IntelRdtFs
// options and checks that it is a non-nil *LinuxFactory rooted at the test
// directory whose Type() is "libcontainer".
// NOTE(review): nothing here asserts that lfactory.NewIntelRdtManager was
// actually set by IntelRdtFs — consider adding that check.
func TestFactoryNewIntelRdt(t *testing.T) {
53+
root, rerr := newTestRoot()
54+
if rerr != nil {
55+
t.Fatal(rerr)
56+
}
57+
// Remove the temporary root when the test finishes.
defer os.RemoveAll(root)
58+
factory, err := New(root, Cgroupfs, IntelRdtFs)
59+
if err != nil {
60+
t.Fatal(err)
61+
}
62+
if factory == nil {
63+
t.Fatal("factory should not be nil")
64+
}
65+
lfactory, ok := factory.(*LinuxFactory)
66+
if !ok {
67+
t.Fatal("expected linux factory returned on linux based systems")
68+
}
69+
if lfactory.Root != root {
70+
t.Fatalf("expected factory root to be %q but received %q", root, lfactory.Root)
71+
}
72+
73+
if factory.Type() != "libcontainer" {
74+
t.Fatalf("unexpected factory type: %q, expected %q", factory.Type(), "libcontainer")
75+
}
76+
}
77+
5278
func TestFactoryNewTmpfs(t *testing.T) {
5379
root, rerr := newTestRoot()
5480
if rerr != nil {
@@ -163,7 +189,7 @@ func TestFactoryLoadContainer(t *testing.T) {
163189
if err := marshal(filepath.Join(root, id, stateFilename), expectedState); err != nil {
164190
t.Fatal(err)
165191
}
166-
factory, err := New(root, Cgroupfs)
192+
factory, err := New(root, Cgroupfs, IntelRdtFs)
167193
if err != nil {
168194
t.Fatal(err)
169195
}

0 commit comments

Comments
 (0)