Skip to content

Commit 692f6e1

Browse files
committed
libcontainer: add support for Intel RDT/CAT in runc
About Intel RDT/CAT feature: Intel platforms with new Xeon CPUs support Intel Resource Director Technology (RDT). Cache Allocation Technology (CAT) is a sub-feature of RDT, which currently supports L3 cache resource allocation. This feature provides a way for the software to restrict cache allocation to a defined 'subset' of L3 cache which may be overlapping with other 'subsets'. The different subsets are identified by class of service (CLOS) and each CLOS has a capacity bitmask (CBM). More information about Intel RDT/CAT can be found in section 17.17 of the Intel Software Developer Manual. About Intel RDT/CAT kernel interface: In Linux 4.10 kernel or newer, the interface is defined and exposed via the "resource control" filesystem, which is a "cgroup-like" interface. Compared with cgroups, it has a similar process management lifecycle and interfaces in a container. But unlike cgroups' hierarchy, it has a single-level filesystem layout. Intel RDT "resource control" filesystem hierarchy: mount -t resctrl resctrl /sys/fs/resctrl tree /sys/fs/resctrl /sys/fs/resctrl/ |-- info | |-- L3 | |-- cbm_mask | |-- min_cbm_bits | |-- num_closids |-- cpus |-- schemata |-- tasks |-- <container_id> |-- cpus |-- schemata |-- tasks For runc, we can make use of `tasks` and `schemata` configuration for L3 cache resource constraints. The file `tasks` has a list of tasks that belong to this group (e.g., the "<container_id>" group). Tasks can be added to a group by writing the task ID to the "tasks" file (which will automatically remove them from the previous group to which they belonged). New tasks created by fork(2) and clone(2) are added to the same group as their parent. If a pid is not in any sub group, it is in the root group. The file `schemata` has allocation bitmasks/values for L3 cache on each socket, which contains L3 cache id and capacity bitmask (CBM). Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..." 
For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0` which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0. A valid L3 cache CBM is a *contiguous set of bits*, and the number of bits that can be set is less than the max bit. The max number of bits in the CBM varies among supported Intel Xeon platforms. In the Intel RDT "resource control" filesystem layout, the CBM in a group should be a subset of the CBM in root. The kernel will check if it is valid when writing. E.g., 0xfffff in root indicates the max bits of CBM is 20 bits, which maps to the entire L3 cache capacity. Some valid CBM values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00, etc. For more information about Intel RDT/CAT kernel interface: https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt An example for runc: Consider a two-socket machine with two L3 caches where the default CBM is 0xfffff and the max CBM length is 20 bits. With this configuration, tasks inside the container only have access to the "upper" 80% of L3 cache id 0 and the "lower" 50% of L3 cache id 1: "linux": { "intelRdt": { "l3CacheSchema": "L3:0=ffff0;1=3ff" } } Signed-off-by: Xiaochen Shen <[email protected]>
1 parent af3b0d9 commit 692f6e1

File tree

13 files changed

+757
-26
lines changed

13 files changed

+757
-26
lines changed

events.go

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111

1212
"github.com/opencontainers/runc/libcontainer"
1313
"github.com/opencontainers/runc/libcontainer/cgroups"
14+
"github.com/opencontainers/runc/libcontainer/intelrdt"
1415

1516
"github.com/sirupsen/logrus"
1617
"github.com/urfave/cli"
@@ -25,11 +26,12 @@ type event struct {
2526

2627
// stats is the runc specific stats structure for stability when encoding and decoding stats.
2728
type stats struct {
28-
CPU cpu `json:"cpu"`
29-
Memory memory `json:"memory"`
30-
Pids pids `json:"pids"`
31-
Blkio blkio `json:"blkio"`
32-
Hugetlb map[string]hugetlb `json:"hugetlb"`
29+
CPU cpu `json:"cpu"`
30+
Memory memory `json:"memory"`
31+
Pids pids `json:"pids"`
32+
Blkio blkio `json:"blkio"`
33+
Hugetlb map[string]hugetlb `json:"hugetlb"`
34+
IntelRdt intelRdt `json:"intel_rdt"`
3335
}
3436

3537
type hugetlb struct {
@@ -96,6 +98,23 @@ type memory struct {
9698
Raw map[string]uint64 `json:"raw,omitempty"`
9799
}
98100

101+
// l3CacheInfo is the runc-specific representation of L3 cache information
// used when encoding stats; convertL3CacheInfo fills it from
// intelrdt.L3CacheInfo. Each field is left out of the JSON output when it
// holds its zero value (omitempty).
// NOTE(review): the field names presumably mirror the cbm_mask,
// min_cbm_bits and num_closids files under info/L3 of the resctrl
// filesystem — confirm against the intelrdt package.
type l3CacheInfo struct {
102+
CbmMask string `json:"cbm_mask,omitempty"`
103+
MinCbmBits uint64 `json:"min_cbm_bits,omitempty"`
104+
NumClosids uint64 `json:"num_closids,omitempty"`
105+
}
106+
107+
// intelRdt is the Intel RDT section of the runc stats structure; it is
// encoded under the "intel_rdt" key of stats. All of its fields are omitted
// from the JSON output when empty.
type intelRdt struct {
108+
// The read-only L3 cache information
109+
L3CacheInfo *l3CacheInfo `json:"l3_cache_info,omitempty"`
110+
111+
// The read-only L3 cache schema in root
112+
L3CacheSchemaRoot string `json:"l3_cache_schema_root,omitempty"`
113+
114+
// The L3 cache schema in 'container_id' group
115+
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
116+
}
117+
99118
var eventsCommand = cli.Command{
100119
Name: "events",
101120
Usage: "display container events such as OOM notifications, cpu, memory, and IO usage statistics",
@@ -227,6 +246,13 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *stats {
227246
for k, v := range cg.HugetlbStats {
228247
s.Hugetlb[k] = convertHugtlb(v)
229248
}
249+
250+
if is := ls.IntelRdtStats; is != nil {
251+
s.IntelRdt.L3CacheInfo = convertL3CacheInfo(is.L3CacheInfo)
252+
s.IntelRdt.L3CacheSchemaRoot = is.L3CacheSchemaRoot
253+
s.IntelRdt.L3CacheSchema = is.L3CacheSchema
254+
}
255+
230256
return &s
231257
}
232258

@@ -259,3 +285,11 @@ func convertBlkioEntry(c []cgroups.BlkioStatEntry) []blkioEntry {
259285
}
260286
return out
261287
}
288+
289+
func convertL3CacheInfo(i *intelrdt.L3CacheInfo) *l3CacheInfo {
290+
return &l3CacheInfo{
291+
CbmMask: i.CbmMask,
292+
MinCbmBits: i.MinCbmBits,
293+
NumClosids: i.NumClosids,
294+
}
295+
}

libcontainer/configs/config.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,10 @@ type Config struct {
187187

188188
// Rootless specifies whether the container is a rootless container.
189189
Rootless bool `json:"rootless"`
190+
191+
// IntelRdt specifies settings for Intel RDT/CAT group that the container is placed into
192+
// to limit the resources (e.g., L3 cache) the container has available
193+
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
190194
}
191195

192196
type Hooks struct {

libcontainer/configs/intelrdt.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
package configs
2+
3+
// IntelRdt holds the Intel RDT/CAT settings of a container. It is referenced
// by the IntelRdt field of Config.
type IntelRdt struct {
4+
// The schema for L3 cache id and capacity bitmask (CBM)
5+
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
6+
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
7+
}

libcontainer/configs/validate/validator.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"strings"
88

99
"github.com/opencontainers/runc/libcontainer/configs"
10+
"github.com/opencontainers/runc/libcontainer/intelrdt"
1011
selinux "github.com/opencontainers/selinux/go-selinux"
1112
)
1213

@@ -40,6 +41,9 @@ func (v *ConfigValidator) Validate(config *configs.Config) error {
4041
if err := v.sysctl(config); err != nil {
4142
return err
4243
}
44+
if err := v.intelrdt(config); err != nil {
45+
return err
46+
}
4347
if config.Rootless {
4448
if err := v.rootless(config); err != nil {
4549
return err
@@ -153,6 +157,19 @@ func (v *ConfigValidator) sysctl(config *configs.Config) error {
153157
return nil
154158
}
155159

160+
func (v *ConfigValidator) intelrdt(config *configs.Config) error {
161+
if config.IntelRdt != nil {
162+
if !intelrdt.IsIntelRdtEnabled() {
163+
return fmt.Errorf("intelRdt is specified in config, but Intel RDT feature is not supported or enabled")
164+
}
165+
if config.IntelRdt.L3CacheSchema == "" {
166+
return fmt.Errorf("intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")
167+
}
168+
}
169+
170+
return nil
171+
}
172+
156173
func isSymbolicLink(path string) (bool, error) {
157174
fi, err := os.Lstat(path)
158175
if err != nil {

libcontainer/container_linux.go

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"github.com/opencontainers/runc/libcontainer/cgroups"
2222
"github.com/opencontainers/runc/libcontainer/configs"
2323
"github.com/opencontainers/runc/libcontainer/criurpc"
24+
"github.com/opencontainers/runc/libcontainer/intelrdt"
2425
"github.com/opencontainers/runc/libcontainer/system"
2526
"github.com/opencontainers/runc/libcontainer/utils"
2627

@@ -38,6 +39,7 @@ type linuxContainer struct {
3839
root string
3940
config *configs.Config
4041
cgroupManager cgroups.Manager
42+
intelRdtManager intelrdt.Manager
4143
initArgs []string
4244
initProcess parentProcess
4345
initProcessStartTime uint64
@@ -67,6 +69,9 @@ type State struct {
6769

6870
// Container's standard descriptors (std{in,out,err}), needed for checkpoint and restore
6971
ExternalDescriptors []string `json:"external_descriptors,omitempty"`
72+
73+
// Intel RDT "resource control" filesystem path
74+
IntelRdtPath string `json:"intel_rdt_path"`
7075
}
7176

7277
// Container is a libcontainer container object.
@@ -163,6 +168,11 @@ func (c *linuxContainer) Stats() (*Stats, error) {
163168
if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil {
164169
return stats, newSystemErrorWithCause(err, "getting container stats from cgroups")
165170
}
171+
if c.intelRdtManager != nil {
172+
if stats.IntelRdtStats, err = c.intelRdtManager.GetStats(); err != nil {
173+
return stats, newSystemErrorWithCause(err, "getting container's Intel RDT stats")
174+
}
175+
}
166176
for _, iface := range c.config.Networks {
167177
switch iface.Type {
168178
case "veth":
@@ -193,6 +203,15 @@ func (c *linuxContainer) Set(config configs.Config) error {
193203
}
194204
return err
195205
}
206+
if c.intelRdtManager != nil {
207+
if err := c.intelRdtManager.Set(&config); err != nil {
208+
// Set configs back
209+
if err2 := c.intelRdtManager.Set(c.config); err2 != nil {
210+
logrus.Warnf("Setting back intelrdt configs failed due to error: %v, your state.json and actual configs might be inconsistent.", err2)
211+
}
212+
return err
213+
}
214+
}
196215
// After config setting succeed, update config and states
197216
c.config = &config
198217
_, err = c.updateState(nil)
@@ -434,15 +453,16 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
434453
return nil, err
435454
}
436455
return &initProcess{
437-
cmd: cmd,
438-
childPipe: childPipe,
439-
parentPipe: parentPipe,
440-
manager: c.cgroupManager,
441-
config: c.newInitConfig(p),
442-
container: c,
443-
process: p,
444-
bootstrapData: data,
445-
sharePidns: sharePidns,
456+
cmd: cmd,
457+
childPipe: childPipe,
458+
parentPipe: parentPipe,
459+
manager: c.cgroupManager,
460+
intelRdtManager: c.intelRdtManager,
461+
config: c.newInitConfig(p),
462+
container: c,
463+
process: p,
464+
bootstrapData: data,
465+
sharePidns: sharePidns,
446466
}, nil
447467
}
448468

@@ -461,6 +481,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe,
461481
return &setnsProcess{
462482
cmd: cmd,
463483
cgroupPaths: c.cgroupManager.GetPaths(),
484+
intelRdtPath: state.IntelRdtPath,
464485
childPipe: childPipe,
465486
parentPipe: parentPipe,
466487
config: c.newInitConfig(p),
@@ -1519,6 +1540,10 @@ func (c *linuxContainer) currentState() (*State, error) {
15191540
startTime, _ = c.initProcess.startTime()
15201541
externalDescriptors = c.initProcess.externalDescriptors()
15211542
}
1543+
intelRdtPath, err := intelrdt.GetIntelRdtPath(c.ID())
1544+
if err != nil {
1545+
intelRdtPath = ""
1546+
}
15221547
state := &State{
15231548
BaseState: BaseState{
15241549
ID: c.ID(),
@@ -1529,6 +1554,7 @@ func (c *linuxContainer) currentState() (*State, error) {
15291554
},
15301555
Rootless: c.config.Rootless,
15311556
CgroupPaths: c.cgroupManager.GetPaths(),
1557+
IntelRdtPath: intelRdtPath,
15321558
NamespacePaths: make(map[configs.NamespaceType]string),
15331559
ExternalDescriptors: externalDescriptors,
15341560
}

libcontainer/factory_linux.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
1919
"github.com/opencontainers/runc/libcontainer/configs"
2020
"github.com/opencontainers/runc/libcontainer/configs/validate"
21+
"github.com/opencontainers/runc/libcontainer/intelrdt"
2122
"github.com/opencontainers/runc/libcontainer/utils"
2223

2324
"golang.org/x/sys/unix"
@@ -86,6 +87,20 @@ func RootlessCgroups(l *LinuxFactory) error {
8687
return nil
8788
}
8889

90+
// IntelRdtfs is an options func to configure a LinuxFactory to return
91+
// containers that use the Intel RDT "resource control" filesystem to
92+
// create and manage Intel Xeon platform shared resources (e.g., L3 cache).
93+
func IntelRdtFs(l *LinuxFactory) error {
94+
l.NewIntelRdtManager = func(config *configs.Config, id string, path string) intelrdt.Manager {
95+
return &intelrdt.IntelRdtManager{
96+
Config: config,
97+
Id: id,
98+
Path: path,
99+
}
100+
}
101+
return nil
102+
}
103+
89104
// TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs.
90105
func TmpfsRoot(l *LinuxFactory) error {
91106
mounted, err := mount.Mounted(l.Root)
@@ -150,6 +165,9 @@ type LinuxFactory struct {
150165

151166
// NewCgroupsManager returns an initialized cgroups manager for a single container.
152167
NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager
168+
169+
// NewIntelRdtManager returns an initialized Intel RDT manager for a single container.
170+
NewIntelRdtManager func(config *configs.Config, id string, path string) intelrdt.Manager
153171
}
154172

155173
func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
@@ -185,6 +203,10 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
185203
criuPath: l.CriuPath,
186204
cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
187205
}
206+
c.intelRdtManager = nil
207+
if intelrdt.IsIntelRdtEnabled() && c.config.IntelRdt != nil {
208+
c.intelRdtManager = l.NewIntelRdtManager(config, id, "")
209+
}
188210
c.state = &stoppedState{c: c}
189211
return c, nil
190212
}
@@ -222,6 +244,10 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
222244
if err := c.refreshState(); err != nil {
223245
return nil, err
224246
}
247+
c.intelRdtManager = nil
248+
if intelrdt.IsIntelRdtEnabled() && c.config.IntelRdt != nil {
249+
c.intelRdtManager = l.NewIntelRdtManager(&state.Config, id, state.IntelRdtPath)
250+
}
225251
return c, nil
226252
}
227253

0 commit comments

Comments
 (0)