Skip to content

Commit 5274430

Browse files
author
Mrunal Patel
authored
Merge pull request #1279 from xiaochenshen/rdt-cat-resource-manager-v1
libcontainer: add support for Intel RDT/CAT in runc
2 parents aea4f21 + 4d2756c commit 5274430

18 files changed

+1039
-30
lines changed

events.go

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111

1212
"github.com/opencontainers/runc/libcontainer"
1313
"github.com/opencontainers/runc/libcontainer/cgroups"
14+
"github.com/opencontainers/runc/libcontainer/intelrdt"
1415

1516
"github.com/sirupsen/logrus"
1617
"github.com/urfave/cli"
@@ -25,11 +26,12 @@ type event struct {
2526

2627
// stats is the runc specific stats structure for stability when encoding and decoding stats.
2728
type stats struct {
28-
CPU cpu `json:"cpu"`
29-
Memory memory `json:"memory"`
30-
Pids pids `json:"pids"`
31-
Blkio blkio `json:"blkio"`
32-
Hugetlb map[string]hugetlb `json:"hugetlb"`
29+
CPU cpu `json:"cpu"`
30+
Memory memory `json:"memory"`
31+
Pids pids `json:"pids"`
32+
Blkio blkio `json:"blkio"`
33+
Hugetlb map[string]hugetlb `json:"hugetlb"`
34+
IntelRdt intelRdt `json:"intel_rdt"`
3335
}
3436

3537
type hugetlb struct {
@@ -96,6 +98,23 @@ type memory struct {
9698
Raw map[string]uint64 `json:"raw,omitempty"`
9799
}
98100

101+
// l3CacheInfo is the runc-specific, JSON-stable form of the L3 cache
// information reported in the Intel RDT stats. Every field is tagged
// omitempty, so zero values are left out of the encoded output.
type l3CacheInfo struct {
	// CbmMask is the capacity bitmask (CBM) value, kept as a string.
	CbmMask string `json:"cbm_mask,omitempty"`
	// MinCbmBits is encoded as "min_cbm_bits".
	MinCbmBits uint64 `json:"min_cbm_bits,omitempty"`
	// NumClosids is encoded as "num_closids".
	NumClosids uint64 `json:"num_closids,omitempty"`
}
106+
107+
type intelRdt struct {
108+
// The read-only L3 cache information
109+
L3CacheInfo *l3CacheInfo `json:"l3_cache_info,omitempty"`
110+
111+
// The read-only L3 cache schema in root
112+
L3CacheSchemaRoot string `json:"l3_cache_schema_root,omitempty"`
113+
114+
// The L3 cache schema in 'container_id' group
115+
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
116+
}
117+
99118
var eventsCommand = cli.Command{
100119
Name: "events",
101120
Usage: "display container events such as OOM notifications, cpu, memory, and IO usage statistics",
@@ -227,6 +246,13 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *stats {
227246
for k, v := range cg.HugetlbStats {
228247
s.Hugetlb[k] = convertHugtlb(v)
229248
}
249+
250+
if is := ls.IntelRdtStats; is != nil {
251+
s.IntelRdt.L3CacheInfo = convertL3CacheInfo(is.L3CacheInfo)
252+
s.IntelRdt.L3CacheSchemaRoot = is.L3CacheSchemaRoot
253+
s.IntelRdt.L3CacheSchema = is.L3CacheSchema
254+
}
255+
230256
return &s
231257
}
232258

@@ -259,3 +285,11 @@ func convertBlkioEntry(c []cgroups.BlkioStatEntry) []blkioEntry {
259285
}
260286
return out
261287
}
288+
289+
func convertL3CacheInfo(i *intelrdt.L3CacheInfo) *l3CacheInfo {
290+
return &l3CacheInfo{
291+
CbmMask: i.CbmMask,
292+
MinCbmBits: i.MinCbmBits,
293+
NumClosids: i.NumClosids,
294+
}
295+
}

libcontainer/SPEC.md

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,90 @@ that no processes or threads escape the cgroups. This sync is
154154
done via a pipe ( specified in the runtime section below ) that the container's
155155
init process will block waiting for the parent to finish setup.
156156

157+
### IntelRdt
158+
159+
Intel platforms with new Xeon CPUs support Intel Resource Director Technology
160+
(RDT). Cache Allocation Technology (CAT) is a sub-feature of RDT, which
161+
currently supports L3 cache resource allocation.
162+
163+
This feature provides a way for the software to restrict cache allocation to a
164+
defined 'subset' of L3 cache which may be overlapping with other 'subsets'.
165+
The different subsets are identified by class of service (CLOS) and each CLOS
166+
has a capacity bitmask (CBM).
167+
168+
It can be used to handle L3 cache resource allocation for containers if
169+
hardware and kernel support Intel RDT/CAT.
170+
171+
In Linux 4.10 kernel or newer, the interface is defined and exposed via
172+
"resource control" filesystem, which is a "cgroup-like" interface.
173+
174+
Compared with cgroups, it has a similar process management lifecycle and
175+
interfaces in a container. But unlike cgroups' hierarchy, it has a single-level
176+
filesystem layout.
177+
178+
Intel RDT "resource control" filesystem hierarchy:
179+
```
180+
mount -t resctrl resctrl /sys/fs/resctrl
181+
tree /sys/fs/resctrl
182+
/sys/fs/resctrl/
183+
|-- info
184+
| |-- L3
185+
| |-- cbm_mask
186+
| |-- min_cbm_bits
187+
| |-- num_closids
188+
|-- cpus
189+
|-- schemata
190+
|-- tasks
191+
|-- <container_id>
192+
|-- cpus
193+
|-- schemata
194+
|-- tasks
195+
196+
```
197+
198+
For runc, we can make use of `tasks` and `schemata` configuration for L3 cache
199+
resource constraints.
200+
201+
The file `tasks` has a list of tasks that belong to this group (e.g.,
202+
"<container_id>" group). Tasks can be added to a group by writing the task ID
203+
to the "tasks" file (which will automatically remove them from the previous
204+
group to which they belonged). New tasks created by fork(2) and clone(2) are
205+
added to the same group as their parent. If a pid is not in any sub group, it
206+
is in root group.
207+
208+
The file `schemata` has allocation masks/values for L3 cache on each socket,
209+
which contains L3 cache id and capacity bitmask (CBM).
210+
```
211+
Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
212+
```
213+
For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0`,
214+
which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
215+
216+
The valid L3 cache CBM is a *contiguous set of bits*, and the number of bits
217+
that can be set cannot exceed the maximum CBM length. The maximum number of
218+
bits in the CBM varies among supported Intel Xeon platforms. In the Intel RDT
219+
"resource control" filesystem layout, the CBM in a group should be a subset of
220+
the CBM in root. The kernel checks that it is valid when writing. For example,
221+
0xfffff in root indicates that the CBM has at most 20 bits, which map to the
222+
entire L3 cache capacity. Some valid CBM values to set in a group: 0xf, 0xf0,
0x3ff, 0x1f00, etc.
223+
224+
For more information about Intel RDT/CAT kernel interface:
225+
https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
226+
227+
An example for runc:
228+
```
229+
Consider a two-socket machine with two L3 caches where the default CBM is
230+
0xfffff and the max CBM length is 20 bits. With this configuration, tasks
231+
inside the container only have access to the "upper" 80% of L3 cache id 0 and
232+
the "lower" 50% of L3 cache id 1:
233+
234+
"linux": {
235+
"intelRdt": {
236+
"l3CacheSchema": "L3:0=ffff0;1=3ff"
237+
}
238+
}
239+
```
240+
157241
### Security
158242

159243
The standard set of Linux capabilities that are set in a container

libcontainer/configs/config.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,10 @@ type Config struct {
187187

188188
// Rootless specifies whether the container is a rootless container.
189189
Rootless bool `json:"rootless"`
190+
191+
// IntelRdt specifies settings for Intel RDT/CAT group that the container is placed into
192+
// to limit the resources (e.g., L3 cache) the container has available
193+
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
190194
}
191195

192196
type Hooks struct {

libcontainer/configs/intelrdt.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
package configs
2+
3+
// IntelRdt holds the Intel RDT/CAT settings of a container configuration.
type IntelRdt struct {
	// The schema for L3 cache id and capacity bitmask (CBM)
	// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
	L3CacheSchema string `json:"l3_cache_schema,omitempty"`
}

libcontainer/configs/validate/validator.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"strings"
88

99
"github.com/opencontainers/runc/libcontainer/configs"
10+
"github.com/opencontainers/runc/libcontainer/intelrdt"
1011
selinux "github.com/opencontainers/selinux/go-selinux"
1112
)
1213

@@ -40,6 +41,9 @@ func (v *ConfigValidator) Validate(config *configs.Config) error {
4041
if err := v.sysctl(config); err != nil {
4142
return err
4243
}
44+
if err := v.intelrdt(config); err != nil {
45+
return err
46+
}
4347
if config.Rootless {
4448
if err := v.rootless(config); err != nil {
4549
return err
@@ -153,6 +157,19 @@ func (v *ConfigValidator) sysctl(config *configs.Config) error {
153157
return nil
154158
}
155159

160+
func (v *ConfigValidator) intelrdt(config *configs.Config) error {
161+
if config.IntelRdt != nil {
162+
if !intelrdt.IsIntelRdtEnabled() {
163+
return fmt.Errorf("intelRdt is specified in config, but Intel RDT feature is not supported or enabled")
164+
}
165+
if config.IntelRdt.L3CacheSchema == "" {
166+
return fmt.Errorf("intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")
167+
}
168+
}
169+
170+
return nil
171+
}
172+
156173
func isSymbolicLink(path string) (bool, error) {
157174
fi, err := os.Lstat(path)
158175
if err != nil {

libcontainer/container_linux.go

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"github.com/opencontainers/runc/libcontainer/cgroups"
2222
"github.com/opencontainers/runc/libcontainer/configs"
2323
"github.com/opencontainers/runc/libcontainer/criurpc"
24+
"github.com/opencontainers/runc/libcontainer/intelrdt"
2425
"github.com/opencontainers/runc/libcontainer/system"
2526
"github.com/opencontainers/runc/libcontainer/utils"
2627

@@ -38,6 +39,7 @@ type linuxContainer struct {
3839
root string
3940
config *configs.Config
4041
cgroupManager cgroups.Manager
42+
intelRdtManager intelrdt.Manager
4143
initArgs []string
4244
initProcess parentProcess
4345
initProcessStartTime uint64
@@ -67,6 +69,9 @@ type State struct {
6769

6870
// Container's standard descriptors (std{in,out,err}), needed for checkpoint and restore
6971
ExternalDescriptors []string `json:"external_descriptors,omitempty"`
72+
73+
// Intel RDT "resource control" filesystem path
74+
IntelRdtPath string `json:"intel_rdt_path"`
7075
}
7176

7277
// Container is a libcontainer container object.
@@ -163,6 +168,11 @@ func (c *linuxContainer) Stats() (*Stats, error) {
163168
if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil {
164169
return stats, newSystemErrorWithCause(err, "getting container stats from cgroups")
165170
}
171+
if c.intelRdtManager != nil {
172+
if stats.IntelRdtStats, err = c.intelRdtManager.GetStats(); err != nil {
173+
return stats, newSystemErrorWithCause(err, "getting container's Intel RDT stats")
174+
}
175+
}
166176
for _, iface := range c.config.Networks {
167177
switch iface.Type {
168178
case "veth":
@@ -193,6 +203,15 @@ func (c *linuxContainer) Set(config configs.Config) error {
193203
}
194204
return err
195205
}
206+
if c.intelRdtManager != nil {
207+
if err := c.intelRdtManager.Set(&config); err != nil {
208+
// Set configs back
209+
if err2 := c.intelRdtManager.Set(c.config); err2 != nil {
210+
logrus.Warnf("Setting back intelrdt configs failed due to error: %v, your state.json and actual configs might be inconsistent.", err2)
211+
}
212+
return err
213+
}
214+
}
196215
// After config setting succeed, update config and states
197216
c.config = &config
198217
_, err = c.updateState(nil)
@@ -434,15 +453,16 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
434453
return nil, err
435454
}
436455
return &initProcess{
437-
cmd: cmd,
438-
childPipe: childPipe,
439-
parentPipe: parentPipe,
440-
manager: c.cgroupManager,
441-
config: c.newInitConfig(p),
442-
container: c,
443-
process: p,
444-
bootstrapData: data,
445-
sharePidns: sharePidns,
456+
cmd: cmd,
457+
childPipe: childPipe,
458+
parentPipe: parentPipe,
459+
manager: c.cgroupManager,
460+
intelRdtManager: c.intelRdtManager,
461+
config: c.newInitConfig(p),
462+
container: c,
463+
process: p,
464+
bootstrapData: data,
465+
sharePidns: sharePidns,
446466
}, nil
447467
}
448468

@@ -461,6 +481,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe,
461481
return &setnsProcess{
462482
cmd: cmd,
463483
cgroupPaths: c.cgroupManager.GetPaths(),
484+
intelRdtPath: state.IntelRdtPath,
464485
childPipe: childPipe,
465486
parentPipe: parentPipe,
466487
config: c.newInitConfig(p),
@@ -1519,6 +1540,10 @@ func (c *linuxContainer) currentState() (*State, error) {
15191540
startTime, _ = c.initProcess.startTime()
15201541
externalDescriptors = c.initProcess.externalDescriptors()
15211542
}
1543+
intelRdtPath, err := intelrdt.GetIntelRdtPath(c.ID())
1544+
if err != nil {
1545+
intelRdtPath = ""
1546+
}
15221547
state := &State{
15231548
BaseState: BaseState{
15241549
ID: c.ID(),
@@ -1529,6 +1554,7 @@ func (c *linuxContainer) currentState() (*State, error) {
15291554
},
15301555
Rootless: c.config.Rootless,
15311556
CgroupPaths: c.cgroupManager.GetPaths(),
1557+
IntelRdtPath: intelRdtPath,
15321558
NamespacePaths: make(map[configs.NamespaceType]string),
15331559
ExternalDescriptors: externalDescriptors,
15341560
}

0 commit comments

Comments
 (0)