Skip to content

Commit 12877b2

Browse files
committed
libcontainer: add support for Intel RDT/CAT in runc
About Intel RDT/CAT feature: Intel platforms with new Xeon CPUs support Intel Resource Director Technology (RDT). Cache Allocation Technology (CAT) is a sub-feature of RDT, which currently supports L3 cache resource allocation. This feature provides a way for software to restrict cache allocation to a defined 'subset' of the L3 cache, which may overlap with other 'subsets'. The different subsets are identified by class of service (CLOS), and each CLOS has a capacity bitmask (CBM). More information about Intel RDT/CAT can be found in section 17.17 of the Intel Software Developer Manual. About Intel RDT/CAT kernel interface: In the Linux kernel, the interface is defined and exposed via the "resource control" filesystem, which is a "cgroup-like" interface. Compared with cgroups, it has a similar process management lifecycle and similar interfaces in a container. But unlike the cgroups hierarchy, it has a single-level filesystem layout. Intel RDT "resource control" filesystem hierarchy: mount -t resctrl resctrl /sys/fs/resctrl tree /sys/fs/resctrl /sys/fs/resctrl/ |-- info | |-- L3 | |-- cbm_mask | |-- num_closids |-- cpus |-- schemata |-- tasks |-- <container_id> |-- cpus |-- schemata |-- tasks For runc, we can make use of the `tasks` and `schemata` configuration for L3 cache resource constraints. The file `tasks` has a list of tasks that belong to this group (e.g., the "<container_id>" group). Tasks can be added to a group by writing the task ID to the "tasks" file (which will automatically remove them from the previous group to which they belonged). New tasks created by fork(2) and clone(2) are added to the same group as their parent. If a pid is not in any sub-group, it is in the root group. The file `schemata` has allocation bitmasks/values for the L3 cache on each socket; it contains the L3 cache id and capacity bitmask (CBM). Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..." 
For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0`, which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0. A valid L3 cache CBM is a *contiguous set of bits*, and the number of bits that can be set cannot exceed the maximum number of bits. The maximum number of bits in the CBM varies among supported Intel Xeon platforms. In the Intel RDT "resource control" filesystem layout, the CBM in a group should be a subset of the CBM in root. The kernel will check whether it is valid when writing. E.g., 0xfffff in root indicates that the maximum length of the CBM is 20 bits, which maps to the entire L3 cache capacity. Some valid CBM values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00, etc. For more information about the Intel RDT/CAT kernel interface: https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git/commit/?h=x86/cache&id=f20e57892806ad244eaec7a7ae365e78fee53377 An example for runc: There are two L3 caches in the two-socket machine, the default CBM is 0xfffff and the max CBM length is 20 bits. This configuration assigns 4/5 of L3 cache id 0 and the whole L3 cache id 1 for the container: "linux": { "resources": { "intelRdt": { "l3CacheSchema": "L3:0=ffff0;1=fffff" } } } Signed-off-by: Xiaochen Shen <[email protected]>
1 parent e6adcba commit 12877b2

File tree

15 files changed

+726
-66
lines changed

15 files changed

+726
-66
lines changed

events.go

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,12 @@ type event struct {
2424

2525
// stats is the runc specific stats structure for stability when encoding and decoding stats.
2626
type stats struct {
27-
Cpu cpu `json:"cpu"`
28-
Memory memory `json:"memory"`
29-
Pids pids `json:"pids"`
30-
Blkio blkio `json:"blkio"`
31-
Hugetlb map[string]hugetlb `json:"hugetlb"`
27+
Cpu cpu `json:"cpu"`
28+
Memory memory `json:"memory"`
29+
Pids pids `json:"pids"`
30+
Blkio blkio `json:"blkio"`
31+
Hugetlb map[string]hugetlb `json:"hugetlb"`
32+
IntelRdt intelRdt `json:"intelRdt"`
3233
}
3334

3435
type hugetlb struct {
@@ -95,6 +96,12 @@ type memory struct {
9596
Raw map[string]uint64 `json:"raw,omitempty"`
9697
}
9798

99+
type intelRdt struct {
100+
// The read-only default "schemas" in root, for reference
101+
L3CacheSchemaRoot string `json:"l3CacheSchemaRoot,omitempty"`
102+
L3CacheSchema string `json:"l3CacheSchema,omitempty"`
103+
}
104+
98105
var eventsCommand = cli.Command{
99106
Name: "events",
100107
Usage: "display container events such as OOM notifications, cpu, memory, and IO usage statistics",
@@ -223,6 +230,10 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *stats {
223230
for k, v := range cg.HugetlbStats {
224231
s.Hugetlb[k] = convertHugtlb(v)
225232
}
233+
234+
is := cg.IntelRdtStats
235+
s.IntelRdt.L3CacheSchemaRoot = is.IntelRdtRootStats.L3CacheSchema
236+
s.IntelRdt.L3CacheSchema = is.IntelRdtGroupStats.L3CacheSchema
226237
return &s
227238
}
228239

libcontainer/cgroups/cgroups.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ type Manager interface {
3939

4040
// Sets the cgroup as configured.
4141
Set(container *configs.Config) error
42+
43+
// Get non-cgroup resource path
44+
GetResourcePath() string
4245
}
4346

4447
type NotFoundError struct {

libcontainer/cgroups/fs/apply_raw.go

Lines changed: 78 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ var (
3131
&PerfEventGroup{},
3232
&FreezerGroup{},
3333
&NameGroup{GroupName: "name=systemd", Join: true},
34+
// If Intel RDT is enabled, will append IntelRdtGroup later
3435
}
3536
HugePageSizes, _ = cgroups.GetHugePageSize()
3637
)
@@ -62,9 +63,11 @@ type subsystem interface {
6263
}
6364

6465
type Manager struct {
65-
mu sync.Mutex
66-
Cgroups *configs.Cgroup
67-
Paths map[string]string
66+
mu sync.Mutex
67+
Cgroups *configs.Cgroup
68+
Paths map[string]string
69+
ContainerId string
70+
ResourcePath string
6871
}
6972

7073
// The absolute path to the root of the cgroup hierarchies.
@@ -94,10 +97,11 @@ func getCgroupRoot() (string, error) {
9497
}
9598

9699
type cgroupData struct {
97-
root string
98-
innerPath string
99-
config *configs.Cgroup
100-
pid int
100+
root string
101+
innerPath string
102+
config *configs.Cgroup
103+
pid int
104+
containerId string
101105
}
102106

103107
func (m *Manager) Apply(pid int) (err error) {
@@ -109,7 +113,7 @@ func (m *Manager) Apply(pid int) (err error) {
109113

110114
var c = m.Cgroups
111115

112-
d, err := getCgroupData(m.Cgroups, pid)
116+
d, err := getCgroupData(m.Cgroups, pid, m.ContainerId)
113117
if err != nil {
114118
return err
115119
}
@@ -131,23 +135,38 @@ func (m *Manager) Apply(pid int) (err error) {
131135
}
132136

133137
paths := make(map[string]string)
138+
139+
// If Intel RDT is enabled, append IntelRdtGroup to subsystems
140+
if IsIntelRdtEnabled() && m.Cgroups.Resources.IntelRdtL3CacheSchema != "" {
141+
subsystems = append(subsystems, &IntelRdtGroup{})
142+
intelRdtPath, err := GetIntelRdtPath(m.ContainerId)
143+
if err != nil {
144+
return err
145+
}
146+
m.ResourcePath = intelRdtPath
147+
}
148+
134149
for _, sys := range subsystems {
135150
if err := sys.Apply(d); err != nil {
136151
return err
137152
}
138-
// TODO: Apply should, ideally, be reentrant or be broken up into a separate
139-
// create and join phase so that the cgroup hierarchy for a container can be
140-
// created then join consists of writing the process pids to cgroup.procs
141-
p, err := d.path(sys.Name())
142-
if err != nil {
143-
// The non-presence of the devices subsystem is
144-
// considered fatal for security reasons.
145-
if cgroups.IsNotFound(err) && sys.Name() != "devices" {
146-
continue
153+
154+
// Intel RDT "resource control" filesystem is not in cgroup path
155+
if sys.Name() != "intel_rdt" {
156+
// TODO: Apply should, ideally, be reentrant or be broken up into a separate
157+
// create and join phase so that the cgroup hierarchy for a container can be
158+
// created then join consists of writing the process pids to cgroup.procs
159+
p, err := d.path(sys.Name())
160+
if err != nil {
161+
// The non-presence of the devices subsystem is
162+
// considered fatal for security reasons.
163+
if cgroups.IsNotFound(err) && sys.Name() != "devices" {
164+
continue
165+
}
166+
return err
147167
}
148-
return err
168+
paths[sys.Name()] = p
149169
}
150-
paths[sys.Name()] = p
151170
}
152171
m.Paths = paths
153172
return nil
@@ -163,6 +182,12 @@ func (m *Manager) Destroy() error {
163182
return err
164183
}
165184
m.Paths = make(map[string]string)
185+
186+
// Intel RDT "resource control" filesystem
187+
if m.ResourcePath != "" {
188+
return os.RemoveAll(m.ResourcePath)
189+
}
190+
m.ResourcePath = ""
166191
return nil
167192
}
168193

@@ -173,6 +198,13 @@ func (m *Manager) GetPaths() map[string]string {
173198
return paths
174199
}
175200

201+
func (m *Manager) GetResourcePath() string {
202+
m.mu.Lock()
203+
path := m.ResourcePath
204+
m.mu.Unlock()
205+
return path
206+
}
207+
176208
func (m *Manager) GetStats() (*cgroups.Stats, error) {
177209
m.mu.Lock()
178210
defer m.mu.Unlock()
@@ -186,6 +218,24 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
186218
return nil, err
187219
}
188220
}
221+
222+
// Intel RDT "resource control" filesystem stats
223+
if IsIntelRdtEnabled() && m.Cgroups.Resources.IntelRdtL3CacheSchema != "" {
224+
intelRdtPath, err := GetIntelRdtPath(m.ContainerId)
225+
if err != nil || !cgroups.PathExists(intelRdtPath) {
226+
return nil, err
227+
}
228+
sys, err := subsystems.Get("intel_rdt")
229+
if err == errSubsystemDoesNotExist {
230+
// In case IntelRdtGroup is not appended to subsystems
231+
subsystems = append(subsystems, &IntelRdtGroup{})
232+
}
233+
sys, _ = subsystems.Get("intel_rdt")
234+
if err := sys.GetStats(intelRdtPath, stats); err != nil {
235+
return nil, err
236+
}
237+
}
238+
189239
return stats, nil
190240
}
191241

@@ -199,6 +249,9 @@ func (m *Manager) Set(container *configs.Config) error {
199249
paths := m.GetPaths()
200250
for _, sys := range subsystems {
201251
path := paths[sys.Name()]
252+
if sys.Name() == "intel_rdt" {
253+
path = m.GetResourcePath()
254+
}
202255
if err := sys.Set(path, container.Cgroups); err != nil {
203256
return err
204257
}
@@ -241,7 +294,7 @@ func (m *Manager) GetAllPids() ([]int, error) {
241294
return cgroups.GetAllPids(paths["devices"])
242295
}
243296

244-
func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
297+
func getCgroupData(c *configs.Cgroup, pid int, containerId string) (*cgroupData, error) {
245298
root, err := getCgroupRoot()
246299
if err != nil {
247300
return nil, err
@@ -262,10 +315,11 @@ func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
262315
}
263316

264317
return &cgroupData{
265-
root: root,
266-
innerPath: innerPath,
267-
config: c,
268-
pid: pid,
318+
root: root,
319+
innerPath: innerPath,
320+
config: c,
321+
pid: pid,
322+
containerId: containerId,
269323
}, nil
270324
}
271325

libcontainer/cgroups/fs/apply_raw_test.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ func TestInvalidCgroupPath(t *testing.T) {
2020
Path: "../../../../../../../../../../some/path",
2121
}
2222

23-
data, err := getCgroupData(config, 0)
23+
data, err := getCgroupData(config, 0, "")
2424
if err != nil {
2525
t.Errorf("couldn't get cgroup data: %v", err)
2626
}
@@ -51,7 +51,7 @@ func TestInvalidAbsoluteCgroupPath(t *testing.T) {
5151
Path: "/../../../../../../../../../../some/path",
5252
}
5353

54-
data, err := getCgroupData(config, 0)
54+
data, err := getCgroupData(config, 0, "")
5555
if err != nil {
5656
t.Errorf("couldn't get cgroup data: %v", err)
5757
}
@@ -84,7 +84,7 @@ func TestInvalidCgroupParent(t *testing.T) {
8484
Name: "name",
8585
}
8686

87-
data, err := getCgroupData(config, 0)
87+
data, err := getCgroupData(config, 0, "")
8888
if err != nil {
8989
t.Errorf("couldn't get cgroup data: %v", err)
9090
}
@@ -117,7 +117,7 @@ func TestInvalidAbsoluteCgroupParent(t *testing.T) {
117117
Name: "name",
118118
}
119119

120-
data, err := getCgroupData(config, 0)
120+
data, err := getCgroupData(config, 0, "")
121121
if err != nil {
122122
t.Errorf("couldn't get cgroup data: %v", err)
123123
}
@@ -150,7 +150,7 @@ func TestInvalidCgroupName(t *testing.T) {
150150
Name: "../../../../../../../../../../some/path",
151151
}
152152

153-
data, err := getCgroupData(config, 0)
153+
data, err := getCgroupData(config, 0, "")
154154
if err != nil {
155155
t.Errorf("couldn't get cgroup data: %v", err)
156156
}
@@ -184,7 +184,7 @@ func TestInvalidAbsoluteCgroupName(t *testing.T) {
184184
Name: "/../../../../../../../../../../some/path",
185185
}
186186

187-
data, err := getCgroupData(config, 0)
187+
data, err := getCgroupData(config, 0, "")
188188
if err != nil {
189189
t.Errorf("couldn't get cgroup data: %v", err)
190190
}
@@ -217,7 +217,7 @@ func TestInvalidCgroupNameAndParent(t *testing.T) {
217217
Name: "../../../../../../../../../../some/path",
218218
}
219219

220-
data, err := getCgroupData(config, 0)
220+
data, err := getCgroupData(config, 0, "")
221221
if err != nil {
222222
t.Errorf("couldn't get cgroup data: %v", err)
223223
}
@@ -250,7 +250,7 @@ func TestInvalidAbsoluteCgroupNameAndParent(t *testing.T) {
250250
Name: "/../../../../../../../../../../some/path",
251251
}
252252

253-
data, err := getCgroupData(config, 0)
253+
data, err := getCgroupData(config, 0, "")
254254
if err != nil {
255255
t.Errorf("couldn't get cgroup data: %v", err)
256256
}

0 commit comments

Comments
 (0)