Skip to content

Commit 78ef28e

Browse files
Merge pull request #1632 from xiaochenshen/rdt-mba
libcontainer: intelrdt: add support for Intel RDT/MBA in runc
2 parents a00bf01 + d59b17d commit 78ef28e

File tree

17 files changed

+551
-189
lines changed

17 files changed

+551
-189
lines changed

events.go

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,13 @@ type l3CacheInfo struct {
104104
NumClosids uint64 `json:"num_closids,omitempty"`
105105
}
106106

107+
type memBwInfo struct {
108+
BandwidthGran uint64 `json:"bandwidth_gran,omitempty"`
109+
DelayLinear uint64 `json:"delay_linear,omitempty"`
110+
MinBandwidth uint64 `json:"min_bandwidth,omitempty"`
111+
NumClosids uint64 `json:"num_closids,omitempty"`
112+
}
113+
107114
type intelRdt struct {
108115
// The read-only L3 cache information
109116
L3CacheInfo *l3CacheInfo `json:"l3_cache_info,omitempty"`
@@ -113,6 +120,15 @@ type intelRdt struct {
113120

114121
// The L3 cache schema in 'container_id' group
115122
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
123+
124+
// The read-only memory bandwidth information
125+
MemBwInfo *memBwInfo `json:"mem_bw_info,omitempty"`
126+
127+
// The read-only memory bandwidth schema in root
128+
MemBwSchemaRoot string `json:"mem_bw_schema_root,omitempty"`
129+
130+
// The memory bandwidth schema in 'container_id' group
131+
MemBwSchema string `json:"mem_bw_schema,omitempty"`
116132
}
117133

118134
var eventsCommand = cli.Command{
@@ -248,9 +264,16 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *stats {
248264
}
249265

250266
if is := ls.IntelRdtStats; is != nil {
251-
s.IntelRdt.L3CacheInfo = convertL3CacheInfo(is.L3CacheInfo)
252-
s.IntelRdt.L3CacheSchemaRoot = is.L3CacheSchemaRoot
253-
s.IntelRdt.L3CacheSchema = is.L3CacheSchema
267+
if intelrdt.IsCatEnabled() {
268+
s.IntelRdt.L3CacheInfo = convertL3CacheInfo(is.L3CacheInfo)
269+
s.IntelRdt.L3CacheSchemaRoot = is.L3CacheSchemaRoot
270+
s.IntelRdt.L3CacheSchema = is.L3CacheSchema
271+
}
272+
if intelrdt.IsMbaEnabled() {
273+
s.IntelRdt.MemBwInfo = convertMemBwInfo(is.MemBwInfo)
274+
s.IntelRdt.MemBwSchemaRoot = is.MemBwSchemaRoot
275+
s.IntelRdt.MemBwSchema = is.MemBwSchema
276+
}
254277
}
255278

256279
return &s
@@ -293,3 +316,12 @@ func convertL3CacheInfo(i *intelrdt.L3CacheInfo) *l3CacheInfo {
293316
NumClosids: i.NumClosids,
294317
}
295318
}
319+
320+
func convertMemBwInfo(i *intelrdt.MemBwInfo) *memBwInfo {
321+
return &memBwInfo{
322+
BandwidthGran: i.BandwidthGran,
323+
DelayLinear: i.DelayLinear,
324+
MinBandwidth: i.MinBandwidth,
325+
NumClosids: i.NumClosids,
326+
}
327+
}

libcontainer/SPEC.md

Lines changed: 69 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -156,17 +156,21 @@ init process will block waiting for the parent to finish setup.
156156

157157
### IntelRdt
158158

159-
Intel platforms with new Xeon CPU support Intel Resource Director Technology
160-
(RDT). Cache Allocation Technology (CAT) is a sub-feature of RDT, which
161-
currently supports L3 cache resource allocation.
159+
Intel platforms with new Xeon CPUs support Resource Director Technology (RDT).
160+
Cache Allocation Technology (CAT) and Memory Bandwidth Allocation (MBA) are
161+
two sub-features of RDT.
162162

163-
This feature provides a way for the software to restrict cache allocation to a
164-
defined 'subset' of L3 cache which may be overlapping with other 'subsets'.
165-
The different subsets are identified by class of service (CLOS) and each CLOS
166-
has a capacity bitmask (CBM).
163+
Cache Allocation Technology (CAT) provides a way for the software to restrict
164+
cache allocation to a defined 'subset' of L3 cache which may be overlapping
165+
with other 'subsets'. The different subsets are identified by class of
166+
service (CLOS) and each CLOS has a capacity bitmask (CBM).
167167

168-
It can be used to handle L3 cache resource allocation for containers if
169-
hardware and kernel support Intel RDT/CAT.
168+
Memory Bandwidth Allocation (MBA) provides indirect and approximate throttle
169+
over memory bandwidth for the software. A user controls the resource by
170+
indicating the percentage of maximum memory bandwidth.
171+
172+
Intel RDT can be used to handle L3 cache and memory bandwidth resource allocation
173+
for containers if hardware and kernel support Intel RDT CAT and MBA features.
170174

171175
In Linux 4.10 kernel or newer, the interface is defined and exposed via
172176
"resource control" filesystem, which is a "cgroup-like" interface.
@@ -175,66 +179,95 @@ Comparing with cgroups, it has similar process management lifecycle and
175179
interfaces in a container. But unlike cgroups' hierarchy, it has single level
176180
filesystem layout.
177181

182+
CAT and MBA features were introduced in the Linux 4.10 and 4.12 kernels, respectively, via
183+
"resource control" filesystem.
184+
178185
Intel RDT "resource control" filesystem hierarchy:
179186
```
180187
mount -t resctrl resctrl /sys/fs/resctrl
181188
tree /sys/fs/resctrl
182189
/sys/fs/resctrl/
183190
|-- info
184191
| |-- L3
185-
| |-- cbm_mask
186-
| |-- min_cbm_bits
192+
| | |-- cbm_mask
193+
| | |-- min_cbm_bits
194+
| | |-- num_closids
195+
| |-- MB
196+
| |-- bandwidth_gran
197+
| |-- delay_linear
198+
| |-- min_bandwidth
187199
| |-- num_closids
188-
|-- cpus
200+
|-- ...
189201
|-- schemata
190202
|-- tasks
191203
|-- <container_id>
192-
|-- cpus
204+
|-- ...
193205
|-- schemata
194206
|-- tasks
195-
196207
```
197208

198-
For runc, we can make use of `tasks` and `schemata` configuration for L3 cache
199-
resource constraints.
209+
For runc, we can make use of `tasks` and `schemata` configuration for L3
210+
cache and memory bandwidth resource constraints.
200211

201212
The file `tasks` has a list of tasks that belong to this group (e.g.,
202213
"<container_id>" group). Tasks can be added to a group by writing the task ID
203-
to the "tasks" file (which will automatically remove them from the previous
214+
to the "tasks" file (which will automatically remove them from the previous
204215
group to which they belonged). New tasks created by fork(2) and clone(2) are
205-
added to the same group as their parent. If a pid is not in any sub group, it
206-
is in root group.
216+
added to the same group as their parent.
207217

208-
The file `schemata` has allocation masks/values for L3 cache on each socket,
209-
which contains L3 cache id and capacity bitmask (CBM).
218+
The file `schemata` has a list of all the resources available to this group.
219+
Each resource (L3 cache, memory bandwidth) has its own line and format.
220+
221+
L3 cache schema:
222+
It has allocation bitmasks/values for L3 cache on each socket, which
223+
contains L3 cache id and capacity bitmask (CBM).
210224
```
211225
Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
212226
```
213-
For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0`
214-
Which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
227+
For example, on a two-socket machine, the schema line could be "L3:0=ff;1=c0"
228+
which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
215229

216230
The valid L3 cache CBM is a *contiguous bits set* and number of bits that can
217231
be set is less than the max bit. The max number of bits in the CBM varies among
218-
supported Intel Xeon platforms. In Intel RDT "resource control" filesystem
219-
layout, the CBM in a group should be a subset of the CBM in root. Kernel will
220-
check if it is valid when writing. e.g., 0xfffff in root indicates the max bits
221-
of CBM is 20 bits, which mapping to entire L3 cache capacity. Some valid CBM
222-
values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
232+
supported Intel CPU models. Kernel will check if it is valid when writing.
233+
e.g., default value 0xfffff in root indicates the max bits of CBM is 20
234+
bits, which maps to the entire L3 cache capacity. Some valid CBM values to
235+
set in a group: 0xf, 0xf0, 0x3ff, 0x1f00, etc.
236+
237+
Memory bandwidth schema:
238+
It has allocation values for memory bandwidth on each socket, which contains
239+
L3 cache id and memory bandwidth percentage.
240+
```
241+
Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
242+
```
243+
For example, on a two-socket machine, the schema line could be "MB:0=20;1=70"
244+
245+
The minimum bandwidth percentage value for each CPU model is predefined and
246+
can be looked up through "info/MB/min_bandwidth". The bandwidth granularity
247+
that is allocated is also dependent on the CPU model and can be looked up at
248+
"info/MB/bandwidth_gran". The available bandwidth control steps are:
249+
min_bw + N * bw_gran. Intermediate values are rounded to the next control
250+
step available on the hardware.
223251

224-
For more information about Intel RDT/CAT kernel interface:
252+
For more information about Intel RDT kernel interface:
225253
https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
226254

227-
An example for runc:
228255
```
256+
An example for runc:
229257
Consider a two-socket machine with two L3 caches where the default CBM is
230-
0xfffff and the max CBM length is 20 bits. With this configuration, tasks
231-
inside the container only have access to the "upper" 80% of L3 cache id 0 and
232-
the "lower" 50% L3 cache id 1:
258+
0x7ff and the max CBM length is 11 bits, and the minimum memory bandwidth is 10%
259+
with a memory bandwidth granularity of 10%.
260+
261+
Tasks inside the container only have access to the "upper" 7/11 of L3 cache
262+
on socket 0 and the "lower" 5/11 of L3 cache on socket 1, and may use a
263+
maximum memory bandwidth of 20% on socket 0 and 70% on socket 1.
233264
234265
"linux": {
235-
"intelRdt": {
236-
"l3CacheSchema": "L3:0=ffff0;1=3ff"
237-
}
266+
"intelRdt": {
267+
"closID": "guaranteed_group",
268+
"l3CacheSchema": "L3:0=7f0;1=1f",
269+
"memBwSchema": "MB:0=20;1=70"
270+
}
238271
}
239272
```
240273

libcontainer/configs/config.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -186,8 +186,8 @@ type Config struct {
186186
// callers keyring in this case.
187187
NoNewKeyring bool `json:"no_new_keyring"`
188188

189-
// IntelRdt specifies settings for Intel RDT/CAT group that the container is placed into
190-
// to limit the resources (e.g., L3 cache) the container has available
189+
// IntelRdt specifies settings for Intel RDT group that the container is placed into
190+
// to limit the resources (e.g., L3 cache, memory bandwidth) the container has available
191191
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
192192

193193
// RootlessEUID is set when the runc was launched with non-zero EUID.

libcontainer/configs/intelrdt.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,8 @@ type IntelRdt struct {
44
// The schema for L3 cache id and capacity bitmask (CBM)
55
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
66
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
7+
8+
// The schema of memory bandwidth percentage per L3 cache id
9+
// Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
10+
MemBwSchema string `json:"memBwSchema,omitempty"`
711
}

libcontainer/configs/validate/validator.go

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -169,11 +169,22 @@ func (v *ConfigValidator) sysctl(config *configs.Config) error {
169169

170170
func (v *ConfigValidator) intelrdt(config *configs.Config) error {
171171
if config.IntelRdt != nil {
172-
if !intelrdt.IsEnabled() {
173-
return fmt.Errorf("intelRdt is specified in config, but Intel RDT feature is not supported or enabled")
172+
if !intelrdt.IsCatEnabled() && !intelrdt.IsMbaEnabled() {
173+
return fmt.Errorf("intelRdt is specified in config, but Intel RDT is not supported or enabled")
174174
}
175-
if config.IntelRdt.L3CacheSchema == "" {
176-
return fmt.Errorf("intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")
175+
176+
if !intelrdt.IsCatEnabled() && config.IntelRdt.L3CacheSchema != "" {
177+
return fmt.Errorf("intelRdt.l3CacheSchema is specified in config, but Intel RDT/CAT is not enabled")
178+
}
179+
if !intelrdt.IsMbaEnabled() && config.IntelRdt.MemBwSchema != "" {
180+
return fmt.Errorf("intelRdt.memBwSchema is specified in config, but Intel RDT/MBA is not enabled")
181+
}
182+
183+
if intelrdt.IsCatEnabled() && config.IntelRdt.L3CacheSchema == "" {
184+
return fmt.Errorf("Intel RDT/CAT is enabled and intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")
185+
}
186+
if intelrdt.IsMbaEnabled() && config.IntelRdt.MemBwSchema == "" {
187+
return fmt.Errorf("Intel RDT/MBA is enabled and intelRdt is specified in config, but intelRdt.memBwSchema is empty")
177188
}
178189
}
179190

libcontainer/container_linux_test.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ func TestGetContainerStats(t *testing.T) {
148148
intelRdtManager: &mockIntelRdtManager{
149149
stats: &intelrdt.Stats{
150150
L3CacheSchema: "L3:0=f;1=f0",
151+
MemBwSchema: "MB:0=20;1=70",
151152
},
152153
},
153154
}
@@ -161,14 +162,22 @@ func TestGetContainerStats(t *testing.T) {
161162
if stats.CgroupStats.MemoryStats.Usage.Usage != 1024 {
162163
t.Fatalf("expected memory usage 1024 but received %d", stats.CgroupStats.MemoryStats.Usage.Usage)
163164
}
164-
if intelrdt.IsEnabled() {
165+
if intelrdt.IsCatEnabled() {
165166
if stats.IntelRdtStats == nil {
166167
t.Fatal("intel rdt stats are nil")
167168
}
168169
if stats.IntelRdtStats.L3CacheSchema != "L3:0=f;1=f0" {
169170
t.Fatalf("expected L3CacheSchema L3:0=f;1=f0 but recevied %s", stats.IntelRdtStats.L3CacheSchema)
170171
}
171172
}
173+
if intelrdt.IsMbaEnabled() {
174+
if stats.IntelRdtStats == nil {
175+
t.Fatal("intel rdt stats are nil")
176+
}
177+
if stats.IntelRdtStats.MemBwSchema != "MB:0=20;1=70" {
178+
t.Fatalf("expected MemBwSchema MB:0=20;1=70 but recevied %s", stats.IntelRdtStats.MemBwSchema)
179+
}
180+
}
172181
}
173182

174183
func TestGetContainerState(t *testing.T) {
@@ -210,6 +219,7 @@ func TestGetContainerState(t *testing.T) {
210219
intelRdtManager: &mockIntelRdtManager{
211220
stats: &intelrdt.Stats{
212221
L3CacheSchema: "L3:0=f0;1=f",
222+
MemBwSchema: "MB:0=70;1=20",
213223
},
214224
path: expectedIntelRdtPath,
215225
},
@@ -232,7 +242,7 @@ func TestGetContainerState(t *testing.T) {
232242
if memPath := paths["memory"]; memPath != expectedMemoryPath {
233243
t.Fatalf("expected memory path %q but received %q", expectedMemoryPath, memPath)
234244
}
235-
if intelrdt.IsEnabled() {
245+
if intelrdt.IsCatEnabled() || intelrdt.IsMbaEnabled() {
236246
intelRdtPath := state.IntelRdtPath
237247
if intelRdtPath == "" {
238248
t.Fatal("intel rdt path should not be empty")

libcontainer/factory_linux.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ func RootlessCgroupfs(l *LinuxFactory) error {
9292

9393
// IntelRdtFs is an options func to configure a LinuxFactory to return
9494
// containers that use the Intel RDT "resource control" filesystem to
95-
// create and manage Intel Xeon platform shared resources (e.g., L3 cache).
95+
// create and manage Intel RDT resources (e.g., L3 cache, memory bandwidth).
9696
func IntelRdtFs(l *LinuxFactory) error {
9797
l.NewIntelRdtManager = func(config *configs.Config, id string, path string) intelrdt.Manager {
9898
return &intelrdt.IntelRdtManager{
@@ -222,7 +222,7 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
222222
newgidmapPath: l.NewgidmapPath,
223223
cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
224224
}
225-
if intelrdt.IsEnabled() {
225+
if intelrdt.IsCatEnabled() || intelrdt.IsMbaEnabled() {
226226
c.intelRdtManager = l.NewIntelRdtManager(config, id, "")
227227
}
228228
c.state = &stoppedState{c: c}
@@ -268,7 +268,7 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
268268
if err := c.refreshState(); err != nil {
269269
return nil, err
270270
}
271-
if intelrdt.IsEnabled() {
271+
if intelrdt.IsCatEnabled() || intelrdt.IsMbaEnabled() {
272272
c.intelRdtManager = l.NewIntelRdtManager(&state.Config, id, state.IntelRdtPath)
273273
}
274274
return c, nil

0 commit comments

Comments
 (0)