Skip to content

Commit 3c7db38

Browse files
authored
Merge pull request opencontainers#2883 from flouthoc/master
Add support for rdma cgroup introduced in Linux Kernel 4.11
2 parents 62ec6dc + b3d1448 commit 3c7db38

File tree

10 files changed

+246
-0
lines changed

10 files changed

+246
-0
lines changed

libcontainer/cgroups/fs/fs.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ var (
3131
&NetPrioGroup{},
3232
&PerfEventGroup{},
3333
&FreezerGroup{},
34+
&RdmaGroup{},
3435
&NameGroup{GroupName: "name=systemd", Join: true},
3536
}
3637
HugePageSizes, _ = cgroups.GetHugePageSize()

libcontainer/cgroups/fs/rdma.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
package fs
2+
3+
import (
4+
"github.com/opencontainers/runc/libcontainer/cgroups"
5+
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
6+
"github.com/opencontainers/runc/libcontainer/configs"
7+
)
8+
9+
type RdmaGroup struct{}
10+
11+
func (s *RdmaGroup) Name() string {
12+
return "rdma"
13+
}
14+
15+
func (s *RdmaGroup) Apply(path string, d *cgroupData) error {
16+
return join(path, d.pid)
17+
}
18+
19+
func (s *RdmaGroup) Set(path string, r *configs.Resources) error {
20+
return fscommon.RdmaSet(path, r)
21+
}
22+
23+
func (s *RdmaGroup) GetStats(path string, stats *cgroups.Stats) error {
24+
return fscommon.RdmaGetStats(path, stats)
25+
}

libcontainer/cgroups/fs2/fs2.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,10 @@ func (m *manager) GetStats() (*cgroups.Stats, error) {
125125
if err := statHugeTlb(m.dirPath, st); err != nil && !os.IsNotExist(err) {
126126
errs = append(errs, err)
127127
}
128+
// rdma (since kernel 4.11)
129+
if err := fscommon.RdmaGetStats(m.dirPath, st); err != nil && !os.IsNotExist(err) {
130+
errs = append(errs, err)
131+
}
128132
if len(errs) > 0 && !m.rootless {
129133
return st, fmt.Errorf("error while statting cgroup v2: %+v", errs)
130134
}
@@ -183,6 +187,10 @@ func (m *manager) Set(r *configs.Resources) error {
183187
if err := setHugeTlb(m.dirPath, r); err != nil {
184188
return err
185189
}
190+
// rdma (since kernel 4.11)
191+
if err := fscommon.RdmaSet(m.dirPath, r); err != nil {
192+
return err
193+
}
186194
// freezer (since kernel 5.2, pseudo-controller)
187195
if err := setFreezer(m.dirPath, r.Freezer); err != nil {
188196
return err
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
package fscommon
2+
3+
import (
4+
"bufio"
5+
"errors"
6+
"math"
7+
"os"
8+
"strconv"
9+
"strings"
10+
11+
"github.com/opencontainers/runc/libcontainer/cgroups"
12+
"github.com/opencontainers/runc/libcontainer/configs"
13+
"golang.org/x/sys/unix"
14+
)
15+
16+
// parseRdmaKV parses raw string to RdmaEntry.
17+
func parseRdmaKV(raw string, entry *cgroups.RdmaEntry) error {
18+
var value uint32
19+
20+
parts := strings.SplitN(raw, "=", 3)
21+
22+
if len(parts) != 2 {
23+
return errors.New("Unable to parse RDMA entry")
24+
}
25+
26+
k, v := parts[0], parts[1]
27+
28+
if v == "max" {
29+
value = math.MaxUint32
30+
} else {
31+
val64, err := strconv.ParseUint(v, 10, 32)
32+
if err != nil {
33+
return err
34+
}
35+
value = uint32(val64)
36+
}
37+
if k == "hca_handle" {
38+
entry.HcaHandles = value
39+
} else if k == "hca_object" {
40+
entry.HcaObjects = value
41+
}
42+
43+
return nil
44+
}
45+
46+
// readRdmaEntries reads and converts array of rawstrings to RdmaEntries from file.
47+
// example entry: mlx4_0 hca_handle=2 hca_object=2000
48+
func readRdmaEntries(dir, file string) ([]cgroups.RdmaEntry, error) {
49+
rdmaEntries := make([]cgroups.RdmaEntry, 0)
50+
fd, err := cgroups.OpenFile(dir, file, unix.O_RDONLY)
51+
if err != nil {
52+
return nil, err
53+
}
54+
defer fd.Close() //nolint:errorlint
55+
scanner := bufio.NewScanner(fd)
56+
for scanner.Scan() {
57+
parts := strings.SplitN(scanner.Text(), " ", 4)
58+
if len(parts) == 3 {
59+
entry := new(cgroups.RdmaEntry)
60+
entry.Device = parts[0]
61+
err = parseRdmaKV(parts[1], entry)
62+
if err != nil {
63+
continue
64+
}
65+
err = parseRdmaKV(parts[2], entry)
66+
if err != nil {
67+
continue
68+
}
69+
70+
rdmaEntries = append(rdmaEntries, *entry)
71+
}
72+
}
73+
return rdmaEntries, scanner.Err()
74+
}
75+
76+
// RdmaGetStats returns rdma stats such as totalLimit and current entries.
77+
func RdmaGetStats(path string, stats *cgroups.Stats) error {
78+
currentEntries, err := readRdmaEntries(path, "rdma.current")
79+
if err != nil {
80+
if errors.Is(err, os.ErrNotExist) {
81+
err = nil
82+
}
83+
return err
84+
}
85+
maxEntries, err := readRdmaEntries(path, "rdma.max")
86+
if err != nil {
87+
return err
88+
}
89+
// If device got removed between reading two files, ignore returning stats.
90+
if len(currentEntries) != len(maxEntries) {
91+
return nil
92+
}
93+
94+
stats.RdmaStats = cgroups.RdmaStats{
95+
RdmaLimit: maxEntries,
96+
RdmaCurrent: currentEntries,
97+
}
98+
99+
return nil
100+
}
101+
102+
func createCmdString(device string, limits configs.LinuxRdma) string {
103+
cmdString := device
104+
if limits.HcaHandles != nil {
105+
cmdString += " hca_handle=" + strconv.FormatUint(uint64(*limits.HcaHandles), 10)
106+
}
107+
if limits.HcaObjects != nil {
108+
cmdString += " hca_object=" + strconv.FormatUint(uint64(*limits.HcaObjects), 10)
109+
}
110+
return cmdString
111+
}
112+
113+
// RdmaSet sets RDMA resources.
114+
func RdmaSet(path string, r *configs.Resources) error {
115+
for device, limits := range r.Rdma {
116+
if err := cgroups.WriteFile(path, "rdma.max", createCmdString(device, limits)); err != nil {
117+
return err
118+
}
119+
}
120+
return nil
121+
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
package fscommon
2+
3+
import (
4+
"os"
5+
"path/filepath"
6+
"testing"
7+
8+
"github.com/opencontainers/runc/libcontainer/configs"
9+
)
10+
11+
/* Roadmap for future */
12+
// (Low-priority) TODO: Check if it is possible to virtually mimic an actual RDMA device.
13+
// TODO: Think of more edge-cases to add.
14+
15+
// TestRdmaSet performs an E2E test of RdmaSet(), parseRdmaKV() using dummy device and a dummy cgroup file-system.
16+
// Note: Following test does not guarantees that your host supports RDMA since this mocks underlying infrastructure.
17+
func TestRdmaSet(t *testing.T) {
18+
testCgroupPath := filepath.Join(t.TempDir(), "rdma")
19+
20+
// Ensure the full mock cgroup path exists.
21+
err := os.Mkdir(testCgroupPath, 0o755)
22+
if err != nil {
23+
t.Fatal(err)
24+
}
25+
26+
rdmaDevice := "mlx5_1"
27+
maxHandles := uint32(100)
28+
maxObjects := uint32(300)
29+
30+
rdmaStubResource := &configs.Resources{
31+
Rdma: map[string]configs.LinuxRdma{
32+
rdmaDevice: {
33+
HcaHandles: &maxHandles,
34+
HcaObjects: &maxObjects,
35+
},
36+
},
37+
}
38+
39+
if err := RdmaSet(testCgroupPath, rdmaStubResource); err != nil {
40+
t.Fatal(err)
41+
}
42+
43+
// The default rdma.max must be written.
44+
rdmaEntries, err := readRdmaEntries(testCgroupPath, "rdma.max")
45+
if err != nil {
46+
t.Fatal(err)
47+
}
48+
if len(rdmaEntries) != 1 {
49+
t.Fatal("rdma_test: Got the wrong values while parsing entries from rdma.max")
50+
}
51+
if rdmaEntries[0].HcaHandles != maxHandles {
52+
t.Fatalf("rdma_test: Got the wrong value for hca_handles")
53+
}
54+
if rdmaEntries[0].HcaObjects != maxObjects {
55+
t.Fatalf("rdma_test: Got the wrong value for hca_Objects")
56+
}
57+
}

libcontainer/cgroups/stats.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,17 @@ type HugetlbStats struct {
146146
Failcnt uint64 `json:"failcnt"`
147147
}
148148

149+
// RdmaEntry holds the values parsed from one line of an rdma cgroup file
// (rdma.current or rdma.max), i.e. the figures for a single RDMA device.
type RdmaEntry struct {
150+
// Device is the device name, the first field of the line (e.g. "mlx4_0").
Device string `json:"device,omitempty"`
151+
// HcaHandles is the value of the "hca_handle" key; the literal "max" is
// represented as math.MaxUint32.
HcaHandles uint32 `json:"hca_handles,omitempty"`
152+
// HcaObjects is the value of the "hca_object" key; the literal "max" is
// represented as math.MaxUint32.
HcaObjects uint32 `json:"hca_objects,omitempty"`
153+
}
154+
155+
// RdmaStats groups the per-device RDMA figures of a cgroup as filled in by
// RdmaGetStats.
type RdmaStats struct {
156+
// RdmaLimit holds the entries read from "rdma.max".
RdmaLimit []RdmaEntry `json:"rdma_limit,omitempty"`
157+
// RdmaCurrent holds the entries read from "rdma.current".
RdmaCurrent []RdmaEntry `json:"rdma_current,omitempty"`
158+
}
159+
149160
type Stats struct {
150161
CpuStats CpuStats `json:"cpu_stats,omitempty"`
151162
CPUSetStats CPUSetStats `json:"cpuset_stats,omitempty"`
@@ -154,6 +165,7 @@ type Stats struct {
154165
BlkioStats BlkioStats `json:"blkio_stats,omitempty"`
155166
// the map is in the format "size of hugepage: stats of the hugepage"
156167
HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"`
168+
RdmaStats RdmaStats `json:"rdma_stats,omitempty"`
157169
}
158170

159171
func NewStats() *Stats {

libcontainer/cgroups/systemd/v1.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ var legacySubsystems = []subsystem{
5959
&fs.NetPrioGroup{},
6060
&fs.NetClsGroup{},
6161
&fs.NameGroup{GroupName: "name=systemd"},
62+
&fs.RdmaGroup{},
6263
}
6364

6465
func genV1ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {

libcontainer/configs/cgroup_linux.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,9 @@ type Resources struct {
113113
// Set class identifier for container's network packets
114114
NetClsClassid uint32 `json:"net_cls_classid_u"`
115115

116+
// Rdma resource restriction configuration
117+
Rdma map[string]LinuxRdma `json:"rdma"`
118+
116119
// Used on cgroups v2:
117120

118121
// CpuWeight sets a proportional bandwidth limit.

libcontainer/configs/rdma.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
package configs
2+
3+
// LinuxRdma holds the per-device limits for the Linux 'rdma' cgroup
// controller (available since Linux 4.11). A nil field is left out of the
// line written to rdma.max.
type LinuxRdma struct {
5+
// Maximum number of HCA handles that can be opened. Default is "no limit".
HcaHandles *uint32 `json:"hca_handles,omitempty"`
7+
// Maximum number of HCA objects that can be created. Default is "no limit".
HcaObjects *uint32 `json:"hca_objects,omitempty"`
9+
}

libcontainer/specconv/spec_linux.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -608,6 +608,15 @@ func CreateCgroupConfig(opts *CreateOpts, defaultDevs []*devices.Device) (*confi
608608
Limit: l.Limit,
609609
})
610610
}
611+
if len(r.Rdma) > 0 {
612+
c.Resources.Rdma = make(map[string]configs.LinuxRdma, len(r.Rdma))
613+
for k, v := range r.Rdma {
614+
c.Resources.Rdma[k] = configs.LinuxRdma{
615+
HcaHandles: v.HcaHandles,
616+
HcaObjects: v.HcaObjects,
617+
}
618+
}
619+
}
611620
if r.Network != nil {
612621
if r.Network.ClassID != nil {
613622
c.Resources.NetClsClassid = *r.Network.ClassID

0 commit comments

Comments
 (0)