Skip to content

Commit a672a5f

Browse files
committed
merge #4726 into opencontainers/runc:main
Antti Kervinen (1): Add memory policy support LGTMs: lifubang AkihiroSuda cyphar
2 parents bc432ce + eda7bdf commit a672a5f

File tree

11 files changed

+286
-6
lines changed

11 files changed

+286
-6
lines changed

features.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,10 @@ var featuresCommand = cli.Command{
5959
Enabled: &t,
6060
Schemata: &t,
6161
},
62+
MemoryPolicy: &features.MemoryPolicy{
63+
Modes: specconv.KnownMemoryPolicyModes(),
64+
Flags: specconv.KnownMemoryPolicyFlags(),
65+
},
6266
MountExtensions: &features.MountExtensions{
6367
IDMap: &features.IDMap{
6468
Enabled: &t,

internal/linux/linux.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package linux
22

33
import (
44
"os"
5+
"unsafe"
56

67
"golang.org/x/sys/unix"
78
)
@@ -72,3 +73,15 @@ func Sendmsg(fd int, p, oob []byte, to unix.Sockaddr, flags int) error {
7273
})
7374
return os.NewSyscallError("sendmsg", err)
7475
}
76+
77+
// SetMempolicy wraps set_mempolicy.
78+
func SetMempolicy(mode uint, mask *unix.CPUSet) error {
79+
err := retryOnEINTR(func() error {
80+
_, _, errno := unix.Syscall(unix.SYS_SET_MEMPOLICY, uintptr(mode), uintptr(unsafe.Pointer(mask)), unsafe.Sizeof(*mask)*8)
81+
if errno != 0 {
82+
return errno
83+
}
84+
return nil
85+
})
86+
return os.NewSyscallError("set_mempolicy", err)
87+
}

libcontainer/configs/config.go

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,9 @@ type Config struct {
214214
// to limit the resources (e.g., L3 cache, memory bandwidth) the container has available
215215
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
216216

217+
// MemoryPolicy specifies NUMA memory policy for the container.
218+
MemoryPolicy *LinuxMemoryPolicy `json:"memory_policy,omitempty"`
219+
217220
// RootlessEUID is set when runc was launched with a non-zero EUID.
218221
// Note that RootlessEUID is set to false when launched with EUID=0 in userns.
219222
// When RootlessEUID is set, runc creates a new userns for the container.
@@ -305,7 +308,8 @@ type CPUAffinity struct {
305308
Initial, Final *unix.CPUSet
306309
}
307310

308-
func toCPUSet(str string) (*unix.CPUSet, error) {
311+
// ToCPUSet parses a string in list format into a unix.CPUSet, e.g. "0-3,5,7-9".
312+
func ToCPUSet(str string) (*unix.CPUSet, error) {
309313
if str == "" {
310314
return nil, nil
311315
}
@@ -356,7 +360,7 @@ func toCPUSet(str string) (*unix.CPUSet, error) {
356360
}
357361
}
358362
if s.Count() == 0 {
359-
return nil, fmt.Errorf("no CPUs found in %q", str)
363+
return nil, fmt.Errorf("no members found in set %q", str)
360364
}
361365

362366
return s, nil
@@ -367,11 +371,11 @@ func ConvertCPUAffinity(sa *specs.CPUAffinity) (*CPUAffinity, error) {
367371
if sa == nil {
368372
return nil, nil
369373
}
370-
initial, err := toCPUSet(sa.Initial)
374+
initial, err := ToCPUSet(sa.Initial)
371375
if err != nil {
372376
return nil, fmt.Errorf("bad CPUAffinity.Initial: %w", err)
373377
}
374-
final, err := toCPUSet(sa.Final)
378+
final, err := ToCPUSet(sa.Final)
375379
if err != nil {
376380
return nil, fmt.Errorf("bad CPUAffinity.Final: %w", err)
377381
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
package configs
2+
3+
import "golang.org/x/sys/unix"
4+
5+
// Memory policy modes and flags as defined in /usr/include/linux/mempolicy.h

// Memory policy modes. Exactly one of these is stored in
// LinuxMemoryPolicy.Mode.
//
//nolint:revive,staticcheck,nolintlint // ignore ALL_CAPS errors in consts from numaif.h, will match unix.* in the future
const (
	MPOL_DEFAULT             = 0
	MPOL_PREFERRED           = 1
	MPOL_BIND                = 2
	MPOL_INTERLEAVE          = 3
	MPOL_LOCAL               = 4
	MPOL_PREFERRED_MANY      = 5
	MPOL_WEIGHTED_INTERLEAVE = 6
)

// Memory policy mode flags. These are single-bit values that are combined
// with bitwise OR and stored in LinuxMemoryPolicy.Flags.
//
//nolint:revive,staticcheck,nolintlint // ignore ALL_CAPS errors in consts from numaif.h, will match unix.* in the future
const (
	MPOL_F_STATIC_NODES   = 1 << 15
	MPOL_F_RELATIVE_NODES = 1 << 14
	MPOL_F_NUMA_BALANCING = 1 << 13
)
21+
22+
// LinuxMemoryPolicy contains memory policy configuration.
23+
type LinuxMemoryPolicy struct {
24+
// Mode specifies memory policy mode without mode flags. See
25+
// set_mempolicy() documentation for details.
26+
Mode uint `json:"mode,omitempty"`
27+
// Flags contains mode flags.
28+
Flags uint `json:"flags,omitempty"`
29+
// Nodes contains NUMA nodes to which the mode applies.
30+
Nodes *unix.CPUSet `json:"nodes,omitempty"`
31+
}

libcontainer/configs/tocpuset_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@ func TestToCPUSet(t *testing.T) {
5858

5959
for _, tc := range testCases {
6060
t.Run(tc.in, func(t *testing.T) {
61-
out, err := toCPUSet(tc.in)
62-
t.Logf("toCPUSet(%q) = %v (error: %v)", tc.in, out, err)
61+
out, err := ToCPUSet(tc.in)
62+
t.Logf("ToCPUSet(%q) = %v (error: %v)", tc.in, out, err)
6363
// Check the error.
6464
if tc.isErr {
6565
if err == nil {

libcontainer/configs/validate/validator.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ func Validate(config *configs.Config) error {
3333
mountsStrict,
3434
scheduler,
3535
ioPriority,
36+
memoryPolicy,
3637
}
3738
for _, c := range checks {
3839
if err := c(config); err != nil {
@@ -482,3 +483,26 @@ func ioPriority(config *configs.Config) error {
482483

483484
return nil
484485
}
486+
487+
func memoryPolicy(config *configs.Config) error {
488+
mpol := config.MemoryPolicy
489+
if mpol == nil {
490+
return nil
491+
}
492+
switch mpol.Mode {
493+
case configs.MPOL_DEFAULT, configs.MPOL_LOCAL:
494+
if mpol.Nodes != nil && mpol.Nodes.Count() != 0 {
495+
return fmt.Errorf("memory policy mode requires 0 nodes but got %d", mpol.Nodes.Count())
496+
}
497+
case configs.MPOL_BIND, configs.MPOL_INTERLEAVE,
498+
configs.MPOL_PREFERRED_MANY, configs.MPOL_WEIGHTED_INTERLEAVE:
499+
if mpol.Nodes == nil || mpol.Nodes.Count() == 0 {
500+
return fmt.Errorf("memory policy mode requires at least one node but got 0")
501+
}
502+
case configs.MPOL_PREFERRED:
503+
// Zero or more nodes are allowed by the kernel.
504+
default:
505+
return fmt.Errorf("invalid memory policy mode: %d", mpol.Mode)
506+
}
507+
return nil
508+
}

libcontainer/init_linux.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -659,6 +659,14 @@ func setupIOPriority(config *initConfig) error {
659659
return nil
660660
}
661661

662+
func setupMemoryPolicy(config *configs.Config) error {
663+
mpol := config.MemoryPolicy
664+
if mpol == nil {
665+
return nil
666+
}
667+
return linux.SetMempolicy(mpol.Mode|mpol.Flags, config.MemoryPolicy.Nodes)
668+
}
669+
662670
func setupPersonality(config *configs.Config) error {
663671
return system.SetLinuxPersonality(config.Personality.Domain)
664672
}

libcontainer/setns_init_linux.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,10 @@ func (l *linuxSetnsInit) Init() error {
8888
}
8989
}
9090

91+
if err := setupMemoryPolicy(l.config.Config); err != nil {
92+
return err
93+
}
94+
9195
// Tell our parent that we're ready to exec. This must be done before the
9296
// Seccomp rules have been applied, because we need to be able to read and
9397
// write to a socket.

libcontainer/specconv/spec_linux.go

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"maps"
99
"os"
1010
"path/filepath"
11+
"slices"
1112
"sort"
1213
"strings"
1314
"sync"
@@ -41,6 +42,8 @@ var (
4142
flag int
4243
}
4344
complexFlags map[string]func(*configs.Mount)
45+
mpolModeMap map[string]uint
46+
mpolModeFMap map[string]uint
4447
)
4548

4649
func initMaps() {
@@ -148,6 +151,22 @@ func initMaps() {
148151
m.IDMapping.Recursive = true
149152
},
150153
}
154+
155+
mpolModeMap = map[string]uint{
156+
string(specs.MpolDefault): configs.MPOL_DEFAULT,
157+
string(specs.MpolPreferred): configs.MPOL_PREFERRED,
158+
string(specs.MpolBind): configs.MPOL_BIND,
159+
string(specs.MpolInterleave): configs.MPOL_INTERLEAVE,
160+
string(specs.MpolLocal): configs.MPOL_LOCAL,
161+
string(specs.MpolPreferredMany): configs.MPOL_PREFERRED_MANY,
162+
string(specs.MpolWeightedInterleave): configs.MPOL_WEIGHTED_INTERLEAVE,
163+
}
164+
165+
mpolModeFMap = map[string]uint{
166+
string(specs.MpolFStaticNodes): configs.MPOL_F_STATIC_NODES,
167+
string(specs.MpolFRelativeNodes): configs.MPOL_F_RELATIVE_NODES,
168+
string(specs.MpolFNumaBalancing): configs.MPOL_F_NUMA_BALANCING,
169+
}
151170
})
152171
}
153172

@@ -184,6 +203,20 @@ func KnownMountOptions() []string {
184203
return res
185204
}
186205

206+
// KnownMemoryPolicyModes returns the list of the known memory policy modes.
207+
// Used by `runc features`.
208+
func KnownMemoryPolicyModes() []string {
209+
initMaps()
210+
return slices.Sorted(maps.Keys(mpolModeMap))
211+
}
212+
213+
// KnownMemoryPolicyFlags returns the list of the known memory policy mode flags.
214+
// Used by `runc features`.
215+
func KnownMemoryPolicyFlags() []string {
216+
initMaps()
217+
return slices.Sorted(maps.Keys(mpolModeFMap))
218+
}
219+
187220
// AllowedDevices is the set of devices which are automatically included for
188221
// all containers.
189222
//
@@ -468,6 +501,28 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
468501
MemBwSchema: spec.Linux.IntelRdt.MemBwSchema,
469502
}
470503
}
504+
if spec.Linux.MemoryPolicy != nil {
505+
var ok bool
506+
var err error
507+
specMp := spec.Linux.MemoryPolicy
508+
confMp := &configs.LinuxMemoryPolicy{}
509+
confMp.Mode, ok = mpolModeMap[string(specMp.Mode)]
510+
if !ok {
511+
return nil, fmt.Errorf("invalid memory policy mode %q", specMp.Mode)
512+
}
513+
confMp.Nodes, err = configs.ToCPUSet(specMp.Nodes)
514+
if err != nil {
515+
return nil, fmt.Errorf("invalid memory policy nodes %q: %w", specMp.Nodes, err)
516+
}
517+
for _, specFlag := range specMp.Flags {
518+
confFlag, ok := mpolModeFMap[string(specFlag)]
519+
if !ok {
520+
return nil, fmt.Errorf("invalid memory policy flag %q", specFlag)
521+
}
522+
confMp.Flags |= confFlag
523+
}
524+
config.MemoryPolicy = confMp
525+
}
471526
if spec.Linux.Personality != nil {
472527
if len(spec.Linux.Personality.Flags) > 0 {
473528
logrus.Warnf("ignoring unsupported personality flags: %+v because personality flag has not supported at this time", spec.Linux.Personality.Flags)

libcontainer/standard_init_linux.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,10 @@ func (l *linuxStandardInit) Init() error {
171171
}
172172
}
173173

174+
if err := setupMemoryPolicy(l.config.Config); err != nil {
175+
return err
176+
}
177+
174178
// Tell our parent that we're ready to exec. This must be done before the
175179
// Seccomp rules have been applied, because we need to be able to read and
176180
// write to a socket.

0 commit comments

Comments
 (0)