Skip to content

Commit 7546f2e

Browse files
committed
solver: add per-step CPU and memory resource limits
Add support for setting cgroup resource limits (memory, memory-swap, cpu-shares, cpu-period, cpu-quota, cpuset-cpus, cpuset-mems) on individual build steps. Signed-off-by: Jiří Moravčík <jiri.moravcik@gmail.com>
1 parent eaa4de0 commit 7546f2e

File tree

27 files changed

+1620
-155
lines changed

27 files changed

+1620
-155
lines changed

client/client_test.go

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,8 @@ var allTests = []func(t *testing.T, sb integration.Sandbox){
154154
testShmSize,
155155
testUlimit,
156156
testCgroupParent,
157+
testLinuxResources,
158+
testLinuxResourcesMergeOnDedup,
157159
testNetworkMode,
158160
testFrontendMetadataReturn,
159161
testFrontendUseSolveResults,
@@ -1242,6 +1244,131 @@ func testCgroupParent(t *testing.T, sb integration.Sandbox) {
12421244
require.Equal(t, "", strings.TrimSpace(string(dt)))
12431245
}
12441246

1247+
// testLinuxResources is an integration test verifying that per-step memory
// and CPU limits set via llb.MemoryLimit / llb.CPUQuota / llb.CPUPeriod are
// applied to the build container's cgroup. Each step reads its own cgroup v2
// interface file into the shared /wd mount; the results are exported to a
// local directory and asserted on afterwards.
func testLinuxResources(t *testing.T, sb integration.Sandbox) {
	integration.SkipOnPlatform(t, "windows")
	// NOTE(review): presumably rootless workers cannot apply cgroup limits,
	// hence the skip — confirm against the executor implementation.
	if sb.Rootless() {
		t.SkipNow()
	}

	// The assertions read cgroup v2 files (memory.max, cpu.max), which do
	// not exist on cgroup v1 hosts; cgroup.subtree_control is the v2 marker.
	if _, err := os.Lstat("/sys/fs/cgroup/cgroup.subtree_control"); os.IsNotExist(err) {
		t.Skipf("test requires cgroup v2")
	}

	c, err := New(sb.Context(), sb.Address())
	require.NoError(t, err)
	defer c.Close()

	img := llb.Image("alpine:latest")
	st := llb.Scratch()

	// run chains one exec step onto st, re-mounting the accumulated scratch
	// state at /wd so each step's output file survives into the export.
	run := func(cmd string, ro ...llb.RunOption) {
		st = img.Run(append(ro, llb.Shlex(cmd), llb.Dir("/wd"))...).AddMount("/wd", st)
	}

	// Test memory limit: set 64MiB and verify via cgroup
	run(`sh -c "cat /sys/fs/cgroup/memory.max > mem_limited"`, llb.MemoryLimit(64*1024*1024))
	run(`sh -c "cat /sys/fs/cgroup/memory.max > mem_default"`)

	// Test CPU quota: set quota=50000 period=100000 (50% CPU) and verify
	run(`sh -c "cat /sys/fs/cgroup/cpu.max > cpu_limited"`, llb.CPUQuota(50000), llb.CPUPeriod(100000))

	def, err := st.Marshal(sb.Context())
	require.NoError(t, err)

	destDir := t.TempDir()

	// Export the accumulated /wd contents so the captured cgroup values can
	// be read back on the host.
	_, err = c.Solve(sb.Context(), def, SolveOpt{
		Exports: []ExportEntry{
			{
				Type:      ExporterLocal,
				OutputDir: destDir,
			},
		},
	}, nil)
	require.NoError(t, err)

	// 64 MiB = 67108864 bytes, as written by the kernel into memory.max.
	dt, err := os.ReadFile(filepath.Join(destDir, "mem_limited"))
	require.NoError(t, err)
	require.Equal(t, "67108864", strings.TrimSpace(string(dt)))

	// With no limit requested, memory.max must report the default "max".
	dt2, err := os.ReadFile(filepath.Join(destDir, "mem_default"))
	require.NoError(t, err)
	require.Equal(t, "max", strings.TrimSpace(string(dt2)))

	// cpu.max reports "<quota> <period>" in microseconds.
	dt3, err := os.ReadFile(filepath.Join(destDir, "cpu_limited"))
	require.NoError(t, err)
	require.Equal(t, "50000 100000", strings.TrimSpace(string(dt3)))
}
1302+
1303+
// testLinuxResourcesMergeOnDedup verifies that when two concurrent builds share
// the same RUN step but specify different resource limits, the most relaxed
// (least restrictive) limit is applied.
func testLinuxResourcesMergeOnDedup(t *testing.T, sb integration.Sandbox) {
	integration.SkipOnPlatform(t, "windows")
	// NOTE(review): presumably rootless workers cannot apply cgroup limits,
	// hence the skip — confirm against the executor implementation.
	if sb.Rootless() {
		t.SkipNow()
	}

	// The assertion reads the cgroup v2 memory.max file; skip on cgroup v1
	// hosts (cgroup.subtree_control is the v2 marker).
	if _, err := os.Lstat("/sys/fs/cgroup/cgroup.subtree_control"); os.IsNotExist(err) {
		t.Skipf("test requires cgroup v2")
	}

	c, err := New(sb.Context(), sb.Address())
	require.NoError(t, err)
	defer c.Close()

	// Both builds share the exact same RUN command so the solver deduplicates
	// them into one vertex. They differ only in memory limits (OpMetadata).
	// Dedup is guaranteed because loadUnlocked (which merges resources) loads
	// the full vertex graph in microseconds, while image resolution that must
	// complete before the RUN vertex can execute takes orders of magnitude longer.
	// Both goroutines will have loaded and merged before the RUN step starts.
	sharedCmd := `sh -c "cat /sys/fs/cgroup/memory.max > /wd/mem_limit"`

	// Build 1: 64 MiB memory limit
	st1 := llb.Image("alpine:latest").
		Run(llb.Shlex(sharedCmd), llb.MemoryLimit(64*1024*1024), llb.Dir("/wd")).
		AddMount("/wd", llb.Scratch())
	def1, err := st1.Marshal(sb.Context())
	require.NoError(t, err)

	// Build 2: 128 MiB memory limit (more relaxed — should win)
	st2 := llb.Image("alpine:latest").
		Run(llb.Shlex(sharedCmd), llb.MemoryLimit(128*1024*1024), llb.Dir("/wd")).
		AddMount("/wd", llb.Scratch())
	def2, err := st2.Marshal(sb.Context())
	require.NoError(t, err)

	destDir1 := t.TempDir()
	destDir2 := t.TempDir()

	// Run the two solves concurrently so they race into the same vertex;
	// errgroup cancels the sibling solve if either fails.
	eg, egCtx := errgroup.WithContext(sb.Context())
	eg.Go(func() error {
		_, err := c.Solve(egCtx, def1, SolveOpt{
			Exports: []ExportEntry{{Type: ExporterLocal, OutputDir: destDir1}},
		}, nil)
		return err
	})
	eg.Go(func() error {
		_, err := c.Solve(egCtx, def2, SolveOpt{
			Exports: []ExportEntry{{Type: ExporterLocal, OutputDir: destDir2}},
		}, nil)
		return err
	})
	err = eg.Wait()
	require.NoError(t, err)

	// Both builds share the deduplicated vertex, so both outputs come from
	// the same container. The memory limit should be 128 MiB (most relaxed).
	for _, dir := range []string{destDir1, destDir2} {
		dt, err := os.ReadFile(filepath.Join(dir, "mem_limit"))
		require.NoError(t, err)
		memLimit := strings.TrimSpace(string(dt))
		// 128 MiB = 134217728 bytes.
		require.Equal(t, "134217728", memLimit,
			"expected 128 MiB (most relaxed limit) but got %s in %s", memLimit, dir)
	}
}
1371+
12451372
func testNetworkMode(t *testing.T, sb integration.Sandbox) {
12461373
integration.SkipOnPlatform(t, "windows")
12471374
c, err := New(sb.Context(), sb.Address())

client/llb/exec.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,10 @@ func (e *ExecOp) Marshal(ctx context.Context, c *Constraints) (digest.Digest, []
255255
meta.Ulimit = ul
256256
}
257257

258+
if e.constraints.Metadata.LinuxResources != nil {
259+
addCap(&e.constraints, pb.CapExecMetaLinuxResources)
260+
}
261+
258262
network, err := getNetwork(e.base)(ctx, c)
259263
if err != nil {
260264
return "", nil, nil, nil, err

client/llb/exec_test.go

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,97 @@ func TestValidGetMountIndex(t *testing.T) {
5151
require.Equal(t, pb.OutputIndex(1), mountIndex, "unexpected mount index")
5252
}
5353

54+
func TestLinuxResourcesMarshal(t *testing.T) {
55+
t.Parallel()
56+
57+
st := Image("busybox:latest").
58+
Run(
59+
Shlex("true"),
60+
MemoryLimit(64*1024*1024),
61+
CPUShares(512),
62+
CPUQuota(50000),
63+
CPUPeriod(100000),
64+
CPUsetCPUs("0-3"),
65+
CPUsetMems("0"),
66+
).Root()
67+
68+
def, err := st.Marshal(context.TODO())
69+
require.NoError(t, err)
70+
71+
// Resources should be in OpMetadata (not in the Op bytes / cache key)
72+
var found bool
73+
for _, md := range def.Metadata {
74+
if md.LinuxResources == nil {
75+
continue
76+
}
77+
found = true
78+
res := md.LinuxResources
79+
require.Equal(t, int64(64*1024*1024), res.Memory)
80+
require.Equal(t, uint64(512), res.CpuShares)
81+
require.Equal(t, int64(50000), res.CpuQuota)
82+
require.Equal(t, uint64(100000), res.CpuPeriod)
83+
require.Equal(t, "0-3", res.CpusetCpus)
84+
require.Equal(t, "0", res.CpusetMems)
85+
}
86+
require.True(t, found, "LinuxResources not found in OpMetadata")
87+
}
88+
89+
func TestLinuxResourcesNotInCacheKey(t *testing.T) {
90+
t.Parallel()
91+
92+
// Two ops with same command but different resource limits must produce the same digest
93+
st1 := Image("busybox:latest").
94+
Run(Shlex("echo hello"), MemoryLimit(64*1024*1024)).Root()
95+
96+
st2 := Image("busybox:latest").
97+
Run(Shlex("echo hello"), MemoryLimit(128*1024*1024)).Root()
98+
99+
st3 := Image("busybox:latest").
100+
Run(Shlex("echo hello")).Root()
101+
102+
def1, err := st1.Marshal(context.TODO())
103+
require.NoError(t, err)
104+
105+
def2, err := st2.Marshal(context.TODO())
106+
require.NoError(t, err)
107+
108+
def3, err := st3.Marshal(context.TODO())
109+
require.NoError(t, err)
110+
111+
// All three should produce the same definition bytes (same cache key)
112+
require.Equal(t, def1.Def, def2.Def, "different resource limits should produce same digest")
113+
require.Equal(t, def1.Def, def3.Def, "resource limits vs no limits should produce same digest")
114+
}
115+
116+
func TestLinuxResourcesMerge(t *testing.T) {
117+
t.Parallel()
118+
119+
// Test that individual resource limit functions merge correctly
120+
st := Image("busybox:latest").
121+
Run(
122+
Shlex("true"),
123+
MemoryLimit(64*1024*1024),
124+
CPUShares(512),
125+
).Root()
126+
127+
def, err := st.Marshal(context.TODO())
128+
require.NoError(t, err)
129+
130+
for _, md := range def.Metadata {
131+
if md.LinuxResources == nil {
132+
continue
133+
}
134+
res := md.LinuxResources
135+
require.Equal(t, int64(64*1024*1024), res.Memory)
136+
require.Equal(t, uint64(512), res.CpuShares)
137+
// Unset fields should be zero
138+
require.Equal(t, int64(0), res.CpuQuota)
139+
require.Equal(t, uint64(0), res.CpuPeriod)
140+
return
141+
}
142+
t.Fatal("LinuxResources not found in OpMetadata")
143+
}
144+
54145
func TestExecOpMarshalConsistency(t *testing.T) {
55146
var prevDef [][]byte
56147
st := Image("busybox:latest").

client/llb/meta.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,17 @@ func getCgroupParent(s State) func(context.Context, *Constraints) (string, error
324324
}
325325
}
326326

327+
// LinuxResources holds CPU and memory resource limits for containers.
// The fields correspond to the cgroup settings memory, memory-swap,
// cpu-shares, cpu-period, cpu-quota, cpuset-cpus, and cpuset-mems.
// Zero values mean the corresponding limit is unset.
type LinuxResources struct {
	Memory     int64  // memory limit in bytes
	MemorySwap int64  // combined memory+swap limit in bytes — TODO confirm exact cgroup semantics
	CPUShares  uint64 // relative CPU weight (cgroup cpu shares)
	CPUPeriod  uint64 // CFS scheduler period in microseconds
	CPUQuota   int64  // CFS quota in microseconds per period
	CPUsetCPUs string // CPUs the container may run on (e.g. "0-3")
	CPUsetMems string // memory nodes the container may use (e.g. "0")
}
337+
327338
// Network returns a [StateOption] which sets the network mode used for containers created by [State.Run].
328339
// This is the equivalent of [State.Network]
329340
// See [State.With] for where to use this.

0 commit comments

Comments
 (0)