Skip to content

Commit eb7080c

Browse files
authored
Merge pull request moby#3697 from AkihiroSuda/fix-3098
rootless: support Bottlerocket OS
2 parents 252e5a6 + c67176a commit eb7080c

File tree

8 files changed

+204
-0
lines changed

8 files changed

+204
-0
lines changed

cache/refs.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"github.com/containerd/containerd/images"
1515
"github.com/containerd/containerd/leases"
1616
"github.com/containerd/containerd/mount"
17+
"github.com/containerd/containerd/pkg/userns"
1718
"github.com/containerd/containerd/snapshots"
1819
"github.com/docker/docker/pkg/idtools"
1920
"github.com/hashicorp/go-multierror"
@@ -27,6 +28,7 @@ import (
2728
"github.com/moby/buildkit/util/flightcontrol"
2829
"github.com/moby/buildkit/util/leaseutil"
2930
"github.com/moby/buildkit/util/progress"
31+
rootlessmountopts "github.com/moby/buildkit/util/rootless/mountopts"
3032
"github.com/moby/buildkit/util/winlayers"
3133
"github.com/moby/sys/mountinfo"
3234
digest "github.com/opencontainers/go-digest"
@@ -1640,6 +1642,12 @@ func (sm *sharableMountable) Mount() (_ []mount.Mount, _ func() error, retErr er
16401642
os.Remove(dir)
16411643
}
16421644
}()
1645+
if userns.RunningInUserNS() {
1646+
mounts, err = rootlessmountopts.FixUp(mounts)
1647+
if err != nil {
1648+
return nil, nil, err
1649+
}
1650+
}
16431651
if err := mount.All(mounts, dir); err != nil {
16441652
return nil, nil, err
16451653
}

docs/rootless.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,12 @@ spec:
2424
2525
See also the [example manifests](#Kubernetes).
2626
27+
### Bottlerocket OS
28+
29+
Bottlerocket OS requires running `sysctl -w user.max_user_namespaces=N` (where N is a positive integer, such as 63359) on the host nodes.
30+
31+
See [`../examples/kubernetes/sysctl-userns.privileged.yaml`](../examples/kubernetes/sysctl-userns.privileged.yaml).
32+
2733
<details>
2834
<summary>Old distributions</summary>
2935

@@ -104,6 +110,11 @@ See https://rootlesscontaine.rs/getting-started/common/subuid/
104110
### Error `Options:[rbind ro]}]: operation not permitted`
105111
Make sure to mount an `emptyDir` volume on `/home/user/.local/share/buildkit`.
106112

113+
### Error `fork/exec /proc/self/exe: no space left on device` with `level=warning msg="/proc/sys/user/max_user_namespaces needs to be set to non-zero."`
114+
Run `sysctl -w user.max_user_namespaces=N` (N=positive integer, like 63359) on the host nodes.
115+
116+
See [`../examples/kubernetes/sysctl-userns.privileged.yaml`](../examples/kubernetes/sysctl-userns.privileged.yaml).
117+
107118
## Containerized deployment
108119

109120
### Kubernetes
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# DaemonSet that runs `sysctl -w user.max_user_namespaces=63359` on every node.
#
# Apply it when rootless buildkitd pods fail with errors like
# "/proc/sys/user/max_user_namespaces needs to be set to non-zero".
#
# This workaround is known to be needed on Bottlerocket OS.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  labels:
    app: sysctl-userns
  name: sysctl-userns
spec:
  selector:
    matchLabels:
      app: sysctl-userns
  template:
    metadata:
      labels:
        app: sysctl-userns
    spec:
      containers:
        - name: sysctl-userns
          image: busybox
          # Set the sysctl once, then sleep forever so the pod stays running.
          command: ["sh", "-euxc", "sysctl -w user.max_user_namespaces=63359 && sleep infinity"]
          securityContext:
            # Privileged mode is requested so the container can write the sysctl.
            privileged: true

executor/oci/spec.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,14 @@ import (
1111
"github.com/containerd/containerd/mount"
1212
"github.com/containerd/containerd/namespaces"
1313
"github.com/containerd/containerd/oci"
14+
"github.com/containerd/containerd/pkg/userns"
1415
"github.com/containerd/continuity/fs"
1516
"github.com/docker/docker/pkg/idtools"
1617
"github.com/mitchellh/hashstructure/v2"
1718
"github.com/moby/buildkit/executor"
1819
"github.com/moby/buildkit/snapshot"
1920
"github.com/moby/buildkit/util/network"
21+
rootlessmountopts "github.com/moby/buildkit/util/rootless/mountopts"
2022
traceexec "github.com/moby/buildkit/util/tracing/exec"
2123
specs "github.com/opencontainers/runtime-spec/specs-go"
2224
"github.com/opencontainers/selinux/go-selinux"
@@ -193,6 +195,14 @@ func GenerateSpec(ctx context.Context, meta executor.Meta, mounts []executor.Mou
193195
}
194196

195197
s.Mounts = dedupMounts(s.Mounts)
198+
199+
if userns.RunningInUserNS() {
200+
s.Mounts, err = rootlessmountopts.FixUpOCI(s.Mounts)
201+
if err != nil {
202+
return nil, nil, err
203+
}
204+
}
205+
196206
return s, releaseAll, nil
197207
}
198208

snapshot/localmounter_unix.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ import (
88
"syscall"
99

1010
"github.com/containerd/containerd/mount"
11+
"github.com/containerd/containerd/pkg/userns"
12+
rootlessmountopts "github.com/moby/buildkit/util/rootless/mountopts"
1113
"github.com/pkg/errors"
1214
)
1315

@@ -24,6 +26,14 @@ func (lm *localMounter) Mount() (string, error) {
2426
lm.release = release
2527
}
2628

29+
if userns.RunningInUserNS() {
30+
var err error
31+
lm.mounts, err = rootlessmountopts.FixUp(lm.mounts)
32+
if err != nil {
33+
return "", err
34+
}
35+
}
36+
2737
if len(lm.mounts) == 1 && (lm.mounts[0].Type == "bind" || lm.mounts[0].Type == "rbind") {
2838
ro := false
2939
for _, opt := range lm.mounts[0].Options {
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
package mountopts
2+
3+
import (
4+
"github.com/containerd/containerd/mount"
5+
"github.com/moby/buildkit/util/strutil"
6+
specs "github.com/opencontainers/runtime-spec/specs-go"
7+
"github.com/pkg/errors"
8+
"golang.org/x/sys/unix"
9+
)
10+
11+
// UnprivilegedMountFlags gets the set of mount flags that are set on the mount that contains the given
12+
// path and are locked by CL_UNPRIVILEGED. This is necessary to ensure that
13+
// bind-mounting "with options" will not fail with user namespaces, due to
14+
// kernel restrictions that require user namespace mounts to preserve
15+
// CL_UNPRIVILEGED locked flags.
16+
//
17+
// From https://github.com/moby/moby/blob/v23.0.1/daemon/oci_linux.go#L430-L460
18+
func UnprivilegedMountFlags(path string) ([]string, error) {
19+
var statfs unix.Statfs_t
20+
if err := unix.Statfs(path, &statfs); err != nil {
21+
return nil, err
22+
}
23+
24+
// The set of keys come from https://github.com/torvalds/linux/blob/v4.13/fs/namespace.c#L1034-L1048.
25+
unprivilegedFlags := map[uint64]string{
26+
unix.MS_RDONLY: "ro",
27+
unix.MS_NODEV: "nodev",
28+
unix.MS_NOEXEC: "noexec",
29+
unix.MS_NOSUID: "nosuid",
30+
unix.MS_NOATIME: "noatime",
31+
unix.MS_RELATIME: "relatime",
32+
unix.MS_NODIRATIME: "nodiratime",
33+
}
34+
35+
var flags []string
36+
for mask, flag := range unprivilegedFlags {
37+
if uint64(statfs.Flags)&mask == mask {
38+
flags = append(flags, flag)
39+
}
40+
}
41+
42+
return flags, nil
43+
}
44+
45+
// FixUp is for https://github.com/moby/buildkit/issues/3098
46+
func FixUp(mounts []mount.Mount) ([]mount.Mount, error) {
47+
for i, m := range mounts {
48+
var isBind bool
49+
for _, o := range m.Options {
50+
switch o {
51+
case "bind", "rbind":
52+
isBind = true
53+
}
54+
}
55+
if !isBind {
56+
continue
57+
}
58+
unpriv, err := UnprivilegedMountFlags(m.Source)
59+
if err != nil {
60+
return nil, errors.Wrapf(err, "failed to get unprivileged mount flags for %+v", m)
61+
}
62+
m.Options = strutil.DedupeSlice(append(m.Options, unpriv...))
63+
mounts[i] = m
64+
}
65+
return mounts, nil
66+
}
67+
68+
func FixUpOCI(mounts []specs.Mount) ([]specs.Mount, error) {
69+
for i, m := range mounts {
70+
var isBind bool
71+
for _, o := range m.Options {
72+
switch o {
73+
case "bind", "rbind":
74+
isBind = true
75+
}
76+
}
77+
if !isBind {
78+
continue
79+
}
80+
unpriv, err := UnprivilegedMountFlags(m.Source)
81+
if err != nil {
82+
return nil, errors.Wrapf(err, "failed to get unprivileged mount flags for %+v", m)
83+
}
84+
m.Options = strutil.DedupeSlice(append(m.Options, unpriv...))
85+
mounts[i] = m
86+
}
87+
return mounts, nil
88+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
//go:build !linux
2+
// +build !linux
3+
4+
package mountopts
5+
6+
import (
7+
"github.com/containerd/containerd/mount"
8+
specs "github.com/opencontainers/runtime-spec/specs-go"
9+
)
10+
11+
// UnprivilegedMountFlags is the stub used on non-Linux platforms. It ignores
// path and reports no locked mount flags: the returned slice is nil, matching
// what the Linux implementation returns when no flag is set, and the error is
// always nil.
func UnprivilegedMountFlags(path string) ([]string, error) {
	return nil, nil
}
14+
15+
func FixUp(mounts []mount.Mount) ([]mount.Mount, error) {
16+
return mounts, nil
17+
}
18+
19+
func FixUpOCI(mounts []specs.Mount) ([]specs.Mount, error) {
20+
return mounts, nil
21+
}

util/strutil/strutil.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
/*
2+
Copyright The containerd Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package strutil
18+
19+
// DedupeSlice returns the elements of in with duplicates removed. The first
// occurrence of each string is kept and the relative order of the kept
// elements is preserved. The input slice is not modified. It returns nil when
// in is empty or nil.
//
// DedupeSlice is from https://github.com/containerd/nerdctl/blob/v1.2.1/pkg/strutil/strutil.go#L72-L82
func DedupeSlice(in []string) []string {
	if len(in) == 0 {
		return nil
	}

	// Both containers are sized for the worst case (no duplicates) so that
	// neither has to grow while scanning.
	seen := make(map[string]struct{}, len(in))
	res := make([]string, 0, len(in))
	for _, s := range in {
		if _, dup := seen[s]; dup {
			continue
		}
		seen[s] = struct{}{}
		res = append(res, s)
	}
	return res
}

0 commit comments

Comments
 (0)