Skip to content

Commit ac54047

Browse files
authored
Merge pull request containerd#9713 from AkihiroSuda/cri-rro
cri: make read-only mounts recursively read-only
2 parents 0dbe758 + b2f254f commit ac54047

File tree

11 files changed

+387
-8
lines changed

11 files changed

+387
-8
lines changed

RELEASES.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,25 @@ version = 2
461461

462462
</p></details>
463463

464+
## Other breaking changes
465+
### containerd v2.0
466+
#### CRI plugin treats read-only mounts as recursively read-only
467+
Starting with containerd v2.0, the CRI plugin treats read-only mounts
468+
as recursively read-only mounts when running on Linux kernel v5.12 or later.
469+
470+
To roll back to the legacy behavior that corresponds to containerd v1.x,
471+
set the following config:
472+
```toml
473+
version = 2
474+
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
475+
# treat_ro_mounts_as_rro ("Enabled"|"IfPossible"|"Disabled")
476+
# treats read-only mounts as recursive read-only mounts.
477+
# An empty string means "IfPossible".
478+
# "Enabled" requires Linux kernel v5.12 or later.
479+
# This configuration does not apply to non-volume mounts such as "/sys/fs/cgroup".
480+
treat_ro_mounts_as_rro = "Disabled"
481+
```
482+
464483
## Experimental features
465484

466485
Experimental features are new features added to containerd which do not have the

docs/cri/config.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,14 @@ version = 2
369369
# See https://github.com/containerd/containerd/issues/6657 for context.
370370
snapshotter = ""
371371

372+
# treat_ro_mounts_as_rro ("Enabled"|"IfPossible"|"Disabled")
373+
# treats read-only mounts as recursive read-only mounts.
374+
# An empty string means "IfPossible".
375+
# "Enabled" requires Linux kernel v5.12 or later.
376+
# Introduced in containerd v2.0.
377+
# This configuration does not apply to non-volume mounts such as "/sys/fs/cgroup".
378+
treat_ro_mounts_as_rro = ""
379+
372380
# 'plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options' is options specific to
373381
# "io.containerd.runc.v1" and "io.containerd.runc.v2". Its corresponding options type is:
374382
# https://github.com/containerd/containerd/blob/v1.3.2/runtime/v2/runc/options/oci.pb.go#L26 .
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
/*
2+
Copyright The containerd Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package integration
18+
19+
import (
20+
"fmt"
21+
"os"
22+
"path/filepath"
23+
"syscall"
24+
"testing"
25+
"time"
26+
27+
"github.com/containerd/containerd/v2/core/mount"
28+
"github.com/containerd/containerd/v2/integration/images"
29+
"github.com/containerd/containerd/v2/pkg/kernelversion"
30+
"github.com/opencontainers/selinux/go-selinux"
31+
"github.com/stretchr/testify/assert"
32+
"github.com/stretchr/testify/require"
33+
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
34+
)
35+
36+
func testReadonlyMounts(t *testing.T, mode string, expectRRO bool) {
37+
workDir := t.TempDir()
38+
mntSrcDir := filepath.Join(workDir, "mnt") // "/mnt" in the container
39+
require.NoError(t, os.MkdirAll(mntSrcDir, 0755))
40+
tmpfsDir := filepath.Join(mntSrcDir, "tmpfs") // "/mnt/tmpfs" in the container
41+
require.NoError(t, os.MkdirAll(tmpfsDir, 0755))
42+
tmpfsMount := mount.Mount{
43+
Type: "tmpfs",
44+
Source: "none",
45+
}
46+
require.NoError(t, tmpfsMount.Mount(tmpfsDir))
47+
t.Cleanup(func() {
48+
require.NoError(t, mount.UnmountAll(tmpfsDir, 0))
49+
})
50+
51+
podLogDir := filepath.Join(workDir, "podLogDir")
52+
require.NoError(t, os.MkdirAll(podLogDir, 0755))
53+
54+
config := `version = 2
55+
`
56+
if mode != "" {
57+
config += fmt.Sprintf(`
58+
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
59+
treat_ro_mount_as_rro = %q
60+
`, mode)
61+
}
62+
require.NoError(t, os.WriteFile(filepath.Join(workDir, "config.toml"),
63+
[]byte(config), 0644))
64+
ctrdProc := newCtrdProc(t, "containerd", workDir)
65+
t.Cleanup(func() {
66+
cleanupPods(t, ctrdProc.criRuntimeService(t))
67+
require.NoError(t, ctrdProc.kill(syscall.SIGTERM))
68+
require.NoError(t, ctrdProc.wait(5*time.Minute))
69+
if t.Failed() {
70+
dumpFileContent(t, ctrdProc.logPath())
71+
}
72+
})
73+
runtimeServiceOrig, imageServiceOrig := runtimeService, imageService
74+
runtimeService, imageService = ctrdProc.criRuntimeService(t), ctrdProc.criImageService(t)
75+
t.Cleanup(func() {
76+
runtimeService, imageService = runtimeServiceOrig, imageServiceOrig
77+
})
78+
require.NoError(t, ctrdProc.isReady())
79+
80+
sb, sbConfig := PodSandboxConfigWithCleanup(t, "sandbox", "test-ro-mounts",
81+
WithPodLogDirectory(podLogDir),
82+
)
83+
84+
testImage := images.Get(images.BusyBox)
85+
EnsureImageExists(t, testImage)
86+
87+
containerName := "test-container"
88+
cnConfig := ContainerConfig(
89+
containerName,
90+
testImage,
91+
WithCommand("/bin/touch", "/mnt/tmpfs/file"),
92+
WithLogPath(containerName),
93+
func(c *runtime.ContainerConfig) {
94+
c.Mounts = append(c.Mounts, &runtime.Mount{
95+
HostPath: mntSrcDir,
96+
ContainerPath: "/mnt",
97+
SelinuxRelabel: selinux.GetEnabled(),
98+
Readonly: true,
99+
})
100+
},
101+
)
102+
103+
cn, err := runtimeService.CreateContainer(sb, cnConfig, sbConfig)
104+
require.NoError(t, err)
105+
106+
t.Log("Start the container")
107+
require.NoError(t, runtimeService.StartContainer(cn))
108+
109+
t.Log("Wait for container to finish running")
110+
exitCode := -1
111+
require.NoError(t, Eventually(func() (bool, error) {
112+
s, err := runtimeService.ContainerStatus(cn)
113+
if err != nil {
114+
return false, err
115+
}
116+
if s.GetState() == runtime.ContainerState_CONTAINER_EXITED {
117+
exitCode = int(s.ExitCode)
118+
return true, nil
119+
}
120+
return false, nil
121+
}, time.Second, 30*time.Second))
122+
123+
output, err := os.ReadFile(filepath.Join(podLogDir, containerName))
124+
assert.NoError(t, err)
125+
t.Logf("exitCode=%d, output=%q", exitCode, output)
126+
127+
if expectRRO {
128+
require.NotEqual(t, 0, exitCode)
129+
require.Contains(t, string(output), "stderr F touch: /mnt/tmpfs/file: Read-only file system\n")
130+
} else {
131+
require.Equal(t, 0, exitCode)
132+
}
133+
}
134+
135+
func TestReadonlyMounts(t *testing.T) {
136+
kernelSupportsRRO, err := kernelversion.GreaterEqualThan(kernelversion.KernelVersion{Kernel: 5, Major: 12})
137+
require.NoError(t, err)
138+
t.Run("Default", func(t *testing.T) {
139+
testReadonlyMounts(t, "", kernelSupportsRRO)
140+
})
141+
t.Run("Disabled", func(t *testing.T) {
142+
testReadonlyMounts(t, "Disabled", false)
143+
})
144+
if kernelSupportsRRO {
145+
t.Run("Enabled", func(t *testing.T) {
146+
testReadonlyMounts(t, "Enabled", true)
147+
})
148+
}
149+
}

pkg/cri/config/config.go

Lines changed: 155 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,15 @@ import (
2121
"errors"
2222
"fmt"
2323
"net/url"
24+
goruntime "runtime"
25+
"strconv"
2426
"time"
2527

28+
introspectionapi "github.com/containerd/containerd/v2/api/services/introspection/v1"
29+
apitypes "github.com/containerd/containerd/v2/api/types"
30+
"github.com/containerd/containerd/v2/protobuf"
2631
"github.com/containerd/log"
32+
"github.com/containerd/typeurl/v2"
2733
"github.com/pelletier/go-toml/v2"
2834
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
2935
"k8s.io/kubelet/pkg/cri/streaming"
@@ -34,8 +40,16 @@ import (
3440
"github.com/containerd/containerd/v2/pkg/deprecation"
3541
runtimeoptions "github.com/containerd/containerd/v2/pkg/runtimeoptions/v1"
3642
"github.com/containerd/containerd/v2/plugins"
43+
"github.com/opencontainers/image-spec/specs-go"
44+
"github.com/opencontainers/runtime-spec/specs-go/features"
3745
)
3846

47+
func init() {
48+
const prefix = "types.containerd.io"
49+
major := strconv.Itoa(specs.VersionMajor)
50+
typeurl.Register(&features.Features{}, prefix, "opencontainers/runtime-spec", major, "features", "Features")
51+
}
52+
3953
const (
4054
// defaultImagePullProgressTimeoutDuration is the default value of imagePullProgressTimeout.
4155
//
@@ -73,6 +87,17 @@ const (
7387
DefaultSandboxImage = "registry.k8s.io/pause:3.9"
7488
)
7589

90+
// Ternary is a three-state setting stored as a string.
// A string is used rather than a bool because TOML does not accept "null"
// for boolean values, so a bool could not express the third state.
type Ternary = string

// Values accepted for a Ternary setting.
const (
	// TernaryEmpty is the unset value; it is an alias for TernaryIfPossible.
	TernaryEmpty Ternary = ""
	// TernaryEnabled explicitly turns the setting on.
	TernaryEnabled Ternary = "Enabled"
	// TernaryIfPossible turns the setting on only when it is supported.
	TernaryIfPossible Ternary = "IfPossible"
	// TernaryDisabled explicitly turns the setting off.
	TernaryDisabled Ternary = "Disabled"
)
100+
76101
// Runtime struct to contain the type(ID), engine, and root variables for a default runtime
77102
// and a runtime for untrusted workload.
78103
type Runtime struct {
@@ -116,6 +141,15 @@ type Runtime struct {
116141
// shim - means use whatever Controller implementation provided by shim (e.g. use RemoteController).
117142
// podsandbox - means use Controller implementation from sbserver podsandbox package.
118143
Sandboxer string `toml:"sandboxer" json:"sandboxer"`
144+
145+
// TreatRoMountsAsRro ("Enabled"|"IfPossible"|"Disabled")
146+
// treats read-only mounts as recursive read-only mounts.
147+
// An empty string means "IfPossible".
148+
// "Enabled" requires Linux kernel v5.12 or later.
149+
// Introduced in containerd v2.0.
150+
// This configuration does not apply to non-volume mounts such as "/sys/fs/cgroup".
151+
TreatRoMountsAsRro Ternary `toml:"treat_ro_mount_as_rro" json:"treatRoMountsAsRro"`
152+
TreatRoMountsAsRroResolved bool `toml:"-" json:"-"` // Do not set manually
119153
}
120154

121155
// ContainerdConfig contains toml config related to containerd
@@ -499,8 +533,120 @@ func ValidateImageConfig(ctx context.Context, c *ImageConfig) ([]deprecation.War
499533
return warnings, nil
500534
}
501535

536+
func introspectRuntimeFeatures(ctx context.Context, introspectionClient introspectionapi.IntrospectionClient, r Runtime) (*features.Features, error) {
537+
if introspectionClient == nil { // happens for unit tests
538+
return nil, errors.New("introspectionClient is nil")
539+
}
540+
infoReq := &introspectionapi.PluginInfoRequest{
541+
Type: string(plugins.RuntimePluginV2),
542+
ID: "task",
543+
}
544+
rr := &apitypes.RuntimeRequest{
545+
RuntimePath: r.Type,
546+
}
547+
if r.Path != "" {
548+
rr.RuntimePath = r.Path
549+
}
550+
options, err := GenerateRuntimeOptions(r)
551+
if err != nil {
552+
return nil, err
553+
}
554+
rr.Options, err = protobuf.MarshalAnyToProto(options)
555+
if err != nil {
556+
return nil, fmt.Errorf("failed to marshal %T: %w", options, err)
557+
}
558+
infoReq.Options, err = protobuf.MarshalAnyToProto(rr)
559+
if err != nil {
560+
return nil, fmt.Errorf("failed to marshal %T: %w", rr, err)
561+
}
562+
infoResp, err := introspectionClient.PluginInfo(ctx, infoReq)
563+
if err != nil {
564+
return nil, fmt.Errorf("failed to call PluginInfo: %w", err)
565+
}
566+
var info apitypes.RuntimeInfo
567+
if err := typeurl.UnmarshalTo(infoResp.Extra, &info); err != nil {
568+
return nil, fmt.Errorf("failed to get runtime info from plugin info: %w", err)
569+
}
570+
featuresX, err := typeurl.UnmarshalAny(info.Features)
571+
if err != nil {
572+
return nil, fmt.Errorf("failed to unmarshal Features (%T): %w", info.Features, err)
573+
}
574+
features, ok := featuresX.(*features.Features)
575+
if !ok {
576+
return nil, fmt.Errorf("unknown features type %T", featuresX)
577+
}
578+
return features, nil
579+
}
580+
581+
// resolveTreatRoMountsAsRro resolves r.TreatRoMountsAsRro string into a boolean.
582+
func resolveTreatRoMountsAsRro(ctx context.Context, introspectionClient introspectionapi.IntrospectionClient, r Runtime) (bool, error) {
583+
debugPrefix := "treat_ro_mounts_as_rro"
584+
if r.Type != "" {
585+
debugPrefix += fmt.Sprintf("[%s]", r.Type)
586+
}
587+
if binaryName := r.Options["BinaryName"]; binaryName != "" {
588+
debugPrefix += fmt.Sprintf("[%v]", binaryName)
589+
}
590+
debugPrefix += ": "
591+
592+
var runtimeSupportsRro bool
593+
if r.Type == plugins.RuntimeRuncV2 {
594+
features, err := introspectRuntimeFeatures(ctx, introspectionClient, r)
595+
if err != nil {
596+
log.G(ctx).WithError(err).Warnf(debugPrefix + "failed to introspect runtime features (binary is not compatible with runc v1.1?)")
597+
} else {
598+
log.G(ctx).Debugf(debugPrefix+"Features: %+v", features)
599+
for _, s := range features.MountOptions {
600+
if s == "rro" {
601+
runtimeSupportsRro = true
602+
break
603+
}
604+
}
605+
}
606+
}
607+
608+
switch r.TreatRoMountsAsRro {
609+
case TernaryDisabled:
610+
log.G(ctx).Debug(debugPrefix + "rro mounts are explicitly disabled")
611+
return false, nil
612+
case TernaryEnabled:
613+
log.G(ctx).Debug(debugPrefix + "rro mounts are explicitly enabled")
614+
if !kernelSupportsRro {
615+
return true, fmt.Errorf("invalid `treat_ro_mounts_as_rro`: %q: needs Linux kernel v5.12 or later", TernaryEnabled)
616+
}
617+
if !runtimeSupportsRro {
618+
return true, fmt.Errorf("invalid `treat_ro_mounts_as_rro`: %q: needs a runtime that is compatible with runc v1.1", TernaryEnabled)
619+
}
620+
return true, nil
621+
case TernaryEmpty, TernaryIfPossible:
622+
if r.Type != plugins.RuntimeRuncV2 {
623+
log.G(ctx).Debugf(debugPrefix+"rro mounts are not supported by runtime %q, disabling rro mounts", r.Type)
624+
return false, nil
625+
}
626+
if !kernelSupportsRro {
627+
msg := debugPrefix + "rro mounts are not supported by kernel, disabling rro mounts"
628+
if goruntime.GOOS == "linux" {
629+
msg += " (Hint: upgrade the kernel to v5.12 or later)"
630+
log.G(ctx).Warn(msg)
631+
} else {
632+
log.G(ctx).Debug(msg)
633+
}
634+
return false, nil
635+
}
636+
if !runtimeSupportsRro {
637+
log.G(ctx).Warn(debugPrefix + "rro mounts are not supported by runtime, disabling rro mounts (Hint: use a runtime that is compatible with runc v1.1)")
638+
return false, nil
639+
}
640+
log.G(ctx).Debug(debugPrefix + "rro mounts are implicitly enabled")
641+
return true, nil
642+
default:
643+
return false, fmt.Errorf("invalid `treat_ro_mounts_as_rro`: %q (must be %q, %q, or %q)",
644+
r.TreatRoMountsAsRro, TernaryDisabled, TernaryEnabled, TernaryIfPossible)
645+
}
646+
}
647+
502648
// ValidateRuntimeConfig validates the given runtime configuration.
503-
func ValidateRuntimeConfig(ctx context.Context, c *RuntimeConfig) ([]deprecation.Warning, error) {
649+
func ValidateRuntimeConfig(ctx context.Context, c *RuntimeConfig, introspectionClient introspectionapi.IntrospectionClient) ([]deprecation.Warning, error) {
504650
var warnings []deprecation.Warning
505651
if c.ContainerdConfig.Runtimes == nil {
506652
c.ContainerdConfig.Runtimes = make(map[string]Runtime)
@@ -521,8 +667,15 @@ func ValidateRuntimeConfig(ctx context.Context, c *RuntimeConfig) ([]deprecation
521667
// If empty, use default podSandbox mode
522668
if len(r.Sandboxer) == 0 {
523669
r.Sandboxer = string(ModePodSandbox)
524-
c.ContainerdConfig.Runtimes[k] = r
525670
}
671+
672+
// Resolve r.TreatRoMountsAsRro (string; empty value must not be ignored) into r.TreatRoMountsAsRroResolved (bool)
673+
var err error
674+
r.TreatRoMountsAsRroResolved, err = resolveTreatRoMountsAsRro(ctx, introspectionClient, r)
675+
if err != nil {
676+
return warnings, err
677+
}
678+
c.ContainerdConfig.Runtimes[k] = r
526679
}
527680

528681
// Validation for drain_exec_sync_io_timeout

0 commit comments

Comments
 (0)