Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 65 additions & 10 deletions pkg/securitycontextconstraints/sysctl/mustmatchpatterns.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,29 +18,84 @@ package sysctl

import (
"fmt"
"slices"
"strings"

"k8s.io/apimachinery/pkg/util/validation/field"
"k8s.io/apimachinery/pkg/util/version"
"k8s.io/klog/v2"
api "k8s.io/kubernetes/pkg/apis/core"
utilkernel "k8s.io/kubernetes/pkg/util/kernel"
)

type sysctl struct {
// the name of sysctl
name string
// the minimum kernel version where the sysctl is available
kernel string
}

// Legacy safe sysctls that were always allowed in previous releases.
// These must always be returned to avoid regressions: pods that depended on these
// sysctls should continue to work as before, regardless of kernel version detection.
var legacySafeSysctls = []string{
"kernel.shm_rmid_forced",
"net.ipv4.ip_local_port_range",
"net.ipv4.tcp_syncookies",
"net.ipv4.ping_group_range",
"net.ipv4.ip_unprivileged_port_start",
"net.ipv4.tcp_keepalive_time",
"net.ipv4.tcp_fin_timeout",
"net.ipv4.tcp_keepalive_intvl",
"net.ipv4.tcp_keepalive_probes",
}

// Newer sysctls that are safe only if the kernel version is new enough.
// We gate these to avoid exposing unsupported sysctls on older kernels.
var newerSysctls = []sysctl{
{
name: "net.ipv4.ip_local_reserved_ports",
kernel: utilkernel.IPLocalReservedPortsNamespacedKernelVersion,
}, {
name: "net.ipv4.tcp_rmem",
kernel: utilkernel.TCPReceiveMemoryNamespacedKernelVersion,
}, {
name: "net.ipv4.tcp_wmem",
kernel: utilkernel.TCPTransmitMemoryNamespacedKernelVersion,
},
}

// SafeSysctlAllowlist returns the allowlist of safe sysctls and safe sysctl patterns (ending in *).
//
// A sysctl is called safe iff
// - it is namespaced in the container or the pod
// - it is isolated, i.e. has no influence on any other pod on the same node.
func SafeSysctlAllowlist() []string {
return []string{
"kernel.shm_rmid_forced",
"net.ipv4.ip_local_port_range",
"net.ipv4.tcp_syncookies",
"net.ipv4.ping_group_range",
"net.ipv4.ip_unprivileged_port_start",
"net.ipv4.tcp_keepalive_time",
"net.ipv4.tcp_fin_timeout",
"net.ipv4.tcp_keepalive_intvl",
"net.ipv4.tcp_keepalive_probes",
return getSafeSysctlAllowlist(utilkernel.GetVersion)
}

// getSafeSysctlAllowlist returns the list of safe sysctls that can be used.
// To prevent regressions:
// 1. Always return the legacy list (known safe sysctls from previous releases).
// 2. Conditionally add newer sysctls only if the detected kernel version
// is at least as new as required.
func getSafeSysctlAllowlist(getVersion func() (*version.Version, error)) []string {
safeSysctlAllowlist := slices.Clone(legacySafeSysctls)

kernelVersion, err := getVersion()

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pod SCC admission will be executing on a control plane node in a kube-apiserver process, and this is reading the local kernel version. Don't we want admission to consider the kernel version on the Node that the Pod is assigned to?

/hold

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@benluddy / @jubittajohn anything I can help you two out to push https://issues.redhat.com/browse/OCPBUGS-61679 further?

as for this comment specifically, I think there is no way besides a webhook or node object inspection to figure this out. Do we really need this validation?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tjungblu
Linking the discussion thread here: https://redhat-internal.slack.com/archives/CC3CZCQHM/p1758217953650509.
The main output of the discussion was to admit a sysctl if atleast one worker node supports it, and to add e2e test to capture the behavior.
#151 - Linking the draft PR I have with a slightly different approach which gets the minimum kernel version across currently available worker nodes. Not yet tested against a cluster or covered by unit tests.

if err != nil {
klog.Error(err, "failed to get kernel version, falling back to legacy safe sysctl list")
return safeSysctlAllowlist
}

for _, sc := range newerSysctls {
if kernelVersion.AtLeast(version.MustParseGeneric(sc.kernel)) {
safeSysctlAllowlist = append(safeSysctlAllowlist, sc.name)
} else {
klog.Info("kernel version is too old, dropping the sysctl from safe sysctl list", "kernelVersion", kernelVersion, "sysctl", sc.name)
}
}
return safeSysctlAllowlist
}

// mustMatchPatterns implements the SysctlsStrategy interface
Expand Down
62 changes: 62 additions & 0 deletions pkg/securitycontextconstraints/sysctl/mustmatchpatterns_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,11 @@ limitations under the License.
package sysctl

import (
"fmt"
"reflect"
"testing"

"k8s.io/apimachinery/pkg/util/version"
api "k8s.io/kubernetes/pkg/apis/core"
)

Expand Down Expand Up @@ -102,3 +105,62 @@ func TestValidate(t *testing.T) {
testDisallowed()
}
}

func TestGetSafeSysctlAllowlist(t *testing.T) {
var legacySafeSysctls = []string{
"kernel.shm_rmid_forced",
"net.ipv4.ip_local_port_range",
"net.ipv4.tcp_syncookies",
"net.ipv4.ping_group_range",
"net.ipv4.ip_unprivileged_port_start",
"net.ipv4.tcp_keepalive_time",
"net.ipv4.tcp_fin_timeout",
"net.ipv4.tcp_keepalive_intvl",
"net.ipv4.tcp_keepalive_probes",
}

tests := []struct {
name string
getVersion func() (*version.Version, error)
want []string
}{
{
name: "failed to get kernelVersion, only return the legacy safeSysctls list",
getVersion: func() (*version.Version, error) {
return nil, fmt.Errorf("fork error")
},
want: legacySafeSysctls,
},
{
name: "kernelVersion is 3.18.0, return the legacy safeSysctls list and net.ipv4.ip_local_reserved_ports",
getVersion: func() (*version.Version, error) {
kernelVersionStr := "3.18.0-957.27.2.el7.x86_64"
return version.ParseGeneric(kernelVersionStr)
},
want: append(
legacySafeSysctls,
"net.ipv4.ip_local_reserved_ports",
),
},
{
name: "kernelVersion is 5.15.0, return the legacy safeSysctls list and safeSysctls with kernelVersion below 5.15.0",
getVersion: func() (*version.Version, error) {
kernelVersionStr := "5.15.0-75-generic"
return version.ParseGeneric(kernelVersionStr)
},
want: append(
legacySafeSysctls,
"net.ipv4.ip_local_reserved_ports",
"net.ipv4.tcp_rmem",
"net.ipv4.tcp_wmem",
),
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := getSafeSysctlAllowlist(tt.getVersion); !reflect.DeepEqual(got, tt.want) {
t.Errorf("getSafeSysctlAllowlist() = %v, want %v", got, tt.want)
}
})
}
}
8 changes: 8 additions & 0 deletions vendor/k8s.io/kubernetes/pkg/util/kernel/OWNERS

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

60 changes: 60 additions & 0 deletions vendor/k8s.io/kubernetes/pkg/util/kernel/constants.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

48 changes: 48 additions & 0 deletions vendor/k8s.io/kubernetes/pkg/util/kernel/version.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions vendor/modules.txt
Original file line number Diff line number Diff line change
Expand Up @@ -885,6 +885,7 @@ k8s.io/kubernetes/pkg/quota/v1/install
k8s.io/kubernetes/pkg/registry/rbac
k8s.io/kubernetes/pkg/registry/rbac/validation
k8s.io/kubernetes/pkg/securitycontext
k8s.io/kubernetes/pkg/util/kernel
k8s.io/kubernetes/pkg/util/parsers
k8s.io/kubernetes/plugin/pkg/auth/authorizer/rbac
# k8s.io/utils v0.0.0-20241210054802-24370beab758
Expand Down