From 5f1f9f1578141d54ac41f40dd3cabb2d999045c4 Mon Sep 17 00:00:00 2001 From: jubittajohn Date: Mon, 18 Aug 2025 17:11:44 -0400 Subject: [PATCH] pkg:add missing safe sysctls to list of SafeSysctlAllowlist Signed-off-by: jubittajohn --- .../sysctl/mustmatchpatterns.go | 75 ++++++++++++++++--- .../sysctl/mustmatchpatterns_test.go | 62 +++++++++++++++ .../k8s.io/kubernetes/pkg/util/kernel/OWNERS | 8 ++ .../kubernetes/pkg/util/kernel/constants.go | 60 +++++++++++++++ .../kubernetes/pkg/util/kernel/version.go | 48 ++++++++++++ vendor/modules.txt | 1 + 6 files changed, 244 insertions(+), 10 deletions(-) create mode 100644 vendor/k8s.io/kubernetes/pkg/util/kernel/OWNERS create mode 100644 vendor/k8s.io/kubernetes/pkg/util/kernel/constants.go create mode 100644 vendor/k8s.io/kubernetes/pkg/util/kernel/version.go diff --git a/pkg/securitycontextconstraints/sysctl/mustmatchpatterns.go b/pkg/securitycontextconstraints/sysctl/mustmatchpatterns.go index 4a67b043c..bd307b424 100644 --- a/pkg/securitycontextconstraints/sysctl/mustmatchpatterns.go +++ b/pkg/securitycontextconstraints/sysctl/mustmatchpatterns.go @@ -18,29 +18,84 @@ package sysctl import ( "fmt" + "slices" "strings" "k8s.io/apimachinery/pkg/util/validation/field" + "k8s.io/apimachinery/pkg/util/version" + "k8s.io/klog/v2" api "k8s.io/kubernetes/pkg/apis/core" + utilkernel "k8s.io/kubernetes/pkg/util/kernel" ) +type sysctl struct { + // the name of sysctl + name string + // the minimum kernel version where the sysctl is available + kernel string +} + +// Legacy safe sysctls that were always allowed in previous releases. +// These must always be returned to avoid regressions: pods that depended on these +// sysctls should continue to work as before, regardless of kernel version detection. 
+var legacySafeSysctls = []string{ + "kernel.shm_rmid_forced", + "net.ipv4.ip_local_port_range", + "net.ipv4.tcp_syncookies", + "net.ipv4.ping_group_range", + "net.ipv4.ip_unprivileged_port_start", + "net.ipv4.tcp_keepalive_time", + "net.ipv4.tcp_fin_timeout", + "net.ipv4.tcp_keepalive_intvl", + "net.ipv4.tcp_keepalive_probes", +} + +// Newer sysctls that are safe only if the kernel version is new enough. +// We gate these to avoid exposing unsupported sysctls on older kernels. +var newerSysctls = []sysctl{ + { + name: "net.ipv4.ip_local_reserved_ports", + kernel: utilkernel.IPLocalReservedPortsNamespacedKernelVersion, + }, { + name: "net.ipv4.tcp_rmem", + kernel: utilkernel.TCPReceiveMemoryNamespacedKernelVersion, + }, { + name: "net.ipv4.tcp_wmem", + kernel: utilkernel.TCPTransmitMemoryNamespacedKernelVersion, + }, +} + // SafeSysctlAllowlist returns the allowlist of safe sysctls and safe sysctl patterns (ending in *). // // A sysctl is called safe iff // - it is namespaced in the container or the pod // - it is isolated, i.e. has no influence on any other pod on the same node. func SafeSysctlAllowlist() []string { - return []string{ - "kernel.shm_rmid_forced", - "net.ipv4.ip_local_port_range", - "net.ipv4.tcp_syncookies", - "net.ipv4.ping_group_range", - "net.ipv4.ip_unprivileged_port_start", - "net.ipv4.tcp_keepalive_time", - "net.ipv4.tcp_fin_timeout", - "net.ipv4.tcp_keepalive_intvl", - "net.ipv4.tcp_keepalive_probes", + return getSafeSysctlAllowlist(utilkernel.GetVersion) +} + +// getSafeSysctlAllowlist returns the list of safe sysctls that can be used. +// To prevent regressions: +// 1. Always return the legacy list (known safe sysctls from previous releases). +// 2. Conditionally add newer sysctls only if the detected kernel version +// is at least as new as required. 
+func getSafeSysctlAllowlist(getVersion func() (*version.Version, error)) []string {
+	safeSysctlAllowlist := slices.Clone(legacySafeSysctls) // copy so appends below never touch the package-level slice
+
+	kernelVersion, err := getVersion()
+	if err != nil {
+		klog.ErrorS(err, "failed to get kernel version, falling back to legacy safe sysctl list")
+		return safeSysctlAllowlist // legacy entries only; no version-gated sysctls are added
+	}
+
+	for _, sc := range newerSysctls {
+		if kernelVersion.AtLeast(version.MustParseGeneric(sc.kernel)) {
+			safeSysctlAllowlist = append(safeSysctlAllowlist, sc.name)
+		} else {
+			klog.InfoS("kernel version is too old, dropping the sysctl from safe sysctl list", "kernelVersion", kernelVersion, "sysctl", sc.name)
+		}
 	}
+	return safeSysctlAllowlist
 }
 
 // mustMatchPatterns implements the SysctlsStrategy interface
diff --git a/pkg/securitycontextconstraints/sysctl/mustmatchpatterns_test.go b/pkg/securitycontextconstraints/sysctl/mustmatchpatterns_test.go
index a1073d7d6..67a5f48f6 100644
--- a/pkg/securitycontextconstraints/sysctl/mustmatchpatterns_test.go
+++ b/pkg/securitycontextconstraints/sysctl/mustmatchpatterns_test.go
@@ -17,8 +17,11 @@ limitations under the License.
package sysctl import ( + "fmt" + "reflect" "testing" + "k8s.io/apimachinery/pkg/util/version" api "k8s.io/kubernetes/pkg/apis/core" ) @@ -102,3 +105,62 @@ func TestValidate(t *testing.T) { testDisallowed() } } + +func TestGetSafeSysctlAllowlist(t *testing.T) { + var legacySafeSysctls = []string{ + "kernel.shm_rmid_forced", + "net.ipv4.ip_local_port_range", + "net.ipv4.tcp_syncookies", + "net.ipv4.ping_group_range", + "net.ipv4.ip_unprivileged_port_start", + "net.ipv4.tcp_keepalive_time", + "net.ipv4.tcp_fin_timeout", + "net.ipv4.tcp_keepalive_intvl", + "net.ipv4.tcp_keepalive_probes", + } + + tests := []struct { + name string + getVersion func() (*version.Version, error) + want []string + }{ + { + name: "failed to get kernelVersion, only return the legacy safeSysctls list", + getVersion: func() (*version.Version, error) { + return nil, fmt.Errorf("fork error") + }, + want: legacySafeSysctls, + }, + { + name: "kernelVersion is 3.18.0, return the legacy safeSysctls list and net.ipv4.ip_local_reserved_ports", + getVersion: func() (*version.Version, error) { + kernelVersionStr := "3.18.0-957.27.2.el7.x86_64" + return version.ParseGeneric(kernelVersionStr) + }, + want: append( + legacySafeSysctls, + "net.ipv4.ip_local_reserved_ports", + ), + }, + { + name: "kernelVersion is 5.15.0, return the legacy safeSysctls list and safeSysctls with kernelVersion below 5.15.0", + getVersion: func() (*version.Version, error) { + kernelVersionStr := "5.15.0-75-generic" + return version.ParseGeneric(kernelVersionStr) + }, + want: append( + legacySafeSysctls, + "net.ipv4.ip_local_reserved_ports", + "net.ipv4.tcp_rmem", + "net.ipv4.tcp_wmem", + ), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := getSafeSysctlAllowlist(tt.getVersion); !reflect.DeepEqual(got, tt.want) { + t.Errorf("getSafeSysctlAllowlist() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/vendor/k8s.io/kubernetes/pkg/util/kernel/OWNERS 
b/vendor/k8s.io/kubernetes/pkg/util/kernel/OWNERS new file mode 100644 index 000000000..9437a5858 --- /dev/null +++ b/vendor/k8s.io/kubernetes/pkg/util/kernel/OWNERS @@ -0,0 +1,8 @@ +# See the OWNERS docs at https://go.k8s.io/owners + +reviewers: + - sig-network-reviewers + - sig-node-reviewers +approvers: + - sig-network-approvers + - sig-node-approvers diff --git a/vendor/k8s.io/kubernetes/pkg/util/kernel/constants.go b/vendor/k8s.io/kubernetes/pkg/util/kernel/constants.go new file mode 100644 index 000000000..1467f6c22 --- /dev/null +++ b/vendor/k8s.io/kubernetes/pkg/util/kernel/constants.go @@ -0,0 +1,60 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kernel + +// IPLocalReservedPortsNamespacedKernelVersion is the kernel version in which net.ipv4.ip_local_reserved_ports was namespaced(netns). +// (ref: https://github.com/torvalds/linux/commit/122ff243f5f104194750ecbc76d5946dd1eec934) +const IPLocalReservedPortsNamespacedKernelVersion = "3.16" + +// IPVSConnReuseModeMinSupportedKernelVersion is the minium kernel version supporting net.ipv4.vs.conn_reuse_mode. +// (ref: https://github.com/torvalds/linux/commit/d752c364571743d696c2a54a449ce77550c35ac5) +const IPVSConnReuseModeMinSupportedKernelVersion = "4.1" + +// TCPKeepAliveTimeNamespacedKernelVersion is the kernel version in which net.ipv4.tcp_keepalive_time was namespaced(netns). 
+// (ref: https://github.com/torvalds/linux/commit/13b287e8d1cad951634389f85b8c9b816bd3bb1e) +const TCPKeepAliveTimeNamespacedKernelVersion = "4.5" + +// TCPKeepAliveIntervalNamespacedKernelVersion is the kernel version in which net.ipv4.tcp_keepalive_intvl was namespaced(netns). +// (ref: https://github.com/torvalds/linux/commit/b840d15d39128d08ed4486085e5507d2617b9ae1) +const TCPKeepAliveIntervalNamespacedKernelVersion = "4.5" + +// TCPKeepAliveProbesNamespacedKernelVersion is the kernel version in which net.ipv4.tcp_keepalive_probes was namespaced(netns). +// (ref: https://github.com/torvalds/linux/commit/9bd6861bd4326e3afd3f14a9ec8a723771fb20bb) +const TCPKeepAliveProbesNamespacedKernelVersion = "4.5" + +// TCPFinTimeoutNamespacedKernelVersion is the kernel version in which net.ipv4.tcp_fin_timeout was namespaced(netns). +// (ref: https://github.com/torvalds/linux/commit/1e579caa18b96f9eb18f4f5416658cd15f37c062) +const TCPFinTimeoutNamespacedKernelVersion = "4.6" + +// IPVSConnReuseModeFixedKernelVersion is the kernel version in which net.ipv4.vs.conn_reuse_mode was fixed. +// (ref: https://github.com/torvalds/linux/commit/35dfb013149f74c2be1ff9c78f14e6a3cd1539d1) +const IPVSConnReuseModeFixedKernelVersion = "5.9" + +const TmpfsNoswapSupportKernelVersion = "6.4" + +// NFTablesKubeProxyKernelVersion is the lowest kernel version kube-proxy supports using +// nftables mode with by default. This is not directly related to any specific kernel +// commit; see https://issues.k8s.io/122743#issuecomment-1893922424 +const NFTablesKubeProxyKernelVersion = "5.13" + +// TCPReceiveMemoryNamespacedKernelVersion is the kernel version in which net.ipv4.tcp_rmem was namespaced(netns). +// (ref: https://github.com/torvalds/linux/commit/356d1833b638bd465672aefeb71def3ab93fc17d) +const TCPReceiveMemoryNamespacedKernelVersion = "4.15" + +// TCPTransmitMemoryNamespacedKernelVersion is the kernel version in which net.ipv4.tcp_wmem was namespaced(netns). 
+// (ref: https://github.com/torvalds/linux/commit/356d1833b638bd465672aefeb71def3ab93fc17d) +const TCPTransmitMemoryNamespacedKernelVersion = "4.15" diff --git a/vendor/k8s.io/kubernetes/pkg/util/kernel/version.go b/vendor/k8s.io/kubernetes/pkg/util/kernel/version.go new file mode 100644 index 000000000..79f0bf9a5 --- /dev/null +++ b/vendor/k8s.io/kubernetes/pkg/util/kernel/version.go @@ -0,0 +1,48 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kernel + +import ( + "fmt" + "os" + "strings" + + "k8s.io/apimachinery/pkg/util/version" +) + +type readFileFunc func(string) ([]byte, error) + +// GetVersion returns currently running kernel version. +func GetVersion() (*version.Version, error) { + return getVersion(os.ReadFile) +} + +// getVersion reads os release file from the give readFile function. 
+func getVersion(readFile readFileFunc) (*version.Version, error) { + kernelVersionFile := "/proc/sys/kernel/osrelease" + fileContent, err := readFile(kernelVersionFile) + if err != nil { + return nil, fmt.Errorf("failed to read os-release file: %s", err.Error()) + } + + kernelVersion, err := version.ParseGeneric(strings.TrimSpace(string(fileContent))) + if err != nil { + return nil, fmt.Errorf("failed to parse kernel version: %s", err.Error()) + } + + return kernelVersion, nil +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 74018df2b..f7b0902ea 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -885,6 +885,7 @@ k8s.io/kubernetes/pkg/quota/v1/install k8s.io/kubernetes/pkg/registry/rbac k8s.io/kubernetes/pkg/registry/rbac/validation k8s.io/kubernetes/pkg/securitycontext +k8s.io/kubernetes/pkg/util/kernel k8s.io/kubernetes/pkg/util/parsers k8s.io/kubernetes/plugin/pkg/auth/authorizer/rbac # k8s.io/utils v0.0.0-20241210054802-24370beab758