Skip to content

Commit c39cf28

Browse files
committed
Moving Kubelet kernel-memcg-notification to configuration file
1 parent 82d27aa commit c39cf28

File tree

11 files changed

+40
-27
lines changed

11 files changed

+40
-27
lines changed

cluster/gce/config-test.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -220,10 +220,10 @@ TEST_CLUSTER_API_CONTENT_TYPE=${TEST_CLUSTER_API_CONTENT_TYPE:-}
220220

221221
KUBELET_TEST_ARGS="${KUBELET_TEST_ARGS:-} --serialize-image-pulls=false ${TEST_CLUSTER_API_CONTENT_TYPE}"
222222
if [[ "${NODE_OS_DISTRIBUTION}" = 'gci' ]] || [[ "${NODE_OS_DISTRIBUTION}" = 'ubuntu' ]] || [[ "${NODE_OS_DISTRIBUTION}" = 'custom' ]]; then
223-
NODE_KUBELET_TEST_ARGS="${NODE_KUBELET_TEST_ARGS:-} --experimental-kernel-memcg-notification=true"
223+
NODE_KUBELET_TEST_ARGS="${NODE_KUBELET_TEST_ARGS:-} --kernel-memcg-notification=true"
224224
fi
225225
if [[ "${MASTER_OS_DISTRIBUTION}" = 'gci' ]] || [[ "${MASTER_OS_DISTRIBUTION}" = 'ubuntu' ]]; then
226-
MASTER_KUBELET_TEST_ARGS="${MASTER_KUBELET_TEST_ARGS:-} --experimental-kernel-memcg-notification=true"
226+
MASTER_KUBELET_TEST_ARGS="${MASTER_KUBELET_TEST_ARGS:-} --kernel-memcg-notification=true"
227227
fi
228228
APISERVER_TEST_ARGS="${APISERVER_TEST_ARGS:-} --runtime-config=extensions/v1beta1,scheduling.k8s.io/v1alpha1,settings.k8s.io/v1alpha1 ${TEST_CLUSTER_DELETE_COLLECTION_WORKERS} ${TEST_CLUSTER_MAX_REQUESTS_INFLIGHT}"
229229
CONTROLLER_MANAGER_TEST_ARGS="${CONTROLLER_MANAGER_TEST_ARGS:-} ${TEST_CLUSTER_RESYNC_PERIOD} ${TEST_CLUSTER_API_CONTENT_TYPE}"

cluster/gce/util.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -891,7 +891,7 @@ function construct-windows-kubelet-flags {
891891
flags+=" --cgroups-per-qos=false --enforce-node-allocatable="
892892

893893
# Turn off kernel memory cgroup notification.
894-
flags+=" --experimental-kernel-memcg-notification=false"
894+
flags+=" --kernel-memcg-notification=false"
895895

896896
# TODO(#78628): Re-enable KubeletPodResources when the issue is fixed.
897897
# Force disable KubeletPodResources feature on Windows until #78628 is fixed.

cmd/kubelet/app/options/options.go

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -117,9 +117,6 @@ type KubeletFlags struct {
117117
RemoteImageEndpoint string
118118
// experimentalMounterPath is the path of mounter binary. Leave empty to use the default mount path
119119
ExperimentalMounterPath string
120-
// If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.
121-
// +optional
122-
ExperimentalKernelMemcgNotification bool
123120
// This flag, if set, enables a check prior to mount operations to verify that the required components
124121
// (binaries, etc.) to mount the volume are available on the underlying node. If the check is enabled
125122
// and fails the mount operation fails.
@@ -180,20 +177,19 @@ func NewKubeletFlags() *KubeletFlags {
180177
}
181178

182179
return &KubeletFlags{
183-
ContainerRuntimeOptions: *NewContainerRuntimeOptions(),
184-
CertDirectory: "/var/lib/kubelet/pki",
185-
RootDirectory: defaultRootDir,
186-
MasterServiceNamespace: metav1.NamespaceDefault,
187-
MaxContainerCount: -1,
188-
MaxPerPodContainerCount: 1,
189-
MinimumGCAge: metav1.Duration{Duration: 0},
190-
NonMasqueradeCIDR: "10.0.0.0/8",
191-
RegisterSchedulable: true,
192-
ExperimentalKernelMemcgNotification: false,
193-
RemoteRuntimeEndpoint: remoteRuntimeEndpoint,
194-
NodeLabels: make(map[string]string),
195-
RegisterNode: true,
196-
SeccompProfileRoot: filepath.Join(defaultRootDir, "seccomp"),
180+
ContainerRuntimeOptions: *NewContainerRuntimeOptions(),
181+
CertDirectory: "/var/lib/kubelet/pki",
182+
RootDirectory: defaultRootDir,
183+
MasterServiceNamespace: metav1.NamespaceDefault,
184+
MaxContainerCount: -1,
185+
MaxPerPodContainerCount: 1,
186+
MinimumGCAge: metav1.Duration{Duration: 0},
187+
NonMasqueradeCIDR: "10.0.0.0/8",
188+
RegisterSchedulable: true,
189+
RemoteRuntimeEndpoint: remoteRuntimeEndpoint,
190+
NodeLabels: make(map[string]string),
191+
RegisterNode: true,
192+
SeccompProfileRoot: filepath.Join(defaultRootDir, "seccomp"),
197193
// prior to the introduction of this flag, there was a hardcoded cap of 50 images
198194
EnableCAdvisorJSONEndpoints: false,
199195
}
@@ -345,7 +341,6 @@ func (f *KubeletFlags) AddFlags(mainfs *pflag.FlagSet) {
345341
fs.Var(utiltaints.NewTaintsVar(&f.RegisterWithTaints), "register-with-taints", "Register the node with the given list of taints (comma separated \"<key>=<value>:<effect>\"). No-op if register-node is false.")
346342

347343
// EXPERIMENTAL FLAGS
348-
fs.BoolVar(&f.ExperimentalKernelMemcgNotification, "experimental-kernel-memcg-notification", f.ExperimentalKernelMemcgNotification, "If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.")
349344
fs.StringVar(&f.RemoteRuntimeEndpoint, "container-runtime-endpoint", f.RemoteRuntimeEndpoint, "[Experimental] The endpoint of remote runtime service. Currently unix socket endpoint is supported on Linux, while npipe and tcp endpoints are supported on windows. Examples:'unix:///var/run/dockershim.sock', 'npipe:////./pipe/dockershim'")
350345
fs.StringVar(&f.RemoteImageEndpoint, "image-service-endpoint", f.RemoteImageEndpoint, "[Experimental] The endpoint of remote image service. If not specified, it will be the same with container-runtime-endpoint by default. Currently unix socket endpoint is supported on Linux, while npipe and tcp endpoints are supported on windows. Examples:'unix:///var/run/dockershim.sock', 'npipe:////./pipe/dockershim'")
351346
fs.BoolVar(&f.ExperimentalNodeAllocatableIgnoreEvictionThreshold, "experimental-allocatable-ignore-eviction", f.ExperimentalNodeAllocatableIgnoreEvictionThreshold, "When set to 'true', Hard Eviction Thresholds will be ignored while calculating Node Allocatable. See https://kubernetes.io/docs/tasks/administer-cluster/reserve-compute-resources/ for more details. [default=false]")
@@ -519,6 +514,7 @@ func AddKubeletConfigFlags(mainfs *pflag.FlagSet, c *kubeletconfig.KubeletConfig
519514
fs.StringSliceVar(&c.AllowedUnsafeSysctls, "allowed-unsafe-sysctls", c.AllowedUnsafeSysctls, "Comma-separated whitelist of unsafe sysctls or unsafe sysctl patterns (ending in *). Use these at your own risk.")
520515

521516
fs.Int32Var(&c.NodeStatusMaxImages, "node-status-max-images", c.NodeStatusMaxImages, "The maximum number of images to report in Node.Status.Images. If -1 is specified, no cap will be applied.")
517+
fs.BoolVar(&c.KernelMemcgNotification, "kernel-memcg-notification", c.KernelMemcgNotification, "If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.")
522518

523519
// Flags intended for testing, not recommended used in production environments.
524520
fs.Int64Var(&c.MaxOpenFiles, "max-open-files", c.MaxOpenFiles, "Number of files that can be opened by Kubelet process.")
@@ -543,4 +539,7 @@ func AddKubeletConfigFlags(mainfs *pflag.FlagSet, c *kubeletconfig.KubeletConfig
543539
fs.StringSliceVar(&c.EnforceNodeAllocatable, "enforce-node-allocatable", c.EnforceNodeAllocatable, "A comma separated list of levels of node allocatable enforcement to be enforced by kubelet. Acceptable options are 'none', 'pods', 'system-reserved', and 'kube-reserved'. If the latter two options are specified, '--system-reserved-cgroup' and '--kube-reserved-cgroup' must also be set, respectively. If 'none' is specified, no additional options should be set. See https://kubernetes.io/docs/tasks/administer-cluster/reserve-compute-resources/ for more details.")
544540
fs.StringVar(&c.SystemReservedCgroup, "system-reserved-cgroup", c.SystemReservedCgroup, "Absolute name of the top level cgroup that is used to manage non-kubernetes components for which compute resources were reserved via '--system-reserved' flag. Ex. '/system-reserved'. [default='']")
545541
fs.StringVar(&c.KubeReservedCgroup, "kube-reserved-cgroup", c.KubeReservedCgroup, "Absolute name of the top level cgroup that is used to manage kubernetes components for which compute resources were reserved via '--kube-reserved' flag. Ex. '/kube-reserved'. [default='']")
542+
543+
// Graduated experimental flags, kept for backward compatibility
544+
fs.BoolVar(&c.KernelMemcgNotification, "experimental-kernel-memcg-notification", c.KernelMemcgNotification, "Use kernelMemcgNotification configuration, this flag will be removed in 1.23.")
546545
}

cmd/kubelet/app/server.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1109,7 +1109,7 @@ func RunKubelet(kubeServer *options.KubeletServer, kubeDeps *kubelet.Dependencie
11091109
kubeServer.RegisterWithTaints,
11101110
kubeServer.AllowedUnsafeSysctls,
11111111
kubeServer.ExperimentalMounterPath,
1112-
kubeServer.ExperimentalKernelMemcgNotification,
1112+
kubeServer.KernelMemcgNotification,
11131113
kubeServer.ExperimentalCheckNodeCapabilitiesBeforeMount,
11141114
kubeServer.ExperimentalNodeAllocatableIgnoreEvictionThreshold,
11151115
kubeServer.MinimumGCAge,
@@ -1183,7 +1183,7 @@ func createAndInitKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
11831183
registerWithTaints []api.Taint,
11841184
allowedUnsafeSysctls []string,
11851185
experimentalMounterPath string,
1186-
experimentalKernelMemcgNotification bool,
1186+
kernelMemcgNotification bool,
11871187
experimentalCheckNodeCapabilitiesBeforeMount bool,
11881188
experimentalNodeAllocatableIgnoreEvictionThreshold bool,
11891189
minimumGCAge metav1.Duration,
@@ -1215,7 +1215,7 @@ func createAndInitKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
12151215
registerWithTaints,
12161216
allowedUnsafeSysctls,
12171217
experimentalMounterPath,
1218-
experimentalKernelMemcgNotification,
1218+
kernelMemcgNotification,
12191219
experimentalCheckNodeCapabilitiesBeforeMount,
12201220
experimentalNodeAllocatableIgnoreEvictionThreshold,
12211221
minimumGCAge,

pkg/kubelet/apis/config/fuzzer/fuzzer.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ func Funcs(codecs runtimeserializer.CodecFactory) []interface{} {
5959
obj.ImageMinimumGCAge = metav1.Duration{Duration: 2 * time.Minute}
6060
obj.ImageGCHighThresholdPercent = 85
6161
obj.ImageGCLowThresholdPercent = 80
62+
obj.KernelMemcgNotification = false
6263
obj.MaxOpenFiles = 1000000
6364
obj.MaxPods = 110
6465
obj.PodPidsLimit = -1

pkg/kubelet/apis/config/helpers_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ var (
189189
"ImageGCHighThresholdPercent",
190190
"ImageGCLowThresholdPercent",
191191
"ImageMinimumGCAge.Duration",
192+
"KernelMemcgNotification",
192193
"KubeAPIBurst",
193194
"KubeAPIQPS",
194195
"KubeReservedCgroup",

pkg/kubelet/apis/config/types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,9 @@ type KubeletConfiguration struct {
322322
// These sysctls are namespaced but not allowed by default. For example: "kernel.msg*,net.ipv4.route.min_pmtu"
323323
// +optional
324324
AllowedUnsafeSysctls []string
325+
// kernelMemcgNotification, if enabled, makes the kubelet integrate with the kernel memcg
326+
// notification to determine if memory eviction thresholds are crossed rather than polling.
327+
KernelMemcgNotification bool
325328

326329
/* the following fields are meant for Node Allocatable */
327330

pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/kubelet/kubelet.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
363363
registerWithTaints []api.Taint,
364364
allowedUnsafeSysctls []string,
365365
experimentalMounterPath string,
366-
experimentalKernelMemcgNotification bool,
366+
kernelMemcgNotification bool,
367367
experimentalCheckNodeCapabilitiesBeforeMount bool,
368368
experimentalNodeAllocatableIgnoreEvictionThreshold bool,
369369
minimumGCAge metav1.Duration,
@@ -432,7 +432,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
432432
PressureTransitionPeriod: kubeCfg.EvictionPressureTransitionPeriod.Duration,
433433
MaxPodGracePeriodSeconds: int64(kubeCfg.EvictionMaxPodGracePeriod),
434434
Thresholds: thresholds,
435-
KernelMemcgNotification: experimentalKernelMemcgNotification,
435+
KernelMemcgNotification: kernelMemcgNotification,
436436
PodCgroupRoot: kubeDeps.ContainerManager.GetPodCgroupRoot(),
437437
}
438438

staging/src/k8s.io/kubelet/config/v1beta1/types.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -787,6 +787,13 @@ type KubeletConfiguration struct {
787787
// Default: ""
788788
// +optional
789789
ProviderID string `json:"providerID,omitempty"`
790+
// kernelMemcgNotification, if set, makes the kubelet integrate with the kernel memcg notification
791+
// to determine if memory eviction thresholds are crossed rather than polling.
792+
// Dynamic Kubelet Config (beta): If dynamically updating this field, consider that
793+
// it may impact the way Kubelet interacts with the kernel.
794+
// Default: false
795+
// +optional
796+
KernelMemcgNotification bool `json:"kernelMemcgNotification,omitempty"`
790797
}
791798

792799
type KubeletAuthorizationMode string

0 commit comments

Comments
 (0)