/*
Copyright 2024 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2enode

import (
	"context"
	"fmt"
	"os/exec"
	"path/filepath"
	"strings"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
	"k8s.io/kubernetes/pkg/features"
	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
	kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
	"k8s.io/kubernetes/test/e2e/framework"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
	"k8s.io/kubernetes/test/e2e/nodefeature"
	imageutils "k8s.io/kubernetes/test/utils/image"
	admissionapi "k8s.io/pod-security-admission/api"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"
)

var _ = SIGDescribe("KubeletSeparateDiskGC", nodefeature.KubeletSeparateDiskGC, func() {
	f := framework.NewDefaultFramework("split-disk-test")
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
	pressureTimeout := 10 * time.Minute
	expectedNodeCondition := v1.NodeDiskPressure

	ginkgo.BeforeEach(func(ctx context.Context) {
		e2eskipper.SkipUnlessFeatureGateEnabled(features.KubeletSeparateDiskGC)
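		// These tests only make sense when the container runtime keeps images on a
		// separate filesystem from the writable container layers; skip otherwise.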
		if !hasSplitFileSystem(ctx) {
			ginkgo.Skip("the node does not have a split filesystem")
		}
	})

	f.It("should display different stats for imageFs and containerFs", func(ctx context.Context) {
		summary := eventuallyGetSummary(ctx)
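		// With a split disk setup, the image filesystem is separate from the node rootfs,
		// so its capacity and availability should not match Node.Fs.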
		gomega.Expect(summary.Node.Fs.AvailableBytes).ToNot(gomega.Equal(summary.Node.Runtime.ImageFs.AvailableBytes))
		gomega.Expect(summary.Node.Fs.CapacityBytes).ToNot(gomega.Equal(summary.Node.Runtime.ImageFs.CapacityBytes))
		// Node.Fs represents rootfs where /var/lib/kubelet is located.
		// Since graphroot is left as the default in storage.conf, it will use the same filesystem location as rootfs.
		// Therefore, Node.Fs should be the same as Runtime.ContainerFs.
		gomega.Expect(summary.Node.Fs.AvailableBytes).To(gomega.Equal(summary.Node.Runtime.ContainerFs.AvailableBytes))
		gomega.Expect(summary.Node.Fs.CapacityBytes).To(gomega.Equal(summary.Node.Runtime.ContainerFs.CapacityBytes))
	})

	f.Context("when there is disk pressure", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), func() {
		f.Context("on imageFs", func() {
			tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
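				// Set hard eviction thresholds of 30% available on every disk signal so
				// that filling the imageFs is enough to put the node under disk pressure.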
				initialConfig.EvictionHard = map[string]string{
					string(evictionapi.SignalNodeFsAvailable):      "30%",
					string(evictionapi.SignalContainerFsAvailable): "30%",
					string(evictionapi.SignalImageFsAvailable):     "30%",
				}
				initialConfig.EvictionMinimumReclaim = map[string]string{}
				ginkgo.By(fmt.Sprintf("EvictionHard %s", initialConfig.EvictionHard))
			})

			runImageFsPressureTest(f, pressureTimeout, expectedNodeCondition, logDiskMetrics, []podEvictSpec{
				{
					evictionPriority: 1,
					pod:              innocentPod(),
				},
			})
		})

		f.Context("on containerFs", func() {
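			// The test pod writes diskTestInMb of data to its writable container layer
			// and is expected to be evicted for consuming ephemeral storage.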
			expectedStarvedResource := v1.ResourceEphemeralStorage
			diskTestInMb := 5000

			tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
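				// Unlike the imageFs case, no containerfs threshold is set here: containerFs
				// shares the node rootfs in this setup, so the nodefs signal already covers it.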
				initialConfig.EvictionHard = map[string]string{
					string(evictionapi.SignalNodeFsAvailable):  "30%",
					string(evictionapi.SignalImageFsAvailable): "30%",
				}
				initialConfig.EvictionMinimumReclaim = map[string]string{}
				ginkgo.By(fmt.Sprintf("EvictionHard %s", initialConfig.EvictionHard))
			})
			runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{
				{
					// This pod should exceed the disk capacity on nodeFs, since it writes a large amount of data to its writable layer.
					evictionPriority: 1,
					pod: diskConsumingPod("container-emptydir-disk-limit", diskTestInMb, nil,
						v1.ResourceRequirements{}),
				},
			})
		})
	})
})

// runImageFsPressureTest is similar to the eviction tests, but it skips some of the checks
// on eviction itself, since its purpose is to induce disk pressure on the imageFs filesystem.
func runImageFsPressureTest(f *framework.Framework, pressureTimeout time.Duration, expectedNodeCondition v1.NodeConditionType, logFunc func(ctx context.Context), testSpecs []podEvictSpec) {
	// Place the remainder of the test within a context so that the kubelet config is set before and after the test.
	ginkgo.Context("", func() {
		ginkgo.BeforeEach(func(ctx context.Context) {
			// Reduce memory usage in the allocatable cgroup to ensure we do not have MemoryPressure.
			reduceAllocatableMemoryUsageIfCgroupv1()
			// Nodes do not immediately report local storage capacity,
			// so wait a little to allow pods requesting local storage to be scheduled.
			time.Sleep(30 * time.Second)
			ginkgo.By("setting up pods to be used by tests")
			pods := []*v1.Pod{}
			for _, spec := range testSpecs {
				pods = append(pods, spec.pod)
			}
			e2epod.NewPodClient(f).CreateBatch(ctx, pods)
		})

		ginkgo.It("should evict all of the correct pods", func(ctx context.Context) {
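			// Determine which filesystem to fill from the imageFs mountpoint reported
			// by the CRI image service.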
			_, is, err := getCRIClient()
			framework.ExpectNoError(err)
			resp, err := is.ImageFsInfo(ctx)
			framework.ExpectNoError(err)
			gomega.Expect(resp.ImageFilesystems).NotTo(gomega.BeEmpty())
			gomega.Expect(resp.ImageFilesystems[0].FsId).NotTo(gomega.BeNil())
			diskToPressure := filepath.Dir(resp.ImageFilesystems[0].FsId.Mountpoint)
			ginkgo.By(fmt.Sprintf("Got imageFs directory: %s", diskToPressure))
			imagesLenBeforeGC := 1
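			// sizeOfPressure is passed to dd as a count of 1M blocks, so roughly 8 GB
			// will be written to the filesystem under pressure.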
			sizeOfPressure := "8000"
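			// Record how many images are present before inducing pressure so that
			// image garbage collection can be verified afterwards.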
			gomega.Eventually(ctx, func(ctx context.Context) error {
				images, err := is.ListImages(ctx, &runtimeapi.ImageFilter{})
				imagesLenBeforeGC = len(images)
				return err
			}, 1*time.Minute, evictionPollInterval).Should(gomega.Succeed())
			ginkgo.By(fmt.Sprintf("Number of images found before GC was %d", imagesLenBeforeGC))
			ginkgo.By(fmt.Sprintf("Induce disk pressure on %s with size %s", diskToPressure, sizeOfPressure))
			gomega.Expect(runDDOnFilesystem(diskToPressure, sizeOfPressure)).Should(gomega.Succeed())
			ginkgo.By(fmt.Sprintf("Waiting for node to have NodeCondition: %s", expectedNodeCondition))

			gomega.Eventually(ctx, func(ctx context.Context) error {
				logFunc(ctx)
				if expectedNodeCondition == noPressure || hasNodeCondition(ctx, f, expectedNodeCondition) {
					return nil
				}
				return fmt.Errorf("NodeCondition: %s not encountered", expectedNodeCondition)
			}, pressureTimeout, evictionPollInterval).Should(gomega.BeNil())

			ginkgo.By("Waiting for evictions to occur")
			gomega.Eventually(ctx, func(ctx context.Context) error {
				if expectedNodeCondition != noPressure {
					if hasNodeCondition(ctx, f, expectedNodeCondition) {
						framework.Logf("Node has condition: %s", expectedNodeCondition)
					} else {
						framework.Logf("Node does NOT have condition: %s", expectedNodeCondition)
					}
				}
				logKubeletLatencyMetrics(ctx, kubeletmetrics.EvictionStatsAgeKey)
				logFunc(ctx)
				return verifyEvictionOrdering(ctx, f, testSpecs)
			}, pressureTimeout, evictionPollInterval).Should(gomega.Succeed())

			ginkgo.By("checking for the expected pod conditions for evicted pods")
			verifyPodConditions(ctx, f, testSpecs)

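			// With the imageFs under pressure, the kubelet should garbage collect unused
			// images, leaving fewer images than were present before the test.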
			gomega.Eventually(ctx, func(ctx context.Context) error {
				images, err := is.ListImages(ctx, &runtimeapi.ImageFilter{})
				if err != nil {
					return err
				}
				imagesLenAfterGC := len(images)
				if imagesLenAfterGC < imagesLenBeforeGC {
					return nil
				}
				return fmt.Errorf("garbage collection of images should have occurred. before: %d after: %d", imagesLenBeforeGC, imagesLenAfterGC)
			}, pressureTimeout, evictionPollInterval).Should(gomega.Succeed())

			gomega.Expect(removeDiskPressure(diskToPressure)).Should(gomega.Succeed(), "removing disk pressure should not fail")

			ginkgo.By("making sure the node recovers from the induced disk pressure before continuing")

			ginkgo.By(fmt.Sprintf("Waiting for NodeCondition: %s to no longer exist on the node", expectedNodeCondition))
			gomega.Eventually(ctx, func(ctx context.Context) error {
				logFunc(ctx)
				logKubeletLatencyMetrics(ctx, kubeletmetrics.EvictionStatsAgeKey)
				if expectedNodeCondition != noPressure && hasNodeCondition(ctx, f, expectedNodeCondition) {
					return fmt.Errorf("conditions haven't returned to normal, node still has: %s", expectedNodeCondition)
				}
				return nil
			}, pressureTimeout, evictionPollInterval).Should(gomega.BeNil())

			ginkgo.By("checking for stable, pressure-free condition without unexpected pod failures")
			gomega.Consistently(ctx, func(ctx context.Context) error {
				if expectedNodeCondition != noPressure && hasNodeCondition(ctx, f, expectedNodeCondition) {
					return fmt.Errorf("condition %s disappeared and then reappeared", expectedNodeCondition)
				}
				logFunc(ctx)
				logKubeletLatencyMetrics(ctx, kubeletmetrics.EvictionStatsAgeKey)
				return verifyEvictionOrdering(ctx, f, testSpecs)
			}, postTestConditionMonitoringPeriod, evictionPollInterval).Should(gomega.Succeed())
		})

		ginkgo.AfterEach(func(ctx context.Context) {
			prePullImagesIfNecessary := func() {
				if expectedNodeCondition == v1.NodeDiskPressure && framework.TestContext.PrepullImages {
					// The disk eviction test may cause the pre-pulled images to be evicted,
					// so pre-pull those images again to ensure this test does not affect subsequent tests.
					err := PrePullAllImages()
					framework.ExpectNoError(err)
				}
			}
			// Run pre-pull for images using a `defer` to ensure that images are pulled even when the subsequent assertions fail.
			defer prePullImagesIfNecessary()

			ginkgo.By("deleting pods")
			for _, spec := range testSpecs {
				ginkgo.By(fmt.Sprintf("deleting pod: %s", spec.pod.Name))
				e2epod.NewPodClient(f).DeleteSync(ctx, spec.pod.Name, metav1.DeleteOptions{}, 10*time.Minute)
			}

			// In case a test fails before verifying that the NodeCondition no longer exists on the node,
			// we should wait for the NodeCondition to disappear.
			ginkgo.By(fmt.Sprintf("making sure NodeCondition %s no longer exists on the node", expectedNodeCondition))
			gomega.Eventually(ctx, func(ctx context.Context) error {
				if expectedNodeCondition != noPressure && hasNodeCondition(ctx, f, expectedNodeCondition) {
					return fmt.Errorf("conditions haven't returned to normal, node still has: %s", expectedNodeCondition)
				}
				return nil
			}, pressureDisappearTimeout, evictionPollInterval).Should(gomega.BeNil())

			reduceAllocatableMemoryUsageIfCgroupv1()
			ginkgo.By("making sure we have all the required images for testing")
			prePullImagesIfNecessary()

			// Ensure that the NodeCondition hasn't returned after pulling images.
			ginkgo.By(fmt.Sprintf("making sure NodeCondition %s doesn't exist again after pulling images", expectedNodeCondition))
			gomega.Eventually(ctx, func(ctx context.Context) error {
				if expectedNodeCondition != noPressure && hasNodeCondition(ctx, f, expectedNodeCondition) {
					return fmt.Errorf("conditions haven't returned to normal, node still has: %s", expectedNodeCondition)
				}
				return nil
			}, pressureDisappearTimeout, evictionPollInterval).Should(gomega.BeNil())

			ginkgo.By("making sure we can start a new pod after the test")
			podName := "test-admit-pod"
			e2epod.NewPodClient(f).CreateSync(ctx, &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name: podName,
				},
				Spec: v1.PodSpec{
					RestartPolicy: v1.RestartPolicyNever,
					Containers: []v1.Container{
						{
							Image: imageutils.GetPauseImageName(),
							Name:  podName,
						},
					},
				},
			})

			if ginkgo.CurrentSpecReport().Failed() {
				if framework.TestContext.DumpLogsOnFailure {
					logPodEvents(ctx, f)
					logNodeEvents(ctx, f)
				}
			}
		})
	})
}

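// runDDOnFilesystem induces disk pressure by writing sizeOfPressure 1M blocks from
// /dev/zero into a file under diskToPressure.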
func runDDOnFilesystem(diskToPressure, sizeOfPressure string) error {
	ddArgs := fmt.Sprintf("if=/dev/zero of=%s/file.txt bs=1M count=%s", diskToPressure, sizeOfPressure)
	ginkgo.By(fmt.Sprintf("running dd with %s", ddArgs))
	cmd := exec.Command("dd", strings.Split(ddArgs, " ")...)
	output, err := cmd.CombinedOutput()
	if err != nil {
		framework.Logf("dd failed with output %q: %v", string(output), err)
	}
	return err
}

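// removeDiskPressure deletes the file written by runDDOnFilesystem so that the
// induced disk pressure is relieved.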
func removeDiskPressure(diskToPressure string) error {
	fileToRemove := fmt.Sprintf("%s/file.txt", diskToPressure)
	ginkgo.By(fmt.Sprintf("calling rm %s", fileToRemove))
	cmd := exec.Command("rm", fileToRemove)
	_, err := cmd.CombinedOutput()
	return err
}

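// hasSplitFileSystem reports whether the container runtime keeps the writable
// container filesystems and the image filesystems on different mountpoints.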
func hasSplitFileSystem(ctx context.Context) bool {
	_, is, err := getCRIClient()
	framework.ExpectNoError(err)
	resp, err := is.ImageFsInfo(ctx)
	framework.ExpectNoError(err)
	if len(resp.ContainerFilesystems) == 0 || len(resp.ImageFilesystems) == 0 {
		return false
	}
	if resp.ContainerFilesystems[0].FsId != nil && resp.ImageFilesystems[0].FsId != nil {
		return resp.ContainerFilesystems[0].FsId.Mountpoint != resp.ImageFilesystems[0].FsId.Mountpoint
	}
	return false
}