
Commit ba742a9

Merge pull request kubernetes#129574 from p-shah256/imagefs-tests
Separate SeparateDiskTests from eviction
2 parents ce87977 + aa187d2 commit ba742a9

3 files changed (+338 / −65 lines)


test/e2e/feature/feature.go

Lines changed: 4 additions & 0 deletions

@@ -445,6 +445,10 @@ var (
 	// TODO: remove when SELinuxMount feature gate is enabled by default.
 	SELinuxMountReadWriteOncePodOnly = framework.WithFeature(framework.ValidFeatures.Add("SELinuxMountReadWriteOncePodOnly"))
 
+	// SeparateDiskTest (SIG-node, used for testing separate container runtime filesystem)
+	// The tests need separate disk settings on nodes and separate filesystems in storage.conf
+	SeparateDisk = framework.WithFeature(framework.ValidFeatures.Add("SeparateDisk"))
+
 	// Owner: sig-network
 	// Marks tests of KEP-1880 that require the `MultiCIDRServiceAllocator` feature gate
 	// and the networking.k8s.io/v1alpha1 API.
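As a hedged sketch (not part of this commit; the spec name and body are hypothetical), a node e2e spec would opt into the new label by passing feature.SeparateDisk to SIGDescribe, the same way feature.Eviction is attached to the eviction specs changed below:

// Hypothetical usage sketch only; illustrates consuming the new label.
var _ = SIGDescribe("SeparateDiskSuite", framework.WithSerial(), framework.WithDisruptive(), feature.SeparateDisk, func() {
	// Specs here would assume a node whose container runtime keeps its image
	// filesystem on a separate disk, as described by the comment in feature.go.
})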

test/e2e_node/eviction_test.go

Lines changed: 141 additions & 65 deletions

@@ -20,6 +20,7 @@ import (
 	"context"
 	"fmt"
 	"path/filepath"
+	"regexp"
 	"strconv"
 	"strings"
 	"time"
@@ -68,27 +69,80 @@ const (
 	noStarvedResource = v1.ResourceName("none")
 )
 
-// InodeEviction tests that the node responds to node disk pressure by evicting only responsible pods.
-// Node disk pressure is induced by consuming all inodes on the node.
-var _ = SIGDescribe("InodeEviction", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.Eviction, func() {
-	f := framework.NewDefaultFramework("inode-eviction-test")
+type EvictionTestConfig struct {
+	Signal                  string
+	PressureTimeout         time.Duration
+	ExpectedNodeCondition   v1.NodeConditionType
+	ExpectedStarvedResource v1.ResourceName
+	IsHardEviction          bool                                               // true for hard eviction, false for soft eviction
+	ResourceGetter          func(summary *kubeletstatsv1alpha1.Summary) uint64 // Gets available resources (bytes, inodes, etc.)
+	ResourceThreshold       uint64                                             // Consumed resources that trigger eviction
+	ThresholdPercentage     string                                             // either uint64 or percentage
+	EvictionGracePeriod     string                                             // Used for soft eviction
+	MetricsLogger           func(ctx context.Context)
+}
+
+func testRunner(f *framework.Framework, config EvictionTestConfig, specs []podEvictSpec) {
+
 	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
-	expectedNodeCondition := v1.NodeDiskPressure
-	expectedStarvedResource := resourceInodes
-	pressureTimeout := 15 * time.Minute
-	inodesConsumed := uint64(200000)
-	ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
+
+	ginkgo.Context(fmt.Sprintf(testContextFmt, config.ExpectedNodeCondition), func() {
 		tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
-			// Set the eviction threshold to inodesFree - inodesConsumed, so that using inodesConsumed causes an eviction.
 			summary := eventuallyGetSummary(ctx)
-			inodesFree := *summary.Node.Fs.InodesFree
-			if inodesFree <= inodesConsumed {
-				e2eskipper.Skipf("Too few inodes free on the host for the InodeEviction test to run")
+			available := config.ResourceGetter(summary)
+
+			if config.ThresholdPercentage == "" && available <= config.ResourceThreshold {
+				e2eskipper.Skipf("Too few resources free on the host for the eviction test to run")
 			}
-			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsInodesFree): fmt.Sprintf("%d", inodesFree-inodesConsumed)}
+
+			var thresholdValue string
+			if config.ThresholdPercentage != "" {
+				thresholdValue = config.ThresholdPercentage
+			} else {
+				thresholdValue = fmt.Sprintf("%d", available-config.ResourceThreshold)
+			}
+
+			if config.IsHardEviction {
+				initialConfig.EvictionHard = map[string]string{config.Signal: thresholdValue}
+			} else {
+				initialConfig.EvictionSoft = map[string]string{config.Signal: thresholdValue}
+				initialConfig.EvictionSoftGracePeriod = map[string]string{config.Signal: config.EvictionGracePeriod}
+				initialConfig.EvictionMaxPodGracePeriod = 30
+			}
+
+			// Add any special overrides for specific tests
 			initialConfig.EvictionMinimumReclaim = map[string]string{}
+
+			// Ensure that pods are not evicted because of the eviction-hard threshold
+			// setting a threshold to 0% disables; non-empty map overrides default value (necessary due to omitempty)
+			if !config.IsHardEviction {
+				initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalMemoryAvailable): "0%"}
+			}
 		})
-		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logInodeMetrics, []podEvictSpec{
+
+		runEvictionTest(f, config.PressureTimeout, config.ExpectedNodeCondition,
+			config.ExpectedStarvedResource, config.MetricsLogger, specs)
+	})
+}
+
+// InodeEviction tests that the node responds to node disk pressure by evicting only responsible pods.
+// Node disk pressure is induced by consuming all inodes on the node.
+var _ = SIGDescribe("InodeEviction", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.Eviction, func() {
+	testRunner(
+		framework.NewDefaultFramework("inode-eviction-test"),
+		EvictionTestConfig{
+			Signal:                  string(evictionapi.SignalNodeFsInodesFree),
+			PressureTimeout:         15 * time.Minute,
+			ExpectedNodeCondition:   v1.NodeDiskPressure,
+			ExpectedStarvedResource: resourceInodes,
+			IsHardEviction:          true,
+			ResourceThreshold:       uint64(200000), // Inodes consumed
+			MetricsLogger:           logInodeMetrics,
+			ResourceGetter: func(summary *kubeletstatsv1alpha1.Summary) uint64 {
+				return *summary.Node.Fs.InodesFree
+			},
+		},
+		[]podEvictSpec{
 			{
 				evictionPriority: 1,
 				// TODO(#127864): Container runtime may not immediate free up the resources after the pod eviction,
@@ -100,7 +154,6 @@ var _ = SIGDescribe("InodeEviction", framework.WithSlow(), framework.WithSerial(
100154
pod: innocentPod(),
101155
},
102156
})
103-
})
104157
})
105158

106159
// ImageGCNoEviction tests that the eviction manager is able to prevent eviction
@@ -227,41 +280,32 @@ var _ = SIGDescribe("LocalStorageEviction", framework.WithSlow(), framework.With
 // Disk pressure is induced by running pods which consume disk space, which exceed the soft eviction threshold.
 // Note: This test's purpose is to test Soft Evictions. Local storage was chosen since it is the least costly to run.
 var _ = SIGDescribe("LocalStorageSoftEviction", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.Eviction, func() {
-	f := framework.NewDefaultFramework("localstorage-eviction-test")
-	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
-	pressureTimeout := 10 * time.Minute
-	expectedNodeCondition := v1.NodeDiskPressure
-	expectedStarvedResource := v1.ResourceEphemeralStorage
-	ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
-		tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
-			diskConsumed := resource.MustParse("4Gi")
-			summary := eventuallyGetSummary(ctx)
-			availableBytes := *(summary.Node.Fs.AvailableBytes)
-			if availableBytes <= uint64(diskConsumed.Value()) {
-				e2eskipper.Skipf("Too little disk free on the host for the LocalStorageSoftEviction test to run")
-			}
-			initialConfig.EvictionSoft = map[string]string{string(evictionapi.SignalNodeFsAvailable): fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
-			initialConfig.EvictionSoftGracePeriod = map[string]string{string(evictionapi.SignalNodeFsAvailable): "1m"}
-			// Defer to the pod default grace period
-			initialConfig.EvictionMaxPodGracePeriod = 30
-			initialConfig.EvictionMinimumReclaim = map[string]string{}
-			// Ensure that pods are not evicted because of the eviction-hard threshold
-			// setting a threshold to 0% disables; non-empty map overrides default value (necessary due to omitempty)
-			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalMemoryAvailable): "0%"}
-		})
-		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{
+	diskConsumed := resource.MustParse("4Gi")
+	testRunner(
+		framework.NewDefaultFramework("localstorage-eviction-test"),
+		EvictionTestConfig{
+			Signal:                  string(evictionapi.SignalNodeFsAvailable),
+			PressureTimeout:         10 * time.Minute,
+			ExpectedNodeCondition:   v1.NodeDiskPressure,
+			ExpectedStarvedResource: v1.ResourceEphemeralStorage,
+			ResourceThreshold:       uint64(diskConsumed.Value()), // local storage
+			IsHardEviction:          false,
+			EvictionGracePeriod:     "1m",
+			MetricsLogger:           logDiskMetrics,
+			ResourceGetter: func(summary *kubeletstatsv1alpha1.Summary) uint64 {
+				return *summary.Node.Fs.AvailableBytes
+			},
+		},
+		[]podEvictSpec{
 			{
 				evictionPriority: 1,
-				// TODO(#127864): Container runtime may not immediate free up the resources after the pod eviction,
-				// causing the test to fail. We provision an emptyDir volume to avoid relying on the runtime behavior.
-				pod: diskConsumingPod("container-disk-hog", lotsOfDisk, &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}, v1.ResourceRequirements{}),
+				pod: diskConsumingPod("container-disk-hog", lotsOfDisk, &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}, v1.ResourceRequirements{}),
 			},
 			{
 				evictionPriority: 0,
 				pod: innocentPod(),
 			},
 		})
-	})
 })
 
 var _ = SIGDescribe("LocalStorageSoftEvictionNotOverwriteTerminationGracePeriodSeconds", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.Eviction, func() {
@@ -304,20 +348,28 @@ var _ = SIGDescribe("LocalStorageSoftEvictionNotOverwriteTerminationGracePeriodS
 
 // LocalStorageCapacityIsolationEviction tests that container and volume local storage limits are enforced through evictions
 var _ = SIGDescribe("LocalStorageCapacityIsolationEviction", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.LocalStorageCapacityIsolationQuota, feature.Eviction, func() {
-	f := framework.NewDefaultFramework("localstorage-eviction-test")
-	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
-	evictionTestTimeout := 10 * time.Minute
-	ginkgo.Context(fmt.Sprintf(testContextFmt, "evictions due to pod local storage violations"), func() {
-		tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
-			// setting a threshold to 0% disables; non-empty map overrides default value (necessary due to omitempty)
-			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalMemoryAvailable): "0%"}
-		})
-		sizeLimit := resource.MustParse("100Mi")
-		useOverLimit := 101 /* Mb */
-		useUnderLimit := 99 /* Mb */
-		containerLimit := v1.ResourceList{v1.ResourceEphemeralStorage: sizeLimit}
-
-		runEvictionTest(f, evictionTestTimeout, noPressure, noStarvedResource, logDiskMetrics, []podEvictSpec{
+	sizeLimit := resource.MustParse("40Mi")
+	useOverLimit := 41 /* Mb */
+	useUnderLimit := 39 /* Mb */
+	containerLimit := v1.ResourceList{v1.ResourceEphemeralStorage: sizeLimit}
+
+	testRunner(
+		framework.NewDefaultFramework("localstorage-eviction-test"),
+		EvictionTestConfig{
+			Signal:                  string(evictionapi.SignalMemoryAvailable),
+			PressureTimeout:         10 * time.Minute,
+			ExpectedNodeCondition:   noPressure,
+			ExpectedStarvedResource: noStarvedResource,
+			IsHardEviction:          true,
+			ThresholdPercentage:     "0%", // Disabling this threshold to focus on pod-level limits
+			MetricsLogger:           logDiskMetrics,
+			ResourceGetter: func(summary *kubeletstatsv1alpha1.Summary) uint64 {
+				// We're not using node-level resource checks for this test
+				// Just need a non-zero value to pass the resource check
+				return 1024 * 1024 * 1024 // 1 GB (arbitrary non-zero value)
+			},
+		},
+		[]podEvictSpec{
 			{
 				evictionPriority: 1, // This pod should be evicted because emptyDir (default storage type) usage violation
 				pod: diskConsumingPod("emptydir-disk-sizelimit", useOverLimit, &v1.VolumeSource{
@@ -350,7 +402,6 @@ var _ = SIGDescribe("LocalStorageCapacityIsolationEviction", framework.WithSlow(
 				pod: diskConsumingPod("container-disk-below-sizelimit", useUnderLimit, nil, v1.ResourceRequirements{Limits: containerLimit}),
 			},
 		})
-	})
 })
 
 // PriorityMemoryEvictionOrdering tests that the node responds to node memory pressure by evicting pods.
@@ -589,6 +640,19 @@ func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expe
 			// Nodes do not immediately report local storage capacity
 			// Sleep so that pods requesting local storage do not fail to schedule
 			time.Sleep(30 * time.Second)
+			// Check for Pressure
+			ginkgo.By("make sure node has no pressure before starting")
+			gomega.Eventually(ctx, func(ctx context.Context) error {
+				if expectedNodeCondition == noPressure || !hasNodeCondition(ctx, f, expectedNodeCondition) {
+					return nil
+				}
+				return fmt.Errorf("NodeCondition: %s encountered", expectedNodeCondition)
+			}, pressureDisappearTimeout, evictionPollInterval).Should(gomega.Succeed())
+
+			// prepull images only if its image-gc-eviction-test
+			if regexp.MustCompile(`(?i)image-gc.*`).MatchString(f.BaseName) {
+				gomega.Expect(PrePullAllImages(ctx)).Should(gomega.Succeed())
+			}
 			ginkgo.By("setting up pods to be used by tests")
 			pods := []*v1.Pod{}
 			for _, spec := range testSpecs {
@@ -656,10 +720,23 @@ func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expe
 			}, postTestConditionMonitoringPeriod, evictionPollInterval).Should(gomega.Succeed())
 
 			ginkgo.By("checking for correctly formatted eviction events")
-			verifyEvictionEvents(ctx, f, testSpecs, expectedStarvedResource)
+			gomega.Eventually(ctx, func(ctx context.Context) error {
+				return verifyEvictionEvents(ctx, f, testSpecs, expectedStarvedResource)
+			}, postTestConditionMonitoringPeriod, evictionPollInterval).Should(gomega.Succeed())
 		})
 
 		ginkgo.AfterEach(func(ctx context.Context) {
+			prePullImagesIfNeccecary := func() {
+				if framework.TestContext.PrepullImages {
+					// The disk eviction test may cause the prepulled images to be evicted,
+					// prepull those images again to ensure this test not affect following tests.
+					err := PrePullAllImages(ctx)
+					gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
+				}
+			}
+			// Run prePull using a defer to make sure it is executed even when the assertions below fails
+			defer prePullImagesIfNeccecary()
+
 			ginkgo.By("deleting pods")
 			for _, spec := range testSpecs {
 				ginkgo.By(fmt.Sprintf("deleting pod: %s", spec.pod.Name))
@@ -810,7 +887,7 @@ func verifyPodConditions(ctx context.Context, f *framework.Framework, testSpecs
 	}
 }
 
-func verifyEvictionEvents(ctx context.Context, f *framework.Framework, testSpecs []podEvictSpec, expectedStarvedResource v1.ResourceName) {
+func verifyEvictionEvents(ctx context.Context, f *framework.Framework, testSpecs []podEvictSpec, expectedStarvedResource v1.ResourceName) error {
 	for _, spec := range testSpecs {
 		pod := spec.pod
 		if spec.evictionPriority != 0 {
@@ -824,24 +901,22 @@ func verifyEvictionEvents(ctx context.Context, f *framework.Framework, testSpecs
 			framework.ExpectNoError(err, "getting events")
 			gomega.Expect(podEvictEvents.Items).To(gomega.HaveLen(1), "Expected to find 1 eviction event for pod %s, got %d", pod.Name, len(podEvictEvents.Items))
 			event := podEvictEvents.Items[0]
-
 			if expectedStarvedResource != noStarvedResource {
 				// Check the eviction.StarvedResourceKey
 				starved, found := event.Annotations[eviction.StarvedResourceKey]
 				if !found {
-					framework.Failf("Expected to find an annotation on the eviction event for pod %s containing the starved resource %s, but it was not found",
+					return fmt.Errorf("Expected to find an annotation on the eviction event for pod %s containing the starved resource %s, but it was not found",
 						pod.Name, expectedStarvedResource)
 				}
 				starvedResource := v1.ResourceName(starved)
 				gomega.Expect(starvedResource).To(gomega.Equal(expectedStarvedResource), "Expected to the starved_resource annotation on pod %s to contain %s, but got %s instead",
 					pod.Name, expectedStarvedResource, starvedResource)
-
 				// We only check these keys for memory, because ephemeral storage evictions may be due to volume usage, in which case these values are not present
 				if expectedStarvedResource == v1.ResourceMemory {
 					// Check the eviction.OffendingContainersKey
 					offendersString, found := event.Annotations[eviction.OffendingContainersKey]
 					if !found {
-						framework.Failf("Expected to find an annotation on the eviction event for pod %s containing the offending containers, but it was not found",
+						return fmt.Errorf("Expected to find an annotation on the eviction event for pod %s containing the offending containers, but it was not found",
 							pod.Name)
 					}
 					offendingContainers := strings.Split(offendersString, ",")
@@ -853,7 +928,7 @@ func verifyEvictionEvents(ctx context.Context, f *framework.Framework, testSpecs
 					// Check the eviction.OffendingContainersUsageKey
 					offendingUsageString, found := event.Annotations[eviction.OffendingContainersUsageKey]
 					if !found {
-						framework.Failf("Expected to find an annotation on the eviction event for pod %s containing the offending containers' usage, but it was not found",
+						return fmt.Errorf("Expected to find an annotation on the eviction event for pod %s containing the offending containers' usage, but it was not found",
 							pod.Name)
 					}
 					offendingContainersUsage := strings.Split(offendingUsageString, ",")
@@ -868,6 +943,7 @@ func verifyEvictionEvents(ctx context.Context, f *framework.Framework, testSpecs
 			}
 		}
 	}
+	return nil
 }
 
 // Returns TRUE if the node has the node condition, FALSE otherwise
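The third changed file (the split-out SeparateDisk tests themselves) is not shown on this page. Purely as a hedged illustration of how the two changes above could compose, with every spec name, signal choice, and threshold below being an assumption rather than the commit's contents, an imagefs-oriented eviction spec might wire the new feature.SeparateDisk label into the testRunner helper like this:

// Hypothetical sketch only; not taken from this diff. The signal and
// starved-resource mapping are assumptions made for illustration.
var _ = SIGDescribe("ImageFsEvictionSketch", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.SeparateDisk, feature.Eviction, func() {
	diskConsumed := resource.MustParse("4Gi") // assumed headroom to consume on the image filesystem
	testRunner(
		framework.NewDefaultFramework("imagefs-eviction-sketch"),
		EvictionTestConfig{
			Signal:                  string(evictionapi.SignalImageFsAvailable),
			PressureTimeout:         10 * time.Minute,
			ExpectedNodeCondition:   v1.NodeDiskPressure,
			ExpectedStarvedResource: v1.ResourceEphemeralStorage, // assumed mapping for imagefs pressure
			IsHardEviction:          true,
			ResourceThreshold:       uint64(diskConsumed.Value()),
			MetricsLogger:           logDiskMetrics,
			ResourceGetter: func(summary *kubeletstatsv1alpha1.Summary) uint64 {
				// Read free space from the runtime's image filesystem rather than the node fs.
				return *summary.Node.Runtime.ImageFs.AvailableBytes
			},
		},
		[]podEvictSpec{
			{
				evictionPriority: 1,
				// nil volume source: writes land on the container's writable layer,
				// which lives on the separate runtime filesystem in this setup
				pod: diskConsumingPod("image-fs-disk-hog", lotsOfDisk, nil, v1.ResourceRequirements{}),
			},
			{
				evictionPriority: 0,
				pod: innocentPod(),
			},
		})
})

Pointing ResourceGetter at summary.Node.Runtime.ImageFs mirrors how the node-fs tests above read summary.Node.Fs.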
