@@ -20,6 +20,7 @@ import (
 	"context"
 	"fmt"
 	"path/filepath"
+	"regexp"
 	"strconv"
 	"strings"
 	"time"
@@ -68,27 +69,80 @@ const (
 	noStarvedResource = v1.ResourceName("none")
 )
 
-// InodeEviction tests that the node responds to node disk pressure by evicting only responsible pods.
-// Node disk pressure is induced by consuming all inodes on the node.
-var _ = SIGDescribe("InodeEviction", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.Eviction, func() {
-	f := framework.NewDefaultFramework("inode-eviction-test")
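+// EvictionTestConfig collects the settings that differ between the eviction
+// scenarios below: which eviction signal to configure, how to read the matching
+// resource from the node summary, and whether the threshold is hard or soft.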
+type EvictionTestConfig struct {
+	Signal                  string
+	PressureTimeout         time.Duration
+	ExpectedNodeCondition   v1.NodeConditionType
+	ExpectedStarvedResource v1.ResourceName
+	IsHardEviction          bool                                               // true for hard eviction, false for soft eviction
+	ResourceGetter          func(summary *kubeletstatsv1alpha1.Summary) uint64 // Gets available resources (bytes, inodes, etc.)
+	ResourceThreshold       uint64                                             // Consumed resources that trigger eviction
+	ThresholdPercentage     string                                             // Percentage threshold (e.g. "0%"); when set it is used instead of ResourceThreshold
+	EvictionGracePeriod     string                                             // Used for soft eviction
+	MetricsLogger           func(ctx context.Context)
+}
+
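+// testRunner drives one eviction scenario: it derives the kubelet eviction
+// threshold from the live node summary, installs it as EvictionHard or
+// EvictionSoft according to config, and then delegates to runEvictionTest.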
+func testRunner(f *framework.Framework, config EvictionTestConfig, specs []podEvictSpec) {
+
 	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
-	expectedNodeCondition := v1.NodeDiskPressure
-	expectedStarvedResource := resourceInodes
-	pressureTimeout := 15 * time.Minute
-	inodesConsumed := uint64(200000)
-	ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
+
+	ginkgo.Context(fmt.Sprintf(testContextFmt, config.ExpectedNodeCondition), func() {
 		tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
-			// Set the eviction threshold to inodesFree - inodesConsumed, so that using inodesConsumed causes an eviction.
 			summary := eventuallyGetSummary(ctx)
-			inodesFree := *summary.Node.Fs.InodesFree
-			if inodesFree <= inodesConsumed {
-				e2eskipper.Skipf("Too few inodes free on the host for the InodeEviction test to run")
+			available := config.ResourceGetter(summary)
+
+			if config.ThresholdPercentage == "" && available <= config.ResourceThreshold {
+				e2eskipper.Skipf("Too few resources free on the host for the eviction test to run")
 			}
-			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsInodesFree): fmt.Sprintf("%d", inodesFree-inodesConsumed)}
+
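+			// Prefer an explicit percentage threshold; otherwise derive an absolute
+			// value so that consuming ResourceThreshold units of the resource crosses
+			// the eviction threshold.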
+			var thresholdValue string
+			if config.ThresholdPercentage != "" {
+				thresholdValue = config.ThresholdPercentage
+			} else {
+				thresholdValue = fmt.Sprintf("%d", available-config.ResourceThreshold)
+			}
+
+			if config.IsHardEviction {
+				initialConfig.EvictionHard = map[string]string{config.Signal: thresholdValue}
+			} else {
+				initialConfig.EvictionSoft = map[string]string{config.Signal: thresholdValue}
+				initialConfig.EvictionSoftGracePeriod = map[string]string{config.Signal: config.EvictionGracePeriod}
+				initialConfig.EvictionMaxPodGracePeriod = 30
+			}
+
+			// Add any special overrides for specific tests
 			initialConfig.EvictionMinimumReclaim = map[string]string{}
+
+			// Ensure that pods are not evicted because of the eviction-hard threshold
+			// setting a threshold to 0% disables; non-empty map overrides default value (necessary due to omitempty)
+			if !config.IsHardEviction {
+				initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalMemoryAvailable): "0%"}
+			}
 		})
-		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logInodeMetrics, []podEvictSpec{
+
+		runEvictionTest(f, config.PressureTimeout, config.ExpectedNodeCondition,
+			config.ExpectedStarvedResource, config.MetricsLogger, specs)
+	})
+}
+
+// InodeEviction tests that the node responds to node disk pressure by evicting only responsible pods.
+// Node disk pressure is induced by consuming all inodes on the node.
+var _ = SIGDescribe("InodeEviction", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.Eviction, func() {
+	testRunner(
+		framework.NewDefaultFramework("inode-eviction-test"),
+		EvictionTestConfig{
+			Signal:                  string(evictionapi.SignalNodeFsInodesFree),
+			PressureTimeout:         15 * time.Minute,
+			ExpectedNodeCondition:   v1.NodeDiskPressure,
+			ExpectedStarvedResource: resourceInodes,
+			IsHardEviction:          true,
+			ResourceThreshold:       uint64(200000), // Inodes consumed
+			MetricsLogger:           logInodeMetrics,
+			ResourceGetter: func(summary *kubeletstatsv1alpha1.Summary) uint64 {
+				return *summary.Node.Fs.InodesFree
+			},
+		},
+		[]podEvictSpec{
 			{
 				evictionPriority: 1,
 				// TODO(#127864): Container runtime may not immediate free up the resources after the pod eviction,
@@ -100,7 +154,6 @@ var _ = SIGDescribe("InodeEviction", framework.WithSlow(), framework.WithSerial(
 				pod: innocentPod(),
 			},
 		})
-	})
 })
 
 // ImageGCNoEviction tests that the eviction manager is able to prevent eviction
@@ -227,41 +280,32 @@ var _ = SIGDescribe("LocalStorageEviction", framework.WithSlow(), framework.With
 // Disk pressure is induced by running pods which consume disk space, which exceed the soft eviction threshold.
 // Note: This test's purpose is to test Soft Evictions. Local storage was chosen since it is the least costly to run.
 var _ = SIGDescribe("LocalStorageSoftEviction", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.Eviction, func() {
-	f := framework.NewDefaultFramework("localstorage-eviction-test")
-	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
-	pressureTimeout := 10 * time.Minute
-	expectedNodeCondition := v1.NodeDiskPressure
-	expectedStarvedResource := v1.ResourceEphemeralStorage
-	ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
-		tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
-			diskConsumed := resource.MustParse("4Gi")
-			summary := eventuallyGetSummary(ctx)
-			availableBytes := *(summary.Node.Fs.AvailableBytes)
-			if availableBytes <= uint64(diskConsumed.Value()) {
-				e2eskipper.Skipf("Too little disk free on the host for the LocalStorageSoftEviction test to run")
-			}
-			initialConfig.EvictionSoft = map[string]string{string(evictionapi.SignalNodeFsAvailable): fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
-			initialConfig.EvictionSoftGracePeriod = map[string]string{string(evictionapi.SignalNodeFsAvailable): "1m"}
-			// Defer to the pod default grace period
-			initialConfig.EvictionMaxPodGracePeriod = 30
-			initialConfig.EvictionMinimumReclaim = map[string]string{}
-			// Ensure that pods are not evicted because of the eviction-hard threshold
-			// setting a threshold to 0% disables; non-empty map overrides default value (necessary due to omitempty)
-			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalMemoryAvailable): "0%"}
-		})
-		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{
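+	// The soft threshold is derived as availableBytes - diskConsumed, so using
+	// roughly 4Gi of node-fs space is what triggers the soft eviction here.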
+	diskConsumed := resource.MustParse("4Gi")
+	testRunner(
+		framework.NewDefaultFramework("localstorage-eviction-test"),
+		EvictionTestConfig{
+			Signal:                  string(evictionapi.SignalNodeFsAvailable),
+			PressureTimeout:         10 * time.Minute,
+			ExpectedNodeCondition:   v1.NodeDiskPressure,
+			ExpectedStarvedResource: v1.ResourceEphemeralStorage,
+			ResourceThreshold:       uint64(diskConsumed.Value()), // local storage
+			IsHardEviction:          false,
+			EvictionGracePeriod:     "1m",
+			MetricsLogger:           logDiskMetrics,
+			ResourceGetter: func(summary *kubeletstatsv1alpha1.Summary) uint64 {
+				return *summary.Node.Fs.AvailableBytes
+			},
+		},
+		[]podEvictSpec{
 			{
 				evictionPriority: 1,
-				// TODO(#127864): Container runtime may not immediate free up the resources after the pod eviction,
-				// causing the test to fail. We provision an emptyDir volume to avoid relying on the runtime behavior.
-				pod: diskConsumingPod("container-disk-hog", lotsOfDisk, &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}, v1.ResourceRequirements{}),
+				pod: diskConsumingPod("container-disk-hog", lotsOfDisk, &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}, v1.ResourceRequirements{}),
 			},
 			{
 				evictionPriority: 0,
 				pod: innocentPod(),
 			},
 		})
-	})
 })
 
 var _ = SIGDescribe("LocalStorageSoftEvictionNotOverwriteTerminationGracePeriodSeconds", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.Eviction, func() {
@@ -304,20 +348,28 @@ var _ = SIGDescribe("LocalStorageSoftEvictionNotOverwriteTerminationGracePeriodS
 
 // LocalStorageCapacityIsolationEviction tests that container and volume local storage limits are enforced through evictions
 var _ = SIGDescribe("LocalStorageCapacityIsolationEviction", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.LocalStorageCapacityIsolationQuota, feature.Eviction, func() {
-	f := framework.NewDefaultFramework("localstorage-eviction-test")
-	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
-	evictionTestTimeout := 10 * time.Minute
-	ginkgo.Context(fmt.Sprintf(testContextFmt, "evictions due to pod local storage violations"), func() {
-		tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
-			// setting a threshold to 0% disables; non-empty map overrides default value (necessary due to omitempty)
-			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalMemoryAvailable): "0%"}
-		})
-		sizeLimit := resource.MustParse("100Mi")
-		useOverLimit := 101 /* Mb */
-		useUnderLimit := 99 /* Mb */
-		containerLimit := v1.ResourceList{v1.ResourceEphemeralStorage: sizeLimit}
-
-		runEvictionTest(f, evictionTestTimeout, noPressure, noStarvedResource, logDiskMetrics, []podEvictSpec{
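+	// useOverLimit and useUnderLimit sit just above and just below sizeLimit, so
+	// only the over-limit pods violate their ephemeral-storage limits.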
+	sizeLimit := resource.MustParse("40Mi")
+	useOverLimit := 41 /* Mb */
+	useUnderLimit := 39 /* Mb */
+	containerLimit := v1.ResourceList{v1.ResourceEphemeralStorage: sizeLimit}
+
+	testRunner(
+		framework.NewDefaultFramework("localstorage-eviction-test"),
+		EvictionTestConfig{
+			Signal:                  string(evictionapi.SignalMemoryAvailable),
+			PressureTimeout:         10 * time.Minute,
+			ExpectedNodeCondition:   noPressure,
+			ExpectedStarvedResource: noStarvedResource,
+			IsHardEviction:          true,
+			ThresholdPercentage:     "0%", // Disabling this threshold to focus on pod-level limits
+			MetricsLogger:           logDiskMetrics,
+			ResourceGetter: func(summary *kubeletstatsv1alpha1.Summary) uint64 {
+				// We're not using node-level resource checks for this test
+				// Just need a non-zero value to pass the resource check
+				return 1024 * 1024 * 1024 // 1 GB (arbitrary non-zero value)
+			},
+		},
+		[]podEvictSpec{
 			{
 				evictionPriority: 1, // This pod should be evicted because emptyDir (default storage type) usage violation
 				pod: diskConsumingPod("emptydir-disk-sizelimit", useOverLimit, &v1.VolumeSource{
@@ -350,7 +402,6 @@ var _ = SIGDescribe("LocalStorageCapacityIsolationEviction", framework.WithSlow(
 				pod: diskConsumingPod("container-disk-below-sizelimit", useUnderLimit, nil, v1.ResourceRequirements{Limits: containerLimit}),
 			},
 		})
-	})
 })
 
 // PriorityMemoryEvictionOrdering tests that the node responds to node memory pressure by evicting pods.
@@ -589,6 +640,19 @@ func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expe
 			// Nodes do not immediately report local storage capacity
 			// Sleep so that pods requesting local storage do not fail to schedule
 			time.Sleep(30 * time.Second)
+			// Check for Pressure
+			ginkgo.By("make sure node has no pressure before starting")
+			gomega.Eventually(ctx, func(ctx context.Context) error {
+				if expectedNodeCondition == noPressure || !hasNodeCondition(ctx, f, expectedNodeCondition) {
+					return nil
+				}
+				return fmt.Errorf("NodeCondition: %s encountered", expectedNodeCondition)
+			}, pressureDisappearTimeout, evictionPollInterval).Should(gomega.Succeed())
+
+			// prepull images only if it's an image-gc eviction test
+			if regexp.MustCompile(`(?i)image-gc.*`).MatchString(f.BaseName) {
+				gomega.Expect(PrePullAllImages(ctx)).Should(gomega.Succeed())
+			}
 			ginkgo.By("setting up pods to be used by tests")
 			pods := []*v1.Pod{}
 			for _, spec := range testSpecs {
@@ -656,10 +720,23 @@ func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expe
 			}, postTestConditionMonitoringPeriod, evictionPollInterval).Should(gomega.Succeed())
 
 			ginkgo.By("checking for correctly formatted eviction events")
-			verifyEvictionEvents(ctx, f, testSpecs, expectedStarvedResource)
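+			// Eviction events may be recorded with some delay, so poll until
+			// verifyEvictionEvents stops returning an error.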
+			gomega.Eventually(ctx, func(ctx context.Context) error {
+				return verifyEvictionEvents(ctx, f, testSpecs, expectedStarvedResource)
+			}, postTestConditionMonitoringPeriod, evictionPollInterval).Should(gomega.Succeed())
 		})
 
 		ginkgo.AfterEach(func(ctx context.Context) {
+			prePullImagesIfNecessary := func() {
+				if framework.TestContext.PrepullImages {
+					// The disk eviction test may cause the prepulled images to be evicted,
+					// so prepull those images again to ensure this test does not affect subsequent tests.
+					err := PrePullAllImages(ctx)
+					gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
+				}
+			}
+			// Run the prepull in a defer to make sure it executes even when the assertions below fail.
+			defer prePullImagesIfNecessary()
+
 			ginkgo.By("deleting pods")
 			for _, spec := range testSpecs {
 				ginkgo.By(fmt.Sprintf("deleting pod: %s", spec.pod.Name))
@@ -810,7 +887,7 @@ func verifyPodConditions(ctx context.Context, f *framework.Framework, testSpecs
 	}
 }
 
-func verifyEvictionEvents(ctx context.Context, f *framework.Framework, testSpecs []podEvictSpec, expectedStarvedResource v1.ResourceName) {
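+// verifyEvictionEvents returns an error instead of failing the test immediately,
+// so callers can retry it inside gomega.Eventually until the events show up.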
+func verifyEvictionEvents(ctx context.Context, f *framework.Framework, testSpecs []podEvictSpec, expectedStarvedResource v1.ResourceName) error {
 	for _, spec := range testSpecs {
 		pod := spec.pod
 		if spec.evictionPriority != 0 {
@@ -824,24 +901,22 @@ func verifyEvictionEvents(ctx context.Context, f *framework.Framework, testSpecs
 			framework.ExpectNoError(err, "getting events")
 			gomega.Expect(podEvictEvents.Items).To(gomega.HaveLen(1), "Expected to find 1 eviction event for pod %s, got %d", pod.Name, len(podEvictEvents.Items))
 			event := podEvictEvents.Items[0]
-
 			if expectedStarvedResource != noStarvedResource {
 				// Check the eviction.StarvedResourceKey
 				starved, found := event.Annotations[eviction.StarvedResourceKey]
 				if !found {
-					framework.Failf("Expected to find an annotation on the eviction event for pod %s containing the starved resource %s, but it was not found",
+					return fmt.Errorf("Expected to find an annotation on the eviction event for pod %s containing the starved resource %s, but it was not found",
 						pod.Name, expectedStarvedResource)
 				}
 				starvedResource := v1.ResourceName(starved)
 				gomega.Expect(starvedResource).To(gomega.Equal(expectedStarvedResource), "Expected to the starved_resource annotation on pod %s to contain %s, but got %s instead",
 					pod.Name, expectedStarvedResource, starvedResource)
-
 				// We only check these keys for memory, because ephemeral storage evictions may be due to volume usage, in which case these values are not present
 				if expectedStarvedResource == v1.ResourceMemory {
 					// Check the eviction.OffendingContainersKey
 					offendersString, found := event.Annotations[eviction.OffendingContainersKey]
 					if !found {
-						framework.Failf("Expected to find an annotation on the eviction event for pod %s containing the offending containers, but it was not found",
+						return fmt.Errorf("Expected to find an annotation on the eviction event for pod %s containing the offending containers, but it was not found",
 							pod.Name)
 					}
 					offendingContainers := strings.Split(offendersString, ",")
@@ -853,7 +928,7 @@ func verifyEvictionEvents(ctx context.Context, f *framework.Framework, testSpecs
 					// Check the eviction.OffendingContainersUsageKey
 					offendingUsageString, found := event.Annotations[eviction.OffendingContainersUsageKey]
 					if !found {
-						framework.Failf("Expected to find an annotation on the eviction event for pod %s containing the offending containers' usage, but it was not found",
+						return fmt.Errorf("Expected to find an annotation on the eviction event for pod %s containing the offending containers' usage, but it was not found",
 							pod.Name)
 					}
 					offendingContainersUsage := strings.Split(offendingUsageString, ",")
@@ -868,6 +943,7 @@ func verifyEvictionEvents(ctx context.Context, f *framework.Framework, testSpecs
 			}
 		}
 	}
+	return nil
 }
 
 // Returns TRUE if the node has the node condition, FALSE otherwise