
Commit d6bb550

Merge pull request kubernetes#122890 from HirazawaUi/fix-pod-grace-period

[kubelet]: Fix the bug where pod grace period will be overwritten

2 parents: 211d67a + 49058ee

7 files changed: 88 additions & 4 deletions

pkg/features/kube_features.go

Lines changed: 6 additions & 0 deletions

@@ -45,6 +45,12 @@ const (
 	// with DNS names.
 	AllowDNSOnlyNodeCSR featuregate.Feature = "AllowDNSOnlyNodeCSR"

+	// owner: @HirazawaUi
+	// Deprecated: v1.32
+	//
+	// Allow spec.terminationGracePeriodSeconds to be overridden by MaxPodGracePeriodSeconds in soft evictions.
+	AllowOverwriteTerminationGracePeriodSeconds featuregate.Feature = "AllowOverwriteTerminationGracePeriodSeconds"
+
 	// owner: @bswartz
 	// alpha: v1.18
 	// beta: v1.24
pkg/features/versioned_kube_features.go

Lines changed: 3 additions & 0 deletions

@@ -33,6 +33,9 @@ import (
 //
 // Entries are alphabetized.
 var defaultVersionedKubernetesFeatureGates = map[featuregate.Feature]featuregate.VersionedSpecs{
+	AllowOverwriteTerminationGracePeriodSeconds: {
+		{Version: version.MustParse("1.32"), Default: false, PreRelease: featuregate.Deprecated},
+	},
 	AnyVolumeDataSource: {
 		{Version: version.MustParse("1.18"), Default: false, PreRelease: featuregate.Alpha},
 		{Version: version.MustParse("1.24"), Default: true, PreRelease: featuregate.Beta},

pkg/generated/openapi/zz_generated.openapi.go

Lines changed: 1 addition & 1 deletion
Generated file; the diff is not rendered.

pkg/kubelet/eviction/eviction_manager.go

Lines changed: 4 additions & 0 deletions

@@ -411,7 +411,11 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
 	gracePeriodOverride := int64(immediateEvictionGracePeriodSeconds)
 	if !isHardEvictionThreshold(thresholdToReclaim) {
 		gracePeriodOverride = m.config.MaxPodGracePeriodSeconds
+		if pod.Spec.TerminationGracePeriodSeconds != nil && !utilfeature.DefaultFeatureGate.Enabled(features.AllowOverwriteTerminationGracePeriodSeconds) {
+			gracePeriodOverride = min(m.config.MaxPodGracePeriodSeconds, *pod.Spec.TerminationGracePeriodSeconds)
+		}
 	}
+
 	message, annotations := evictionMessage(resourceToReclaim, pod, statsFunc, thresholds, observations)
 	condition := &v1.PodCondition{
 		Type: v1.DisruptionTarget,
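The fix turns the soft-eviction override into a true cap: with the gate off (the default), evictionMaxPodGracePeriod can only shorten a pod's own terminationGracePeriodSeconds, never extend it. The same decision in isolation, as a sketch (effectiveGracePeriod is an illustrative name, not the kubelet's; min is the Go 1.21+ builtin the diff itself uses):

package main

import "fmt"

// effectiveGracePeriod mirrors the fixed soft-eviction logic: the kubelet's
// cap wins only when it is smaller than the pod's own grace period, unless
// the deprecated gate restores the old unconditional overwrite.
func effectiveGracePeriod(maxPodGracePeriodSeconds int64, podGracePeriodSeconds *int64, allowOverwrite bool) int64 {
	gracePeriod := maxPodGracePeriodSeconds
	if podGracePeriodSeconds != nil && !allowOverwrite {
		gracePeriod = min(maxPodGracePeriodSeconds, *podGracePeriodSeconds)
	}
	return gracePeriod
}

func main() {
	short, long := int64(5), int64(300)
	// With a 30s cap: a 300s pod is cut to 30s, a 5s pod keeps its 5s.
	fmt.Println(effectiveGracePeriod(30, &long, false))  // 30
	fmt.Println(effectiveGracePeriod(30, &short, false)) // 5
	// Legacy behavior (gate enabled): the cap overwrites both, so the 5s
	// pod would be extended to 30s. This is the bug being fixed.
	fmt.Println(effectiveGracePeriod(30, &short, true)) // 30
}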

staging/src/k8s.io/kubelet/config/v1beta1/types.go

Lines changed: 0 additions & 3 deletions

@@ -527,9 +527,6 @@ type KubeletConfiguration struct {
 	// evictionMaxPodGracePeriod is the maximum allowed grace period (in seconds) to use
 	// when terminating pods in response to a soft eviction threshold being met. This value
 	// effectively caps the Pod's terminationGracePeriodSeconds value during soft evictions.
-	// Note: Due to issue #64530, the behavior has a bug where this value currently just
-	// overrides the grace period during soft eviction, which can increase the grace
-	// period from what is set on the Pod. This bug will be fixed in a future release.
 	// Default: 0
 	// +optional
 	EvictionMaxPodGracePeriod int32 `json:"evictionMaxPodGracePeriod,omitempty"`
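With the stale #64530 note removed, the field documentation matches the fixed behavior: the value is a cap. For reference, a hedged sketch of how the related v1beta1 fields fit together, modeled on the e2e test below (the threshold values are illustrative; softEvictionConfig is not a kubelet API):

package main

import (
	kubeletconfigv1beta1 "k8s.io/kubelet/config/v1beta1"
)

// softEvictionConfig sketches the fields discussed above; it is not a
// complete KubeletConfiguration.
func softEvictionConfig() *kubeletconfigv1beta1.KubeletConfiguration {
	return &kubeletconfigv1beta1.KubeletConfiguration{
		// Soft-evict when node filesystem space drops below 10Gi...
		EvictionSoft: map[string]string{"nodefs.available": "10Gi"},
		// ...and stays there for 30 seconds.
		EvictionSoftGracePeriod: map[string]string{"nodefs.available": "30s"},
		// Cap any evicted pod's grace period at 30s; after this fix the cap
		// no longer extends a pod's own terminationGracePeriodSeconds.
		EvictionMaxPodGracePeriod: 30,
		// Deprecated escape hatch: restore the pre-1.32 overwrite behavior.
		FeatureGates: map[string]bool{"AllowOverwriteTerminationGracePeriodSeconds": true},
	}
}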

test/e2e_node/eviction_test.go

Lines changed: 68 additions & 0 deletions
@@ -248,6 +248,44 @@ var _ = SIGDescribe("LocalStorageSoftEviction", framework.WithSlow(), framework.
 	})
 })

+var _ = SIGDescribe("LocalStorageSoftEvictionNotOverwriteTerminationGracePeriodSeconds", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), nodefeature.Eviction, func() {
+	f := framework.NewDefaultFramework("localstorage-eviction-test")
+	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
+	pressureTimeout := 10 * time.Minute
+	expectedNodeCondition := v1.NodeDiskPressure
+	expectedStarvedResource := v1.ResourceEphemeralStorage
+
+	evictionMaxPodGracePeriod := 30
+	evictionSoftGracePeriod := 30
+	ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
+		tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
+			diskConsumed := resource.MustParse("4Gi")
+			summary := eventuallyGetSummary(ctx)
+			availableBytes := *(summary.Node.Fs.AvailableBytes)
+			if availableBytes <= uint64(diskConsumed.Value()) {
+				e2eskipper.Skipf("Too little disk free on the host for the LocalStorageSoftEviction test to run")
+			}
+			initialConfig.EvictionSoft = map[string]string{string(evictionapi.SignalNodeFsAvailable): fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
+			initialConfig.EvictionSoftGracePeriod = map[string]string{string(evictionapi.SignalNodeFsAvailable): "30s"}
+			// Defer to the pod default grace period
+			initialConfig.EvictionMaxPodGracePeriod = int32(evictionMaxPodGracePeriod)
+			initialConfig.EvictionMinimumReclaim = map[string]string{}
+			// Ensure that pods are not evicted because of the eviction-hard threshold
+			// setting a threshold to 0% disables; non-empty map overrides default value (necessary due to omitempty)
+			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalMemoryAvailable): "0%"}
+		})
+
+		runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{
+			{
+				evictionMaxPodGracePeriod: evictionMaxPodGracePeriod,
+				evictionSoftGracePeriod:   evictionSoftGracePeriod,
+				evictionPriority:          1,
+				pod:                       diskConsumingPod("container-disk-hog", lotsOfDisk, nil, v1.ResourceRequirements{}),
+			},
+		})
+	})
+})
+
 // This test validates that in-memory EmptyDir's are evicted when the Kubelet does
 // not have Sized Memory Volumes enabled. When Sized volumes are enabled, it's
 // not possible to exhaust the quota.
@@ -551,6 +589,9 @@ type podEvictSpec struct
 	evictionPriority int
 	pod *v1.Pod
 	wantPodDisruptionCondition *v1.PodConditionType
+
+	evictionMaxPodGracePeriod int
+	evictionSoftGracePeriod   int
 }

 // runEvictionTest sets up a testing environment given the provided pods, and checks a few things:
@@ -589,16 +630,21 @@ func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expe
 	}, pressureTimeout, evictionPollInterval).Should(gomega.BeNil())

 	ginkgo.By("Waiting for evictions to occur")
+	nodeUnreadyTime := time.Now()
+
 	gomega.Eventually(ctx, func(ctx context.Context) error {
 		if expectedNodeCondition != noPressure {
 			if hasNodeCondition(ctx, f, expectedNodeCondition) {
 				framework.Logf("Node has %s", expectedNodeCondition)
 			} else {
 				framework.Logf("Node does NOT have %s", expectedNodeCondition)
+				nodeUnreadyTime = time.Now()
 			}
 		}
 		logKubeletLatencyMetrics(ctx, kubeletmetrics.EvictionStatsAgeKey)
 		logFunc(ctx)
+
+		verifyEvictionPeriod(ctx, f, testSpecs, nodeUnreadyTime)
 		return verifyEvictionOrdering(ctx, f, testSpecs)
 	}, pressureTimeout, evictionPollInterval).Should(gomega.Succeed())
@@ -770,6 +816,28 @@ func verifyEvictionOrdering(ctx context.Context, f *framework.Framework, testSpe
 	return fmt.Errorf("pods that should be evicted are still running: %#v", pendingPods)
 }

+func verifyEvictionPeriod(ctx context.Context, f *framework.Framework, testSpecs []podEvictSpec, nodeUnreadyTime time.Time) {
+	for i, spec := range testSpecs {
+		if spec.evictionMaxPodGracePeriod == 0 && spec.evictionSoftGracePeriod == 0 {
+			continue
+		}
+		softEvictionPeriod := spec.evictionMaxPodGracePeriod + spec.evictionSoftGracePeriod
+
+		pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(ctx, spec.pod.Name, metav1.GetOptions{})
+		framework.ExpectNoError(err, "Failed to get the recent pod object for name: %q", pod.Name)
+
+		minSoftEvictionPeriod := min(float64(softEvictionPeriod), float64(*spec.pod.Spec.TerminationGracePeriodSeconds+int64(spec.evictionSoftGracePeriod)))
+		if pod.Status.Phase == v1.PodFailed {
+			if time.Since(nodeUnreadyTime).Seconds() > minSoftEvictionPeriod+15 {
+				framework.Failf("pod %s should be evicted within %f seconds, but it has not been evicted for %f seconds.", pod.Name, minSoftEvictionPeriod, time.Since(nodeUnreadyTime).Seconds())
+			} else {
+				testSpecs[i].evictionMaxPodGracePeriod = 0
+				testSpecs[i].evictionSoftGracePeriod = 0
+			}
+		}
+	}
+}
+
 func verifyPodConditions(ctx context.Context, f *framework.Framework, testSpecs []podEvictSpec) {
 	for _, spec := range testSpecs {
 		if spec.wantPodDisruptionCondition != nil {
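verifyEvictionPeriod bounds how long a soft-evicted pod may linger: the soft-eviction grace period plus the grace period that actually applies (the smaller of the kubelet cap and the pod's own terminationGracePeriodSeconds), with 15 seconds of slack for detection and kill latency. The same arithmetic in isolation, as a sketch (the function name is illustrative):

package main

import "fmt"

// evictionDeadlineSeconds computes the upper bound the test enforces on the
// time between disk pressure appearing and the pod reaching PodFailed.
func evictionDeadlineSeconds(softGracePeriod, maxPodGracePeriod, podGracePeriod int64) float64 {
	// Only the smaller of the kubelet cap and the pod's own grace period
	// applies after this fix.
	applied := min(maxPodGracePeriod, podGracePeriod)
	return float64(softGracePeriod+applied) + 15 // 15s slack, as in the test
}

func main() {
	// Test values: 30s soft grace period, 30s cap, 30s default pod grace
	// period => the pod must be Failed within 75 seconds.
	fmt.Println(evictionDeadlineSeconds(30, 30, 30)) // 75
}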

test/featuregates_linter/test_data/versioned_feature_list.yaml

Lines changed: 6 additions & 0 deletions

@@ -1,3 +1,9 @@
+- name: AllowOverwriteTerminationGracePeriodSeconds
+  versionedSpecs:
+  - default: false
+    lockToDefault: false
+    preRelease: Deprecated
+    version: "1.32"
 - name: AnonymousAuthConfigurableEndpoints
   versionedSpecs:
   - default: false
