Skip to content

Commit 825eb77

Browse files
authored
Merge pull request kubernetes#83906 from mgugino-upstream-stage/pdb-exclude-pending
Allow deletion of pending pods when using PDBS
2 parents 081f97a + 047b0ce commit 825eb77

File tree

3 files changed

+350
-26
lines changed

3 files changed

+350
-26
lines changed

pkg/registry/core/pod/storage/BUILD

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,14 @@ go_test(
2222
"//staging/src/k8s.io/api/policy/v1beta1:go_default_library",
2323
"//staging/src/k8s.io/apimachinery/pkg/api/equality:go_default_library",
2424
"//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library",
25+
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/internalversion:go_default_library",
2526
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
2627
"//staging/src/k8s.io/apimachinery/pkg/fields:go_default_library",
2728
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
2829
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
30+
"//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
2931
"//staging/src/k8s.io/apimachinery/pkg/util/diff:go_default_library",
32+
"//staging/src/k8s.io/apimachinery/pkg/watch:go_default_library",
3033
"//staging/src/k8s.io/apiserver/pkg/endpoints/request:go_default_library",
3134
"//staging/src/k8s.io/apiserver/pkg/registry/generic:go_default_library",
3235
"//staging/src/k8s.io/apiserver/pkg/registry/generic/registry:go_default_library",

pkg/registry/core/pod/storage/eviction.go

Lines changed: 82 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ import (
2929
"k8s.io/apimachinery/pkg/runtime"
3030
"k8s.io/apimachinery/pkg/runtime/schema"
3131
"k8s.io/apimachinery/pkg/util/wait"
32-
genericregistry "k8s.io/apiserver/pkg/registry/generic/registry"
3332
"k8s.io/apiserver/pkg/registry/rest"
3433
"k8s.io/apiserver/pkg/util/dryrun"
3534
policyclient "k8s.io/client-go/kubernetes/typed/policy/v1beta1"
@@ -57,13 +56,13 @@ var EvictionsRetry = wait.Backoff{
5756
Jitter: 0.1,
5857
}
5958

60-
func newEvictionStorage(store *genericregistry.Store, podDisruptionBudgetClient policyclient.PodDisruptionBudgetsGetter) *EvictionREST {
59+
func newEvictionStorage(store rest.StandardStorage, podDisruptionBudgetClient policyclient.PodDisruptionBudgetsGetter) *EvictionREST {
6160
return &EvictionREST{store: store, podDisruptionBudgetClient: podDisruptionBudgetClient}
6261
}
6362

6463
// EvictionREST implements the REST endpoint for evicting pods from nodes
6564
type EvictionREST struct {
66-
store *genericregistry.Store
65+
store rest.StandardStorage
6766
podDisruptionBudgetClient policyclient.PodDisruptionBudgetsGetter
6867
}
6968

@@ -111,33 +110,75 @@ func (r *EvictionREST) Create(ctx context.Context, name string, obj runtime.Obje
111110
return nil, errors.NewBadRequest("name in URL does not match name in Eviction object")
112111
}
113112

114-
deletionOptions, err := propagateDryRun(eviction, options)
113+
originalDeleteOptions, err := propagateDryRun(eviction, options)
115114
if err != nil {
116115
return nil, err
117116
}
118117

119-
obj, err = r.store.Get(ctx, eviction.Name, &metav1.GetOptions{})
120-
if err != nil {
121-
return nil, err
122-
}
123-
pod := obj.(*api.Pod)
124-
125118
if createValidation != nil {
126119
if err := createValidation(ctx, eviction.DeepCopyObject()); err != nil {
127120
return nil, err
128121
}
129122
}
130123

131-
// Evicting a terminal pod should result in direct deletion of pod as it already caused disruption by the time we are evicting.
132-
// There is no need to check for pdb.
133-
if pod.Status.Phase == api.PodSucceeded || pod.Status.Phase == api.PodFailed {
124+
var pod *api.Pod
125+
deletedPod := false
126+
// by default, retry conflict errors
127+
shouldRetry := errors.IsConflict
128+
if !resourceVersionIsUnset(originalDeleteOptions) {
129+
// if the original options included a resourceVersion precondition, don't retry
130+
shouldRetry = func(err error) bool { return false }
131+
}
132+
133+
err = retry.OnError(EvictionsRetry, shouldRetry, func() error {
134+
obj, err = r.store.Get(ctx, eviction.Name, &metav1.GetOptions{})
135+
if err != nil {
136+
return err
137+
}
138+
pod = obj.(*api.Pod)
139+
140+
// Evicting a terminal pod should result in direct deletion of pod as it already caused disruption by the time we are evicting.
141+
// There is no need to check for pdb.
142+
if !canIgnorePDB(pod) {
143+
// Pod is not in a state where we can skip checking PDBs, exit the loop, and continue to PDB checks.
144+
return nil
145+
}
146+
147+
// the PDB can be ignored, so delete the pod
148+
deletionOptions := originalDeleteOptions.DeepCopy()
149+
// We should check if resourceVersion is already set by the requestor
150+
// as it might be older than the pod we just fetched and should be
151+
// honored.
152+
if shouldEnforceResourceVersion(pod) && resourceVersionIsUnset(originalDeleteOptions) {
153+
// Set deletionOptions.Preconditions.ResourceVersion to ensure we're not
154+
// racing with another PDB-impacting process elsewhere.
155+
if deletionOptions.Preconditions == nil {
156+
deletionOptions.Preconditions = &metav1.Preconditions{}
157+
}
158+
deletionOptions.Preconditions.ResourceVersion = &pod.ResourceVersion
159+
}
134160
_, _, err = r.store.Delete(ctx, eviction.Name, rest.ValidateAllObjectFunc, deletionOptions)
135161
if err != nil {
136-
return nil, err
162+
return err
137163
}
138-
return &metav1.Status{
139-
Status: metav1.StatusSuccess}, nil
164+
deletedPod = true
165+
return nil
166+
})
167+
switch {
168+
case err != nil:
169+
// this can happen in cases where the PDB can be ignored, but there was a problem issuing the pod delete:
170+
// maybe we conflicted too many times or we didn't have permission or something else weird.
171+
return nil, err
172+
173+
case deletedPod:
174+
// this happens when we successfully deleted the pod. In this case, we're done executing because we've evicted/deleted the pod
175+
return &metav1.Status{Status: metav1.StatusSuccess}, nil
176+
177+
default:
178+
// this happens when we didn't have an error and we didn't delete the pod. The only branch that happens on is when
179+
// we cannot ignored the PDB for this pod, so this is the fall through case.
140180
}
181+
141182
var rtStatus *metav1.Status
142183
var pdbName string
143184
err = func() error {
@@ -172,7 +213,7 @@ func (r *EvictionREST) Create(ctx context.Context, name string, obj runtime.Obje
172213

173214
// If it was false already, or if it becomes false during the course of our retries,
174215
// raise an error marked as a 429.
175-
if err = r.checkAndDecrement(pod.Namespace, pod.Name, *pdb, dryrun.IsDryRun(deletionOptions.DryRun)); err != nil {
216+
if err = r.checkAndDecrement(pod.Namespace, pod.Name, *pdb, dryrun.IsDryRun(originalDeleteOptions.DryRun)); err != nil {
176217
refresh = true
177218
return err
178219
}
@@ -194,7 +235,7 @@ func (r *EvictionREST) Create(ctx context.Context, name string, obj runtime.Obje
194235
// At this point there was either no PDB or we succeeded in decrementing
195236

196237
// Try the delete
197-
_, _, err = r.store.Delete(ctx, eviction.Name, rest.ValidateAllObjectFunc, deletionOptions)
238+
_, _, err = r.store.Delete(ctx, eviction.Name, rest.ValidateAllObjectFunc, originalDeleteOptions.DeepCopy())
198239
if err != nil {
199240
return nil, err
200241
}
@@ -203,6 +244,29 @@ func (r *EvictionREST) Create(ctx context.Context, name string, obj runtime.Obje
203244
return &metav1.Status{Status: metav1.StatusSuccess}, nil
204245
}
205246

247+
// canIgnorePDB returns true for pod conditions that allow the pod to be deleted
248+
// without checking PDBs.
249+
func canIgnorePDB(pod *api.Pod) bool {
250+
if pod.Status.Phase == api.PodSucceeded || pod.Status.Phase == api.PodFailed || pod.Status.Phase == api.PodPending {
251+
return true
252+
}
253+
return false
254+
}
255+
256+
func shouldEnforceResourceVersion(pod *api.Pod) bool {
257+
// We don't need to enforce ResourceVersion for terminal pods
258+
if pod.Status.Phase == api.PodSucceeded || pod.Status.Phase == api.PodFailed {
259+
return false
260+
}
261+
// Return true for all other pods to ensure we don't race against a pod becoming
262+
// ready and violating PDBs.
263+
return true
264+
}
265+
266+
func resourceVersionIsUnset(options *metav1.DeleteOptions) bool {
267+
return options.Preconditions == nil || options.Preconditions.ResourceVersion == nil
268+
}
269+
206270
// checkAndDecrement checks if the provided PodDisruptionBudget allows any disruption.
207271
func (r *EvictionREST) checkAndDecrement(namespace string, podName string, pdb policyv1beta1.PodDisruptionBudget, dryRun bool) error {
208272
if pdb.Status.ObservedGeneration < pdb.Generation {

0 commit comments

Comments
 (0)