@@ -29,7 +29,6 @@ import (
29
29
"k8s.io/apimachinery/pkg/runtime"
30
30
"k8s.io/apimachinery/pkg/runtime/schema"
31
31
"k8s.io/apimachinery/pkg/util/wait"
32
- genericregistry "k8s.io/apiserver/pkg/registry/generic/registry"
33
32
"k8s.io/apiserver/pkg/registry/rest"
34
33
"k8s.io/apiserver/pkg/util/dryrun"
35
34
policyclient "k8s.io/client-go/kubernetes/typed/policy/v1beta1"
@@ -57,13 +56,13 @@ var EvictionsRetry = wait.Backoff{
57
56
Jitter : 0.1 ,
58
57
}
59
58
60
- func newEvictionStorage (store * genericregistry. Store , podDisruptionBudgetClient policyclient.PodDisruptionBudgetsGetter ) * EvictionREST {
59
+ func newEvictionStorage (store rest. StandardStorage , podDisruptionBudgetClient policyclient.PodDisruptionBudgetsGetter ) * EvictionREST {
61
60
return & EvictionREST {store : store , podDisruptionBudgetClient : podDisruptionBudgetClient }
62
61
}
63
62
64
63
// EvictionREST implements the REST endpoint for evicting pods from nodes
65
64
type EvictionREST struct {
66
- store * genericregistry. Store
65
+ store rest. StandardStorage
67
66
podDisruptionBudgetClient policyclient.PodDisruptionBudgetsGetter
68
67
}
69
68
@@ -111,33 +110,75 @@ func (r *EvictionREST) Create(ctx context.Context, name string, obj runtime.Obje
111
110
return nil , errors .NewBadRequest ("name in URL does not match name in Eviction object" )
112
111
}
113
112
114
- deletionOptions , err := propagateDryRun (eviction , options )
113
+ originalDeleteOptions , err := propagateDryRun (eviction , options )
115
114
if err != nil {
116
115
return nil , err
117
116
}
118
117
119
- obj , err = r .store .Get (ctx , eviction .Name , & metav1.GetOptions {})
120
- if err != nil {
121
- return nil , err
122
- }
123
- pod := obj .(* api.Pod )
124
-
125
118
if createValidation != nil {
126
119
if err := createValidation (ctx , eviction .DeepCopyObject ()); err != nil {
127
120
return nil , err
128
121
}
129
122
}
130
123
131
- // Evicting a terminal pod should result in direct deletion of pod as it already caused disruption by the time we are evicting.
132
- // There is no need to check for pdb.
133
- if pod .Status .Phase == api .PodSucceeded || pod .Status .Phase == api .PodFailed {
124
+ var pod * api.Pod
125
+ deletedPod := false
126
+ // by default, retry conflict errors
127
+ shouldRetry := errors .IsConflict
128
+ if ! resourceVersionIsUnset (originalDeleteOptions ) {
129
+ // if the original options included a resourceVersion precondition, don't retry
130
+ shouldRetry = func (err error ) bool { return false }
131
+ }
132
+
133
+ err = retry .OnError (EvictionsRetry , shouldRetry , func () error {
134
+ obj , err = r .store .Get (ctx , eviction .Name , & metav1.GetOptions {})
135
+ if err != nil {
136
+ return err
137
+ }
138
+ pod = obj .(* api.Pod )
139
+
140
+ // Evicting a terminal pod should result in direct deletion of pod as it already caused disruption by the time we are evicting.
141
+ // There is no need to check for pdb.
142
+ if ! canIgnorePDB (pod ) {
143
+ // Pod is not in a state where we can skip checking PDBs, exit the loop, and continue to PDB checks.
144
+ return nil
145
+ }
146
+
147
+ // the PDB can be ignored, so delete the pod
148
+ deletionOptions := originalDeleteOptions .DeepCopy ()
149
+ // We should check if resourceVersion is already set by the requestor
150
+ // as it might be older than the pod we just fetched and should be
151
+ // honored.
152
+ if shouldEnforceResourceVersion (pod ) && resourceVersionIsUnset (originalDeleteOptions ) {
153
+ // Set deletionOptions.Preconditions.ResourceVersion to ensure we're not
154
+ // racing with another PDB-impacting process elsewhere.
155
+ if deletionOptions .Preconditions == nil {
156
+ deletionOptions .Preconditions = & metav1.Preconditions {}
157
+ }
158
+ deletionOptions .Preconditions .ResourceVersion = & pod .ResourceVersion
159
+ }
134
160
_ , _ , err = r .store .Delete (ctx , eviction .Name , rest .ValidateAllObjectFunc , deletionOptions )
135
161
if err != nil {
136
- return nil , err
162
+ return err
137
163
}
138
- return & metav1.Status {
139
- Status : metav1 .StatusSuccess }, nil
164
+ deletedPod = true
165
+ return nil
166
+ })
167
+ switch {
168
+ case err != nil :
169
+ // this can happen in cases where the PDB can be ignored, but there was a problem issuing the pod delete:
170
+ // maybe we conflicted too many times or we didn't have permission or something else weird.
171
+ return nil , err
172
+
173
+ case deletedPod :
174
+ // this happens when we successfully deleted the pod. In this case, we're done executing because we've evicted/deleted the pod
175
+ return & metav1.Status {Status : metav1 .StatusSuccess }, nil
176
+
177
+ default :
178
+ // this happens when we didn't have an error and we didn't delete the pod. The only branch that happens on is when
179
+ // we cannot ignored the PDB for this pod, so this is the fall through case.
140
180
}
181
+
141
182
var rtStatus * metav1.Status
142
183
var pdbName string
143
184
err = func () error {
@@ -172,7 +213,7 @@ func (r *EvictionREST) Create(ctx context.Context, name string, obj runtime.Obje
172
213
173
214
// If it was false already, or if it becomes false during the course of our retries,
174
215
// raise an error marked as a 429.
175
- if err = r .checkAndDecrement (pod .Namespace , pod .Name , * pdb , dryrun .IsDryRun (deletionOptions .DryRun )); err != nil {
216
+ if err = r .checkAndDecrement (pod .Namespace , pod .Name , * pdb , dryrun .IsDryRun (originalDeleteOptions .DryRun )); err != nil {
176
217
refresh = true
177
218
return err
178
219
}
@@ -194,7 +235,7 @@ func (r *EvictionREST) Create(ctx context.Context, name string, obj runtime.Obje
194
235
// At this point there was either no PDB or we succeeded in decrementing
195
236
196
237
// Try the delete
197
- _ , _ , err = r .store .Delete (ctx , eviction .Name , rest .ValidateAllObjectFunc , deletionOptions )
238
+ _ , _ , err = r .store .Delete (ctx , eviction .Name , rest .ValidateAllObjectFunc , originalDeleteOptions . DeepCopy () )
198
239
if err != nil {
199
240
return nil , err
200
241
}
@@ -203,6 +244,29 @@ func (r *EvictionREST) Create(ctx context.Context, name string, obj runtime.Obje
203
244
return & metav1.Status {Status : metav1 .StatusSuccess }, nil
204
245
}
205
246
247
+ // canIgnorePDB returns true for pod conditions that allow the pod to be deleted
248
+ // without checking PDBs.
249
+ func canIgnorePDB (pod * api.Pod ) bool {
250
+ if pod .Status .Phase == api .PodSucceeded || pod .Status .Phase == api .PodFailed || pod .Status .Phase == api .PodPending {
251
+ return true
252
+ }
253
+ return false
254
+ }
255
+
256
+ func shouldEnforceResourceVersion (pod * api.Pod ) bool {
257
+ // We don't need to enforce ResourceVersion for terminal pods
258
+ if pod .Status .Phase == api .PodSucceeded || pod .Status .Phase == api .PodFailed {
259
+ return false
260
+ }
261
+ // Return true for all other pods to ensure we don't race against a pod becoming
262
+ // ready and violating PDBs.
263
+ return true
264
+ }
265
+
266
+ func resourceVersionIsUnset (options * metav1.DeleteOptions ) bool {
267
+ return options .Preconditions == nil || options .Preconditions .ResourceVersion == nil
268
+ }
269
+
206
270
// checkAndDecrement checks if the provided PodDisruptionBudget allows any disruption.
207
271
func (r * EvictionREST ) checkAndDecrement (namespace string , podName string , pdb policyv1beta1.PodDisruptionBudget , dryRun bool ) error {
208
272
if pdb .Status .ObservedGeneration < pdb .Generation {
0 commit comments