@@ -29,7 +29,6 @@ import (
29
29
"k8s.io/apimachinery/pkg/runtime"
30
30
"k8s.io/apimachinery/pkg/runtime/schema"
31
31
"k8s.io/apimachinery/pkg/util/wait"
32
- genericregistry "k8s.io/apiserver/pkg/registry/generic/registry"
33
32
"k8s.io/apiserver/pkg/registry/rest"
34
33
"k8s.io/apiserver/pkg/util/dryrun"
35
34
policyclient "k8s.io/client-go/kubernetes/typed/policy/v1beta1"
@@ -46,24 +45,25 @@ const (
46
45
// This situation should self-correct because the PDB controller removes
47
46
// entries from the map automatically after the PDB DeletionTimeout regardless.
48
47
MaxDisruptedPodSize = 2000
48
+ retrySteps = 20
49
49
)
50
50
51
51
// EvictionsRetry is the retry for a conflict where multiple clients
52
52
// are making changes to the same resource.
53
53
var EvictionsRetry = wait.Backoff {
54
- Steps : 20 ,
54
+ Steps : retrySteps ,
55
55
Duration : 500 * time .Millisecond ,
56
56
Factor : 1.0 ,
57
57
Jitter : 0.1 ,
58
58
}
59
59
60
- func newEvictionStorage (store * genericregistry. Store , podDisruptionBudgetClient policyclient.PodDisruptionBudgetsGetter ) * EvictionREST {
60
+ func newEvictionStorage (store rest. StandardStorage , podDisruptionBudgetClient policyclient.PodDisruptionBudgetsGetter ) * EvictionREST {
61
61
return & EvictionREST {store : store , podDisruptionBudgetClient : podDisruptionBudgetClient }
62
62
}
63
63
64
64
// EvictionREST implements the REST endpoint for evicting pods from nodes
65
65
type EvictionREST struct {
66
- store * genericregistry. Store
66
+ store rest. StandardStorage
67
67
podDisruptionBudgetClient policyclient.PodDisruptionBudgetsGetter
68
68
}
69
69
@@ -130,13 +130,55 @@ func (r *EvictionREST) Create(ctx context.Context, name string, obj runtime.Obje
130
130
131
131
// Evicting a terminal pod should result in direct deletion of pod as it already caused disruption by the time we are evicting.
132
132
// There is no need to check for pdb.
133
- if pod .Status .Phase == api .PodSucceeded || pod .Status .Phase == api .PodFailed {
134
- _ , _ , err = r .store .Delete (ctx , eviction .Name , rest .ValidateAllObjectFunc , deletionOptions )
135
- if err != nil {
136
- return nil , err
133
+ if canIgnorePDB (pod ) {
134
+ deleteRefresh := false
135
+ continueToPDBs := false
136
+ // Preserve current deletionOptions if we need to fall through to checking PDBs later.
137
+ preservedDeletionOptions := deletionOptions .DeepCopy ()
138
+ err := retry .RetryOnConflict (EvictionsRetry , func () error {
139
+ if deleteRefresh {
140
+ // If we hit a conflict error, get the latest pod
141
+ obj , err = r .store .Get (ctx , eviction .Name , & metav1.GetOptions {})
142
+ if err != nil {
143
+ return err
144
+ }
145
+ pod = obj .(* api.Pod )
146
+ if ! canIgnorePDB (pod ) {
147
+ // Pod is no longer in a state where we can skip checking
148
+ // PDBs, continue to PDB checks.
149
+ continueToPDBs = true
150
+ // restore original deletion options because we may have
151
+ // modified them.
152
+ deletionOptions = preservedDeletionOptions
153
+ return nil
154
+ }
155
+ }
156
+ if shouldEnforceResourceVersion (pod ) && resourceVersionIsUnset (preservedDeletionOptions ) {
157
+ // Set deletionOptions.Preconditions.ResourceVersion to ensure we're not
158
+ // racing with another PDB-impacting process elsewhere.
159
+ if deletionOptions .Preconditions == nil {
160
+ deletionOptions .Preconditions = & metav1.Preconditions {}
161
+ }
162
+ deletionOptions .Preconditions .ResourceVersion = & pod .ResourceVersion
163
+ } else {
164
+ // restore original deletion options because we may have
165
+ // modified them.
166
+ deletionOptions = preservedDeletionOptions
167
+ }
168
+ _ , _ , err = r .store .Delete (ctx , eviction .Name , rest .ValidateAllObjectFunc , deletionOptions )
169
+ if err != nil {
170
+ deleteRefresh = true
171
+ return err
172
+ }
173
+ return nil
174
+ })
175
+ if ! continueToPDBs {
176
+ if err != nil {
177
+ return nil , err
178
+ }
179
+ return & metav1.Status {
180
+ Status : metav1 .StatusSuccess }, nil
137
181
}
138
- return & metav1.Status {
139
- Status : metav1 .StatusSuccess }, nil
140
182
}
141
183
var rtStatus * metav1.Status
142
184
var pdbName string
@@ -203,6 +245,27 @@ func (r *EvictionREST) Create(ctx context.Context, name string, obj runtime.Obje
203
245
return & metav1.Status {Status : metav1 .StatusSuccess }, nil
204
246
}
205
247
248
+ // canIgnorePDB returns true for pod conditions that allow the pod to be deleted
249
+ // without checking PDBs.
250
+ func canIgnorePDB (pod * api.Pod ) bool {
251
+ if pod .Status .Phase == api .PodSucceeded || pod .Status .Phase == api .PodFailed || pod .Status .Phase == api .PodPending {
252
+ return true
253
+ }
254
+ return false
255
+ }
256
+
257
+ func shouldEnforceResourceVersion (pod * api.Pod ) bool {
258
+ // Only pods that may be included as health in PDBs in the future need to be checked.
259
+ if pod .Status .Phase == api .PodPending {
260
+ return true
261
+ }
262
+ return false
263
+ }
264
+
265
+ func resourceVersionIsUnset (options * metav1.DeleteOptions ) bool {
266
+ return options .Preconditions == nil || options .Preconditions .ResourceVersion == nil
267
+ }
268
+
206
269
// checkAndDecrement checks if the provided PodDisruptionBudget allows any disruption.
207
270
func (r * EvictionREST ) checkAndDecrement (namespace string , podName string , pdb policyv1beta1.PodDisruptionBudget , dryRun bool ) error {
208
271
if pdb .Status .ObservedGeneration < pdb .Generation {
0 commit comments