Skip to content

Commit 047b0ce

Browse files
committed
Quit retrying early with user supplied resourceVersion
This commit also updates tests and comments.
1 parent 4522141 commit 047b0ce

File tree

2 files changed

+21
-10
lines changed

2 files changed

+21
-10
lines changed

pkg/registry/core/pod/storage/eviction.go

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,12 @@ const (
4545
// This situation should self-correct because the PDB controller removes
4646
// entries from the map automatically after the PDB DeletionTimeout regardless.
4747
MaxDisruptedPodSize = 2000
48-
retrySteps = 20
4948
)
5049

5150
// EvictionsRetry is the retry for a conflict where multiple clients
5251
// are making changes to the same resource.
5352
var EvictionsRetry = wait.Backoff{
54-
Steps: retrySteps,
53+
Steps: 20,
5554
Duration: 500 * time.Millisecond,
5655
Factor: 1.0,
5756
Jitter: 0.1,
@@ -124,7 +123,14 @@ func (r *EvictionREST) Create(ctx context.Context, name string, obj runtime.Obje
124123

125124
var pod *api.Pod
126125
deletedPod := false
127-
err = retry.RetryOnConflict(EvictionsRetry, func() error {
126+
// by default, retry conflict errors
127+
shouldRetry := errors.IsConflict
128+
if !resourceVersionIsUnset(originalDeleteOptions) {
129+
// if the original options included a resourceVersion precondition, don't retry
130+
shouldRetry = func(err error) bool { return false }
131+
}
132+
133+
err = retry.OnError(EvictionsRetry, shouldRetry, func() error {
128134
obj, err = r.store.Get(ctx, eviction.Name, &metav1.GetOptions{})
129135
if err != nil {
130136
return err
@@ -140,6 +146,9 @@ func (r *EvictionREST) Create(ctx context.Context, name string, obj runtime.Obje
140146

141147
// the PDB can be ignored, so delete the pod
142148
deletionOptions := originalDeleteOptions.DeepCopy()
149+
// We should check if resourceVersion is already set by the requestor
150+
// as it might be older than the pod we just fetched and should be
151+
// honored.
143152
if shouldEnforceResourceVersion(pod) && resourceVersionIsUnset(originalDeleteOptions) {
144153
// Set deletionOptions.Preconditions.ResourceVersion to ensure we're not
145154
// racing with another PDB-impacting process elsewhere.
@@ -158,7 +167,7 @@ func (r *EvictionREST) Create(ctx context.Context, name string, obj runtime.Obje
158167
switch {
159168
case err != nil:
160169
// this can happen in cases where the PDB can be ignored, but there was a problem issuing the pod delete:
161-
// maybe we conflicted two many times or we didn't have permission or something else weird.
170+
// maybe we conflicted too many times or we didn't have permission or something else weird.
162171
return nil, err
163172

164173
case deletedPod:
@@ -245,11 +254,13 @@ func canIgnorePDB(pod *api.Pod) bool {
245254
}
246255

247256
func shouldEnforceResourceVersion(pod *api.Pod) bool {
248-
// Only pods that may be included as health in PDBs in the future need to be checked.
249-
if pod.Status.Phase == api.PodPending {
250-
return true
257+
// We don't need to enforce ResourceVersion for terminal pods
258+
if pod.Status.Phase == api.PodSucceeded || pod.Status.Phase == api.PodFailed {
259+
return false
251260
}
252-
return false
261+
// Return true for all other pods to ensure we don't race against a pod becoming
262+
// ready and violating PDBs.
263+
return true
253264
}
254265

255266
func resourceVersionIsUnset(options *metav1.DeleteOptions) bool {

pkg/registry/core/pod/storage/eviction_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ func TestEvictionIngorePDB(t *testing.T) {
272272
expectError: true,
273273
podPhase: api.PodPending,
274274
podName: "t4",
275-
expectedDeleteCount: retrySteps,
275+
expectedDeleteCount: EvictionsRetry.Steps,
276276
},
277277
{
278278
name: "pod pending, always conflict on delete, user provided ResourceVersion constraint",
@@ -285,7 +285,7 @@ func TestEvictionIngorePDB(t *testing.T) {
285285
expectError: true,
286286
podPhase: api.PodPending,
287287
podName: "t5",
288-
expectedDeleteCount: retrySteps,
288+
expectedDeleteCount: 1,
289289
},
290290
}
291291

0 commit comments

Comments
 (0)