Skip to content

Commit 56adcd0

Browse files
committed
DRA device eviction: fix eviction triggered by pod scheduling
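Normally the scheduler shouldn't schedule a pod when there is a device taint, but perhaps it didn't know about the taint yet. In that case the pod update which sets the node name must be checked for eviction instead of being skipped as an unrelated change. The TestEviction/update test covered this, but only failed under the right timing conditions. The new event handler test case covers it reliably.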
1 parent 5856d3e commit 56adcd0

File tree

2 files changed

+23
-1
lines changed

2 files changed

+23
-1
lines changed

pkg/controller/devicetainteviction/device_taint_eviction.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -794,12 +794,13 @@ func (tc *Controller) handlePodChange(oldPod, newPod *v1.Pod) {
794794

795795
// Pods get updated quite frequently. There's no need
796796
// to check them again unless something changed regarding
797-
// their claims.
797+
// their claims or they got scheduled.
798798
//
799799
// In particular this prevents adding the pod again
800800
// directly after the eviction condition got added
801801
// to it.
802802
if oldPod != nil &&
803+
oldPod.Spec.NodeName == newPod.Spec.NodeName &&
803804
apiequality.Semantic.DeepEqual(oldPod.Status.ResourceClaimStatuses, newPod.Status.ResourceClaimStatuses) {
804805
return
805806
}

pkg/controller/devicetainteviction/device_taint_eviction_test.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,10 @@ var (
318318
OwnerReference(podName, podUID+"-other", podKind).
319319
UID("other").
320320
Obj()
321+
unscheduledPodWithClaimName = st.MakePod().Name(podName).Namespace(namespace).
322+
UID(podUID).
323+
PodResourceClaims(v1.PodResourceClaim{Name: resourceName, ResourceClaimName: &claimName}).
324+
Obj()
321325
podWithClaimName = st.MakePod().Name(podName).Namespace(namespace).
322326
UID(podUID).
323327
PodResourceClaims(v1.PodResourceClaim{Name: resourceName, ResourceClaimName: &claimName}).
@@ -494,6 +498,23 @@ func TestHandlers(t *testing.T) {
494498
// At the moment, the code reliably cancels right away.
495499
wantEvents: []*v1.Event{cancelPodEviction},
496500
},
501+
"evict-pod-after-scheduling": {
502+
initialState: state{
503+
pods: []*v1.Pod{unscheduledPodWithClaimName},
504+
slices: []*resourceapi.ResourceSlice{sliceTainted, slice2},
505+
allocatedClaims: []allocatedClaim{{ResourceClaim: inUseClaim, evictionTime: &taintTime}},
506+
},
507+
events: []any{
508+
// Normally the scheduler shouldn't schedule when there is a taint,
509+
// but perhaps it didn't know yet.
510+
update(unscheduledPodWithClaimName, podWithClaimName),
511+
},
512+
finalState: state{
513+
slices: []*resourceapi.ResourceSlice{sliceTainted, slice2},
514+
allocatedClaims: []allocatedClaim{{ResourceClaim: inUseClaim, evictionTime: &taintTime}},
515+
evicting: []evictAt{{newObject(podWithClaimName), taintTime.Time}},
516+
},
517+
},
497518
"evict-pod-resourceclaim-unrelated-changes": {
498519
initialState: state{
499520
pods: []*v1.Pod{podWithClaimName},

0 commit comments

Comments
 (0)