Skip to content

Commit 4f979c9

Browse files
authored
Merge pull request kubernetes#129010 from ffromani/e2e-fix-device-plugin-reboot-test
node: e2e: fix device plugin reboot test
2 parents 6b7b8e8 + 29d2629 commit 4f979c9

File tree

1 file changed

+12
-4
lines changed

1 file changed

+12
-4
lines changed

test/e2e_node/device_plugin_test.go

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -933,7 +933,7 @@ func testDevicePluginNodeReboot(f *framework.Framework, pluginSockDir string) {
933933
// simulate node reboot scenario by removing pods using CRI before kubelet is started. In addition to that,
934934
// intentionally a scenario is created where after node reboot, application pods requesting devices appear before the device plugin pod
935935
// exposing those devices as resource has restarted. The expected behavior is that the application pod fails at admission time.
936-
framework.It("Keeps device plugin assignments across node reboots (no pod restart, no device plugin re-registration)", framework.WithFlaky(), func(ctx context.Context) {
936+
framework.It("Does not keep device plugin assignments across node reboots if fails admission (no pod restart, no device plugin re-registration)", framework.WithFlaky(), func(ctx context.Context) {
937937
podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalForever)
938938
pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))
939939
deviceIDRE := "stub devices: (Dev-[0-9]+)"
@@ -984,9 +984,17 @@ func testDevicePluginNodeReboot(f *framework.Framework, pluginSockDir string) {
984984
return err
985985
}, 30*time.Second, framework.Poll).ShouldNot(gomega.HaveOccurred(), "cannot fetch the compute resource assignment after kubelet restart")
986986

987-
err, _ = checkPodResourcesAssignment(v1PodResources, pod1.Namespace, pod1.Name, pod1.Spec.Containers[0].Name, SampleDeviceResourceName, []string{devID1})
988-
framework.ExpectNoError(err, "inconsistent device assignment after node reboot")
989-
987+
// if we got this far, podresources API will now report 2 entries:
988+
// - sample device plugin pod, running and doing fine
989+
// - our test pod, in failed state. Pods in terminal state will still be reported, see https://github.com/kubernetes/kubernetes/issues/119423
990+
// we only care about our test pod: it will remain in the returned list until 119423 is fixed, but since it failed admission it must not have
991+
// any device allocated to it, hence we check for empty device set in the podresources response. So, we check that
992+
// A. our test pod must be present in the list response *and*
993+
// B. it has no devices assigned to it.
994+
// anything else is unexpected and thus makes the test fail. Once 119423 is fixed, a better, simpler and more intuitive check will be for the
995+
// test pod to not be present in the podresources list response, but until then we're stuck with this approach.
996+
_, found := checkPodResourcesAssignment(v1PodResources, pod1.Namespace, pod1.Name, pod1.Spec.Containers[0].Name, SampleDeviceResourceName, []string{})
997+
gomega.Expect(found).To(gomega.BeTrueBecause("%s/%s/%s failed admission, should not have devices registered", pod1.Namespace, pod1.Name, pod1.Spec.Containers[0].Name))
990998
})
991999
})
9921000
}

0 commit comments

Comments
 (0)