Commit 447c74c
DRA E2E: fix race between container start and check
Previously, "env" ran as the command inside the container and its log output was checked. That suffered from a race because even though the pod might be reported as running, there's no guarantee that the command has really produced output and that this output has been captured. Instead, explicitly running the command inside the pause container is safer because it ensures that the full output of the command gets returned.
Parent: 2642d82
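For context, the core of the change swaps a log scrape for an on-demand exec. Below is a minimal sketch of the two patterns, built from the same e2e helpers that appear in the diff further down; the wrapper functions and the package clause are illustrative only, not part of the commit.

// Sketch: old vs. new way of checking a container's environment.
package dra

import (
	"context"

	v1 "k8s.io/api/core/v1"
	"k8s.io/kubernetes/test/e2e/framework"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
)

// envViaLogs is the racy pattern that was removed: the pod can be reported
// as Running before the container's "env" command has written its output
// and before that output has been captured in the log.
func envViaLogs(ctx context.Context, f *framework.Framework, pod *v1.Pod, container string) (string, error) {
	return e2epod.GetPodLogs(ctx, f.ClientSet, pod.Namespace, pod.Name, container)
}

// envViaExec is the replacement: "env" is executed on demand inside the
// (pause) container, and the call returns only once the command has
// finished, so its complete output is available.
func envViaExec(ctx context.Context, f *framework.Framework, pod *v1.Pod, container string) (stdout, stderr string, err error) {
	return e2epod.ExecWithOptionsContext(ctx, f, e2epod.ExecOptions{
		Command:       []string{"env"},
		Namespace:     pod.Namespace,
		PodName:       pod.Name,
		ContainerName: container,
		CaptureStdout: true,
		CaptureStderr: true,
		Quiet:         true,
	})
}

Because the exec helper needs the full framework handle rather than a bare clientset, the call sites in the diff change from b.testPod(ctx, f.ClientSet, pod) to b.testPod(ctx, f, pod).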

1 file changed: 37 additions, 28 deletions

test/e2e/dra/dra.go

@@ -157,7 +157,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
 
 b.create(ctx, claim, pod)
 
-b.testPod(ctx, f.ClientSet, pod)
+b.testPod(ctx, f, pod)
 
 ginkgo.By(fmt.Sprintf("force delete test pod %s", pod.Name))
 err := b.f.ClientSet.CoreV1().Pods(b.f.Namespace.Name).Delete(ctx, pod.Name, metav1.DeleteOptions{GracePeriodSeconds: &zero})
@@ -280,8 +280,8 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
 err := e2epod.WaitForPodRunningInNamespace(ctx, f.ClientSet, pod)
 framework.ExpectNoError(err, "start pod")
 
-testContainerEnv(ctx, f.ClientSet, pod, pod.Spec.Containers[0].Name, true, container0Env...)
-testContainerEnv(ctx, f.ClientSet, pod, pod.Spec.Containers[1].Name, true, container1Env...)
+testContainerEnv(ctx, f, pod, pod.Spec.Containers[0].Name, true, container0Env...)
+testContainerEnv(ctx, f, pod, pod.Spec.Containers[1].Name, true, container1Env...)
 })
 })
 
@@ -291,20 +291,20 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
 ginkgo.It("supports simple pod referencing inline resource claim", func(ctx context.Context) {
 pod, template := b.podInline()
 b.create(ctx, pod, template)
-b.testPod(ctx, f.ClientSet, pod)
+b.testPod(ctx, f, pod)
 })
 
 ginkgo.It("supports inline claim referenced by multiple containers", func(ctx context.Context) {
 pod, template := b.podInlineMultiple()
 b.create(ctx, pod, template)
-b.testPod(ctx, f.ClientSet, pod)
+b.testPod(ctx, f, pod)
 })
 
 ginkgo.It("supports simple pod referencing external resource claim", func(ctx context.Context) {
 pod := b.podExternal()
 claim := b.externalClaim()
 b.create(ctx, claim, pod)
-b.testPod(ctx, f.ClientSet, pod)
+b.testPod(ctx, f, pod)
 })
 
 ginkgo.It("supports external claim referenced by multiple pods", func(ctx context.Context) {
@@ -315,7 +315,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
 b.create(ctx, claim, pod1, pod2, pod3)
 
 for _, pod := range []*v1.Pod{pod1, pod2, pod3} {
-b.testPod(ctx, f.ClientSet, pod)
+b.testPod(ctx, f, pod)
 }
 })
 
@@ -327,7 +327,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
 b.create(ctx, claim, pod1, pod2, pod3)
 
 for _, pod := range []*v1.Pod{pod1, pod2, pod3} {
-b.testPod(ctx, f.ClientSet, pod)
+b.testPod(ctx, f, pod)
 }
 })
 
@@ -339,7 +339,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
 pod.Spec.InitContainers[0].Command = []string{"sh", "-c", "env | grep user_a=b"}
 b.create(ctx, pod, template)
 
-b.testPod(ctx, f.ClientSet, pod)
+b.testPod(ctx, f, pod)
 })
 
 ginkgo.It("removes reservation from claim when pod is done", func(ctx context.Context) {
@@ -394,7 +394,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
 return b.f.ClientSet.ResourceV1beta1().ResourceClaims(b.f.Namespace.Name).Get(ctx, claim.Name, metav1.GetOptions{})
 }).WithTimeout(f.Timeouts.PodDelete).ShouldNot(gomega.HaveField("Status.Allocation", (*resourceapi.AllocationResult)(nil)))
 
-b.testPod(ctx, f.ClientSet, pod)
+b.testPod(ctx, f, pod)
 
 ginkgo.By(fmt.Sprintf("deleting pod %s", klog.KObj(pod)))
 framework.ExpectNoError(b.f.ClientSet.CoreV1().Pods(b.f.Namespace.Name).Delete(ctx, pod.Name, metav1.DeleteOptions{}))
@@ -496,7 +496,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
 ginkgo.It("supports claim and class parameters", func(ctx context.Context) {
 pod, template := b.podInline()
 b.create(ctx, pod, template)
-b.testPod(ctx, f.ClientSet, pod, expectedEnv...)
+b.testPod(ctx, f, pod, expectedEnv...)
 })
 
 ginkgo.It("supports reusing resources", func(ctx context.Context) {
@@ -518,7 +518,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
 go func() {
 defer ginkgo.GinkgoRecover()
 defer wg.Done()
-b.testPod(ctx, f.ClientSet, pod, expectedEnv...)
+b.testPod(ctx, f, pod, expectedEnv...)
 err := f.ClientSet.CoreV1().Pods(pod.Namespace).Delete(ctx, pod.Name, metav1.DeleteOptions{})
 framework.ExpectNoError(err, "delete pod")
 framework.ExpectNoError(e2epod.WaitForPodNotFoundInNamespace(ctx, f.ClientSet, pod.Name, pod.Namespace, time.Duration(numPods)*f.Timeouts.PodStartSlow))
@@ -548,7 +548,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
 go func() {
 defer ginkgo.GinkgoRecover()
 defer wg.Done()
-b.testPod(ctx, f.ClientSet, pod, expectedEnv...)
+b.testPod(ctx, f, pod, expectedEnv...)
 }()
 }
 wg.Wait()
@@ -572,7 +572,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
 class.Name = deviceClassName
 b.create(ctx, class)
 
-b.testPod(ctx, f.ClientSet, pod, expectedEnv...)
+b.testPod(ctx, f, pod, expectedEnv...)
 })
 
 ginkgo.It("retries pod scheduling after updating device class", func(ctx context.Context) {
@@ -603,7 +603,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
 _, err = f.ClientSet.ResourceV1beta1().DeviceClasses().Update(ctx, class, metav1.UpdateOptions{})
 framework.ExpectNoError(err)
 
-b.testPod(ctx, f.ClientSet, pod, expectedEnv...)
+b.testPod(ctx, f, pod, expectedEnv...)
 })
 
 ginkgo.It("runs a pod without a generated resource claim", func(ctx context.Context) {
@@ -1037,7 +1037,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
 pod.Spec.Containers[0].Resources.Claims[0].Name = pod.Spec.ResourceClaims[0].Name
 b.create(ctx, template, pod)
 
-b.testPod(ctx, f.ClientSet, pod)
+b.testPod(ctx, f, pod)
 })
 
 ginkgo.It("supports count/resourceclaims.resource.k8s.io ResourceQuota", func(ctx context.Context) {
@@ -1360,7 +1360,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
 )
 }
 b1.create(ctx, claim1, claim1b, claim2, claim2b, pod)
-b1.testPod(ctx, f.ClientSet, pod)
+b1.testPod(ctx, f, pod)
 })
 }
 multipleDriversContext := func(prefix string, nodeV1alpha4, nodeV1beta1 bool) {
@@ -1391,7 +1391,7 @@ var _ = framework.SIGDescribe("node")("DRA", feature.DynamicResourceAllocation,
 driver.Run(nodes, perNode(1, nodes))
 
 // Now it should run.
-b.testPod(ctx, f.ClientSet, pod)
+b.testPod(ctx, f, pod)
 
 // We need to clean up explicitly because the normal
 // cleanup doesn't work (driver shuts down first).
@@ -1494,7 +1494,7 @@ func (b *builder) parametersEnv() (string, []string) {
 // makePod returns a simple pod with no resource claims.
 // The pod prints its env and waits.
 func (b *builder) pod() *v1.Pod {
-pod := e2epod.MakePod(b.f.Namespace.Name, nil, nil, b.f.NamespacePodSecurityLevel, "env && sleep 100000")
+pod := e2epod.MakePod(b.f.Namespace.Name, nil, nil, b.f.NamespacePodSecurityLevel, "" /* no command = pause */)
 pod.Labels = make(map[string]string)
 pod.Spec.RestartPolicy = v1.RestartPolicyNever
 // Let kubelet kill the pods quickly. Setting
@@ -1621,30 +1621,39 @@ func (b *builder) create(ctx context.Context, objs ...klog.KMetadata) []klog.KMetadata {
 }
 
 // testPod runs pod and checks if container logs contain expected environment variables
-func (b *builder) testPod(ctx context.Context, clientSet kubernetes.Interface, pod *v1.Pod, env ...string) {
+func (b *builder) testPod(ctx context.Context, f *framework.Framework, pod *v1.Pod, env ...string) {
 ginkgo.GinkgoHelper()
-err := e2epod.WaitForPodRunningInNamespace(ctx, clientSet, pod)
+err := e2epod.WaitForPodRunningInNamespace(ctx, f.ClientSet, pod)
 framework.ExpectNoError(err, "start pod")
 
 if len(env) == 0 {
 _, env = b.parametersEnv()
 }
 for _, container := range pod.Spec.Containers {
-testContainerEnv(ctx, clientSet, pod, container.Name, false, env...)
+testContainerEnv(ctx, f, pod, container.Name, false, env...)
 }
 }
 
 // envLineRE matches env output with variables set by test/e2e/dra/test-driver.
 var envLineRE = regexp.MustCompile(`^(?:admin|user|claim)_[a-zA-Z0-9_]*=.*$`)
 
-func testContainerEnv(ctx context.Context, clientSet kubernetes.Interface, pod *v1.Pod, containerName string, fullMatch bool, env ...string) {
+func testContainerEnv(ctx context.Context, f *framework.Framework, pod *v1.Pod, containerName string, fullMatch bool, env ...string) {
 ginkgo.GinkgoHelper()
-log, err := e2epod.GetPodLogs(ctx, clientSet, pod.Namespace, pod.Name, containerName)
-framework.ExpectNoError(err, fmt.Sprintf("get logs for container %s", containerName))
+stdout, stderr, err := e2epod.ExecWithOptionsContext(ctx, f, e2epod.ExecOptions{
+Command: []string{"env"},
+Namespace: pod.Namespace,
+PodName: pod.Name,
+ContainerName: containerName,
+CaptureStdout: true,
+CaptureStderr: true,
+Quiet: true,
+})
+framework.ExpectNoError(err, fmt.Sprintf("get env output for container %s", containerName))
+gomega.Expect(stderr).To(gomega.BeEmpty(), fmt.Sprintf("env stderr for container %s", containerName))
 if fullMatch {
 // Find all env variables set by the test driver.
 var actualEnv, expectEnv []string
-for _, line := range strings.Split(log, "\n") {
+for _, line := range strings.Split(stdout, "\n") {
 if envLineRE.MatchString(line) {
 actualEnv = append(actualEnv, line)
 }
@@ -1654,11 +1663,11 @@ func testContainerEnv(ctx context.Context, clientSet kubernetes.Interface, pod *v1.Pod, containerName string, fullMatch bool, env ...string) {
 }
 sort.Strings(actualEnv)
 sort.Strings(expectEnv)
-gomega.Expect(actualEnv).To(gomega.Equal(expectEnv), fmt.Sprintf("container %s log output:\n%s", containerName, log))
+gomega.Expect(actualEnv).To(gomega.Equal(expectEnv), fmt.Sprintf("container %s env output:\n%s", containerName, stdout))
 } else {
 for i := 0; i < len(env); i += 2 {
 envStr := fmt.Sprintf("\n%s=%s\n", env[i], env[i+1])
-gomega.Expect(log).To(gomega.ContainSubstring(envStr), fmt.Sprintf("container %s env variables", containerName))
+gomega.Expect(stdout).To(gomega.ContainSubstring(envStr), fmt.Sprintf("container %s env variables", containerName))
 }
 }
 }
