
Commit 05cf460

e2e: PP: cover ExecCPUAffinity support in tests
Add basic e2e tests that check the default behavior of the performance profile when `ExecCPUAffinity: first` is enabled by default.

Signed-off-by: Shereen Haj <shajmakh@redhat.com>
1 parent bff09b0 commit 05cf460
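The check these tests automate boils down to: exec a process inside a container on a node tuned by the performance profile, read the CPU it runs on via `ps -o psr`, and compare it against the CPU the feature is expected to pin it to (the first shared or first exclusive CPU of the container). Below is a minimal, self-contained sketch of that idea outside the test harness; the expected CPU id and the standalone-program framing are illustrative assumptions, and only the shell command mirrors the tests (`psr=` suppresses the ps header line so the output is just the CPU id).

package main

import (
	"fmt"
	"os/exec"
	"strconv"
	"strings"
)

func main() {
	// Assumption for illustration: the exec'd process is expected to land on CPU 0.
	expectedCPU := 0

	// Start a background process the way the tests do and report the CPU it runs on.
	// "psr=" prints only the processor column, with no header line.
	out, err := exec.Command("/bin/bash", "-c", "sleep 10 & SLPID=$!; ps -o psr= -p $SLPID;").Output()
	if err != nil {
		panic(err)
	}
	cpu, err := strconv.Atoi(strings.TrimSpace(string(out)))
	if err != nil {
		panic(err)
	}
	fmt.Printf("exec'd process ran on CPU %d (expected %d)\n", cpu, expectedCPU)
}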

File tree

5 files changed: +327 -31 lines


report.xml

Lines changed: 105 additions & 0 deletions
Large diffs are not rendered by default.

test/e2e/performanceprofile/functests/11_mixedcpus/mixedcpus.go

Lines changed: 129 additions & 0 deletions
@@ -510,6 +510,129 @@ var _ = Describe("Mixedcpus", Ordered, Label(string(label.MixedCPUs)), func() {
 			})
 		})
 	})
+
+	Context("Check exec-cpu-affinity feature", func() {
+		When("exec-cpu-affinity is enabled (default in PP)", func() {
+			var workerRTNode *corev1.Node
+			var profile *performancev2.PerformanceProfile
+			var getter cgroup.ControllersGetter
+			var updatedShared, updatedIsolated cpuset.CPUSet
+
+			BeforeEach(func() {
+				By("Checking if exec-cpu-affinity is enabled by default in the profile")
+				profile, _ = profiles.GetByNodeLabels(testutils.NodeSelectorLabels)
+				Expect(profile).ToNot(BeNil(), "Failed to get performance profile")
+				if profile.Annotations != nil {
+					val, ok := profile.Annotations[performancev2.PerformanceProfileDisableExecCPUAffinityAnnotation]
+					if ok && val == "true" {
+						// fail loudly because the default should be enabled
+						Fail("exec-cpu-affinity is disabled in the profile")
+					}
+				}
+
+				By("Updating performance profile to have enough shared cpus if needed")
+				// update the profile to have 2 shared cpus
+				currentShared := mustParse(string(*profile.Spec.CPU.Shared))
+				if len(currentShared.List()) < 2 {
+					testlog.Info("shared cpuset has less than 2 cpus; this test requires at least 2 shared cpus; updating the profile")
+					isolated := mustParse(string(*profile.Spec.CPU.Isolated))
+
+					// we need 4 isolated and shared CPUs in total:
+					// 1 as a buffer for the node's base load
+					// 1 as the test gu pod requests
+					// 2 as shared cpus
+					leastIsolatedCpus := 3
+					if len(currentShared.List()) == 0 {
+						leastIsolatedCpus = 4
+					}
+					if len(isolated.List()) < leastIsolatedCpus {
+						Skip(fmt.Sprintf("isolated cpuset has less than %d cpus; this test requires at least %d isolated cpus", leastIsolatedCpus, leastIsolatedCpus))
+					}
+
+					updatedShared = cpuset.New(isolated.List()[0], isolated.List()[1])
+					updatedIsolated = cpuset.New(isolated.List()[2:]...)
+
+					if len(currentShared.List()) == 1 {
+						updatedShared = cpuset.New(currentShared.List()[0], isolated.List()[0])
+						updatedIsolated = cpuset.New(isolated.List()[1:]...)
+					}
+
+					testlog.Infof("shared cpu ids to be updated are: %q", updatedShared.String())
+					profile.Spec.CPU.Isolated = cpuSetToPerformanceCPUSet(&updatedIsolated)
+					profile.Spec.CPU.Shared = cpuSetToPerformanceCPUSet(&updatedShared)
+					profile.Spec.WorkloadHints.MixedCpus = ptr.To(true) // if not already
+
+					profiles.UpdateWithRetry(profile)
+
+					poolName := poolname.GetByProfile(context.TODO(), profile)
+					By(fmt.Sprintf("Applying changes in performance profile and waiting until %s starts updating", poolName))
+					profilesupdate.WaitForTuningUpdating(context.TODO(), profile)
+					By(fmt.Sprintf("Waiting until %s finishes updating", poolName))
+					profilesupdate.WaitForTuningUpdated(context.TODO(), profile)
+				}
+
+				workerRTNodes, err := nodes.GetByLabels(testutils.NodeSelectorLabels)
+				Expect(err).ToNot(HaveOccurred())
+				workerRTNodes, err = nodes.MatchingOptionalSelector(workerRTNodes)
+				Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("error looking for the optional selector: %v", err))
+				Expect(workerRTNodes).ToNot(BeEmpty())
+				workerRTNode = &workerRTNodes[0]
+
+				getter, err = cgroup.BuildGetter(ctx, testclient.DataPlaneClient, testclient.K8sClient)
+				Expect(err).ToNot(HaveOccurred())
+			})
+
+			It("should pin exec process to first shared CPU of the container - guaranteed pod", func() {
+				By("Creating a guaranteed test pod with shared CPU request")
+				rl := &corev1.ResourceList{
+					corev1.ResourceCPU:    resource.MustParse("1"),
+					corev1.ResourceMemory: resource.MustParse("100Mi"),
+					sharedCpusResource:    resource.MustParse("1"),
+				}
+				testPod := makePod(ctx, testclient.DataPlaneClient, testutils.NamespaceTesting,
+					withRequests(rl),
+					withLimits(rl),
+					onNode(workerRTNode.Name),
+					withRuntime(components.GetComponentName(profile.Name, components.ComponentNamePrefix)))
+
+				Expect(testclient.Client.Create(ctx, testPod)).To(Succeed(), "Failed to create test pod")
+				testPod, err := pods.WaitForCondition(ctx, client.ObjectKeyFromObject(testPod), corev1.PodReady, corev1.ConditionTrue, 5*time.Minute)
+				Expect(err).ToNot(HaveOccurred())
+				defer func() {
+					if testPod != nil {
+						testlog.Infof("deleting pod %q", testPod.Name)
+						Expect(pods.Delete(ctx, testPod)).To(BeTrue(), "Failed to delete pod")
+					}
+				}()
+
+				By("Preparing comparison data")
+				cpusetCfg := &controller.CpuSet{}
+				Expect(getter.Container(ctx, testPod, testPod.Spec.Containers[0].Name, cpusetCfg)).To(Succeed(), "Failed to get cpuset config for test pod")
+
+				cpusIncludingShared, err := cpuset.Parse(cpusetCfg.Cpus)
+				Expect(err).ToNot(HaveOccurred(), "Failed to parse cpuset config for test pod cpus=%q", cpusetCfg.Cpus)
+				cntShared := cpusIncludingShared.Difference(updatedIsolated)
+				firstSharedCPU := cntShared.List()[0]
+				testlog.Infof("first shared CPU: %d", firstSharedCPU)
+
+				sharedCpuRequest := testPod.Spec.Containers[0].Resources.Requests.Name(sharedCpusResource, resource.DecimalSI).Value()
+				retries := int(10 / sharedCpuRequest)
+				By("Run exec command on the pod and verify the process is pinned to the first shared CPU")
+
+				for i := 0; i < retries; i++ {
+					cmd := []string{"/bin/bash", "-c", "sleep 10 & SLPID=$!; ps -o psr= -p $SLPID;"}
+					output, err := pods.ExecCommandOnPod(testclient.K8sClient, testPod, testPod.Spec.Containers[0].Name, cmd)
+					Expect(err).ToNot(HaveOccurred(), "Failed to exec command on the pod; retry %d", i)
+					strout := string(output)
+					testlog.Infof("exec command output: %s", strout)
+
+					execProcessCPUs := strings.TrimSpace(strout)
+					Expect(execProcessCPUs).ToNot(BeEmpty(), "Failed to get exec process CPU; retry %d", i)
+					Expect(execProcessCPUs).To(Equal(fmt.Sprintf("%d", firstSharedCPU)), "Exec process CPU is not the first shared CPU; retry %d", i)
+				}
+			})
+		})
+	})
 })
 
 func setup(ctx context.Context) func(ctx2 context.Context) {
@@ -659,6 +782,12 @@ func withRuntime(name string) func(p *corev1.Pod) {
 	}
 }
 
+func onNode(nodeName string) func(p *corev1.Pod) {
+	return func(p *corev1.Pod) {
+		p.Spec.NodeName = nodeName
+	}
+}
+
 func getTestingNamespace() corev1.Namespace {
 	return *namespaces.TestingNamespace
 }
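For reference, the arithmetic behind `firstSharedCPU` above: the container's cgroup cpuset contains both its exclusive CPUs and the shared CPUs, so subtracting the profile's isolated set leaves only the shared CPUs, and the lowest remaining id is the one the exec'd process should land on. A small standalone sketch using the same k8s.io/utils/cpuset calls; the cpuset strings are made-up example values.

package main

import (
	"fmt"

	"k8s.io/utils/cpuset"
)

func main() {
	// Hypothetical values: the container cpuset (exclusive + shared CPUs)
	// and the isolated set from the performance profile.
	containerCpus, err := cpuset.Parse("3,8-9")
	if err != nil {
		panic(err)
	}
	isolated, err := cpuset.Parse("2-7")
	if err != nil {
		panic(err)
	}

	// Removing the isolated CPUs leaves the shared CPUs; the first one is the expected pin target.
	shared := containerCpus.Difference(isolated)
	fmt.Printf("shared CPUs: %s, first shared CPU: %d\n", shared.String(), shared.List()[0])
}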

test/e2e/performanceprofile/functests/1_performance/cpu_management.go

Lines changed: 69 additions & 30 deletions
@@ -12,7 +12,6 @@ import (
 
 	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
-	"k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
@@ -248,7 +247,7 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() {
 		})
 
 		AfterEach(func() {
-			deleteTestPod(context.TODO(), testpod)
+			Expect(pods.Delete(context.TODO(), testpod)).To(BeTrue(), "Failed to delete pod")
 		})
 
 		DescribeTable("Verify CPU usage by stress PODs", func(ctx context.Context, guaranteed bool) {
@@ -339,7 +338,7 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() {
 			Expect(err).ToNot(HaveOccurred())
 		})
 		AfterEach(func() {
-			deleteTestPod(context.TODO(), testpod)
+			Expect(pods.Delete(context.TODO(), testpod)).To(BeTrue(), "Failed to delete pod")
 		})
 		When("kubelet is restart", func() {
 			It("[test_id: 73501] defaultCpuset should not change", func() {
@@ -422,7 +421,7 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() {
 
 		AfterEach(func() {
 			if testpod != nil {
-				deleteTestPod(context.TODO(), testpod)
+				Expect(pods.Delete(context.TODO(), testpod)).To(BeTrue(), "Failed to delete pod")
 			}
 		})
 
@@ -481,7 +480,7 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() {
 				fmt.Sprintf("IRQ still active on CPU%s", psr))
 
 			By("Checking that after removing POD default smp affinity is returned back to all active CPUs")
-			deleteTestPod(context.TODO(), testpod)
+			Expect(pods.Delete(context.TODO(), testpod)).To(BeTrue(), "Failed to delete pod")
 			defaultSmpAffinitySet, err = nodes.GetDefaultSmpAffinitySet(context.TODO(), workerRTNode)
 			Expect(err).ToNot(HaveOccurred())
 
@@ -580,7 +579,7 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() {
 			if testpod == nil {
 				return
 			}
-			deleteTestPod(context.TODO(), testpod)
+			Expect(pods.Delete(context.TODO(), testpod)).To(BeTrue(), "Failed to delete pod")
 		})
 
 		It("[test_id:49149] should reject pods which request integral CPUs not aligned with machine SMT level", func() {
@@ -633,7 +632,7 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() {
 			if testpod == nil {
 				return
 			}
-			deleteTestPod(context.TODO(), testpod)
+			Expect(pods.Delete(context.TODO(), testpod)).To(BeTrue(), "Failed to delete pod")
 		})
 
 		DescribeTable("Verify Hyper-Thread aware scheduling for guaranteed pods",
@@ -680,7 +679,7 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() {
 				testpod = startHTtestPod(ctx, cpuCount)
 				Expect(checkPodHTSiblings(ctx, testpod)).To(BeTrue(), "Pod cpu set does not map to host cpu sibling pairs")
 				By("Deleting test pod...")
-				deleteTestPod(ctx, testpod)
+				Expect(pods.Delete(ctx, testpod)).To(BeTrue(), "Failed to delete pod")
 			}
 		},
 
@@ -983,7 +982,7 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() {
 			defer func() {
 				if guaranteedPod != nil {
 					testlog.Infof("deleting pod %q", guaranteedPod.Name)
-					deleteTestPod(ctx, guaranteedPod)
+					Expect(pods.Delete(ctx, guaranteedPod)).To(BeTrue(), "Failed to delete guaranteed pod")
 				}
 			}()
 
@@ -1014,7 +1013,7 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() {
 			defer func() {
 				if bestEffortPod != nil {
 					testlog.Infof("deleting pod %q", bestEffortPod.Name)
-					deleteTestPod(ctx, bestEffortPod)
+					Expect(pods.Delete(ctx, bestEffortPod)).To(BeTrue(), "Failed to delete best-effort pod")
 				}
 			}()
 
@@ -1142,15 +1141,73 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() {
 			defer func() {
 				if guPod != nil {
 					testlog.Infof("deleting pod %q", guPod.Name)
-					deleteTestPod(ctx, guPod)
+					Expect(pods.Delete(ctx, guPod)).To(BeTrue(), "Failed to delete guaranteed pod")
 				}
 				if buPod != nil {
 					testlog.Infof("deleting pod %q", buPod.Name)
-					deleteTestPod(ctx, buPod)
+					Expect(pods.Delete(ctx, buPod)).To(BeTrue(), "Failed to delete burstable pod")
 				}
 			}()
 		})
 	})
+
+	Context("Check exec-cpu-affinity feature", func() {
+		When("exec-cpu-affinity is enabled (default in PP)", func() {
+			// shared-cpus case is covered in the 11_mixedcpus test
+			// legacy test is covered in 2_performance_update
+
+			BeforeEach(func() {
+				By("Checking if exec-cpu-affinity is enabled by default in the profile")
+				profile, _ := profiles.GetByNodeLabels(testutils.NodeSelectorLabels)
+				Expect(profile).ToNot(BeNil(), "Failed to get performance profile")
+				if profile.Annotations != nil {
+					val, ok := profile.Annotations[performancev2.PerformanceProfileDisableExecCPUAffinityAnnotation]
+					if ok && val == "true" {
+						// fail loudly because the default should be enabled
+						Fail("exec-cpu-affinity is disabled in the profile")
+					}
+				}
+			})
+
+			It("should pin exec process to first CPU dedicated to the container - guaranteed pod", func() {
+				By("Creating a guaranteed test pod")
+				testPod := makePod(ctx, workerRTNode, true)
+				Expect(testclient.Client.Create(ctx, testPod)).To(Succeed(), "Failed to create test pod")
+				testPod, err = pods.WaitForCondition(ctx, client.ObjectKeyFromObject(testPod), corev1.PodReady, corev1.ConditionTrue, 5*time.Minute)
+				Expect(err).ToNot(HaveOccurred())
+				defer func() {
+					if testPod != nil {
+						testlog.Infof("deleting pod %q", testPod.Name)
+						Expect(pods.Delete(ctx, testPod)).To(BeTrue(), "Failed to delete test pod")
+					}
+				}()
+
+				cpusetCfg := &controller.CpuSet{}
+				Expect(getter.Container(ctx, testPod, testPod.Spec.Containers[0].Name, cpusetCfg)).To(Succeed(), "Failed to get cpuset config for test pod")
+
+				// assuming no shared cpus are used; the cpuset may use range notation, e.g. "2-3"
+				cpusList := strings.Split(cpusetCfg.Cpus, ",")
+				Expect(cpusList).ToNot(BeEmpty())
+				firstExclusiveCPU := strings.TrimSpace(strings.Split(cpusList[0], "-")[0])
+				testlog.Infof("first exclusive CPU: %s", firstExclusiveCPU)
+
+				cpuRequest := testPod.Spec.Containers[0].Resources.Requests.Name(corev1.ResourceCPU, resource.DecimalSI).Value()
+				retries := int(10 / cpuRequest)
+				By("Run exec command on the pod and verify the process is pinned to the first exclusive CPU")
+
+				for i := 0; i < retries; i++ {
+					cmd := []string{"/bin/bash", "-c", "sleep 10 & SLPID=$!; ps -o psr= -p $SLPID;"}
+					output, err := pods.ExecCommandOnPod(testclient.K8sClient, testPod, testPod.Spec.Containers[0].Name, cmd)
+					Expect(err).ToNot(HaveOccurred(), "Failed to exec command on the pod; retry %d", i)
+					testlog.Infof("exec command output: %s", string(output))
+
+					execProcessCPUs := strings.TrimSpace(string(output))
+					Expect(execProcessCPUs).ToNot(BeEmpty(), "Failed to get exec process CPU; retry %d", i)
+					Expect(execProcessCPUs).To(Equal(firstExclusiveCPU), "Exec process CPU is not the first exclusive CPU; retry %d", i)
+				}
+			})
+		})
+	})
 })
 
 func extractConfigInfo(output string) (*ContainerConfig, error) {
@@ -1401,24 +1458,6 @@ func getTestPodWithAnnotations(annotations map[string]string, cpus int) *corev1.
 	return testpod
 }
 
-func deleteTestPod(ctx context.Context, testpod *corev1.Pod) (types.UID, bool) {
-	// it possible that the pod already was deleted as part of the test, in this case we want to skip teardown
-	err := testclient.DataPlaneClient.Get(ctx, client.ObjectKeyFromObject(testpod), testpod)
-	if errors.IsNotFound(err) {
-		return "", false
-	}
-
-	testpodUID := testpod.UID
-
-	err = testclient.DataPlaneClient.Delete(ctx, testpod)
-	Expect(err).ToNot(HaveOccurred())
-
-	err = pods.WaitForDeletion(ctx, testpod, pods.DefaultDeletionTimeout*time.Second)
-	Expect(err).ToNot(HaveOccurred())
-
-	return testpodUID, true
-}
-
 func cpuSpecToString(cpus *performancev2.CPU) (string, error) {
 	if cpus == nil {
 		return "", fmt.Errorf("performance CPU field is nil")

test/e2e/performanceprofile/functests/1_performance/irqbalance.go

Lines changed: 1 addition & 1 deletion
@@ -213,7 +213,7 @@ var _ = Describe("[performance] Checking IRQBalance settings", Ordered, func() {
 		defer func() {
 			if testpod != nil {
 				testlog.Infof("deleting pod %q", testpod.Name)
-				deleteTestPod(context.TODO(), testpod)
+				Expect(pods.Delete(context.TODO(), testpod)).To(BeTrue(), "Failed to delete pod")
 			}
 			bannedCPUs, err := getIrqBalanceBannedCPUs(context.TODO(), targetNode)
 			Expect(err).ToNot(HaveOccurred(), "failed to extract the banned CPUs from node %q", targetNode.Name)

test/e2e/performanceprofile/functests/utils/pods/pods.go

Lines changed: 23 additions & 0 deletions
@@ -20,6 +20,7 @@ import (
 	"k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/kubernetes/scheme"
 	"k8s.io/client-go/tools/remotecommand"
+	"k8s.io/klog/v2"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 
 	testclient "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/client"
@@ -51,6 +52,28 @@ func GetTestPod() *corev1.Pod {
 	}
 }
 
+func Delete(ctx context.Context, pod *corev1.Pod) bool {
+	err := testclient.DataPlaneClient.Get(ctx, client.ObjectKeyFromObject(pod), pod)
+	if errors.IsNotFound(err) {
+		klog.InfoS("pod already deleted", "namespace", pod.Namespace, "name", pod.Name)
+		return true
+	}
+
+	err = testclient.DataPlaneClient.Delete(ctx, pod)
+	if err != nil {
+		klog.ErrorS(err, "failed to delete pod", "namespace", pod.Namespace, "name", pod.Name)
+		return false
+	}
+
+	err = WaitForDeletion(ctx, pod, DefaultDeletionTimeout*time.Second)
+	if err != nil {
+		klog.ErrorS(err, "failed to wait for pod deletion", "namespace", pod.Namespace, "name", pod.Name)
+		return false
+	}
+
+	return true
+}
+
 // WaitForDeletion waits until the pod will be removed from the cluster
 func WaitForDeletion(ctx context.Context, pod *corev1.Pod, timeout time.Duration) error {
 	return wait.PollUntilContextTimeout(ctx, time.Second, timeout, true, func(ctx context.Context) (bool, error) {
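The new `pods.Delete` helper replaces the local `deleteTestPod` teardown removed from cpu_management.go: it is a no-op when the pod is already gone, otherwise it deletes the pod and waits for its removal, returning true on success. A typical call site, mirroring the ones updated in this commit (the `testpod` variable and Ginkgo context are assumed from the surrounding test):

AfterEach(func() {
	if testpod != nil {
		// Delete returns true once the pod is confirmed gone (or was already gone).
		Expect(pods.Delete(context.TODO(), testpod)).To(BeTrue(), "Failed to delete pod")
	}
})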

0 commit comments
