@@ -12,8 +12,10 @@ import (
1212
1313 . "github.com/onsi/ginkgo/v2"
1414 . "github.com/onsi/gomega"
15+ appsv1 "k8s.io/api/apps/v1"
1516 corev1 "k8s.io/api/core/v1"
1617 "k8s.io/apimachinery/pkg/api/errors"
18+ "k8s.io/apimachinery/pkg/api/resource"
1719 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1820 "k8s.io/apimachinery/pkg/labels"
1921 "k8s.io/apimachinery/pkg/types"
@@ -27,11 +29,13 @@ import (
2729 performancev2 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/performanceprofile/v2"
2830 "github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/controller/performanceprofile/components"
2931 profilecomponent "github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/controller/performanceprofile/components/profile"
32+ componenttuned "github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/controller/performanceprofile/components/tuned"
3033 manifestsutil "github.com/openshift/cluster-node-tuning-operator/pkg/util"
3134 testutils "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils"
3235 "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/cgroup/runtime"
3336 testclient "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/client"
3437 "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/cluster"
38+ "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/deployments"
3539 "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/discovery"
3640 "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/hypershift"
3741 "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/label"
@@ -996,6 +1000,155 @@ var _ = Describe("[rfe_id:28761][performance] Updating parameters in performance
9961000 })
9971001 })
9981002
1003+ Context ("Verify IRQ housekeeping updates" , Ordered , Label (string (label .Tier2 )), func () {
1004+ var targetNode * corev1.Node
1005+ var isolatedCPUSet cpuset.CPUSet
1006+
1007+ testutils .CustomBeforeAll (func () {
1008+ initialProfile = profile .DeepCopy ()
1009+ })
1010+
1011+ It ("[test_id:99999] should update housekeeping CPUs when performance profile is modified" , func () {
1012+
1013+ if componenttuned .IsIRQBalancingGloballyDisabled (profile ) {
1014+ Skip ("this test needs IRQ balancing (GloballyDisableIrqLoadBalancing=false)" )
1015+ }
1016+
1017+ ctx := context .TODO ()
1018+
1019+ // Get current profile CPU configuration
1020+ Expect (profile .Spec .CPU .Reserved ).ToNot (BeNil (), "expected reserved CPUs, found none" )
1021+ Expect (profile .Spec .CPU .Isolated ).ToNot (BeNil (), "expected isolated CPUs, found none" )
1022+
1023+ reservedCPUSet , err := cpuset .Parse (string (* profile .Spec .CPU .Reserved ))
1024+ Expect (err ).ToNot (HaveOccurred (), "failed to parse reserved CPUs" )
1025+
1026+ isolatedCPUSet , err = cpuset .Parse (string (* profile .Spec .CPU .Isolated ))
1027+ Expect (err ).ToNot (HaveOccurred (), "failed to parse isolated CPUs" )
1028+
1029+ targetNodeIdx := nodes .PickNodeIdx (workerRTNodes )
1030+ targetNode = & workerRTNodes [targetNodeIdx ]
1031+ Expect (targetNode ).ToNot (BeNil (), "missing target node" )
1032+ By (fmt .Sprintf ("Using target worker node %q" , targetNode .Name ))
1033+
1034+ // Ensure we have enough isolated CPUs for the test
1035+ // minimum amount to avoid SMT-alignment error
1036+ cpuRequest := 2
1037+ if cpuRequest >= isolatedCPUSet .Size () {
1038+ Skip (fmt .Sprintf ("cpus request %d is greater than the available isolated cpus %d" , cpuRequest , isolatedCPUSet .Size ()))
1039+ }
1040+
1041+ By ("Creating a Deployment with guaranteed pod that has irq-load-balancing.crio.io: housekeeping annotation" )
1042+ annotations := map [string ]string {
1043+ "irq-load-balancing.crio.io" : "housekeeping" ,
1044+ }
1045+ podTemplate := getTestPodWithProfileAndAnnotations (profile , annotations , cpuRequest )
1046+
1047+ dp := deployments .Make ("irq-housekeeping-dp" , testutils .NamespaceTesting ,
1048+ deployments .WithPodTemplate (podTemplate ),
1049+ deployments .WithNodeSelector (map [string ]string {testutils .LabelHostname : targetNode .Name }),
1050+ )
1051+
1052+ err = testclient .DataPlaneClient .Create (ctx , dp )
1053+ Expect (err ).ToNot (HaveOccurred (), "failed to create test deployment" )
1054+ defer func () {
1055+ By ("Cleaning up: deleting deployment" )
1056+ testclient .DataPlaneClient .Delete (ctx , dp )
1057+ }()
1058+
1059+ By ("Waiting for the deployment to be ready" )
1060+ desiredStatus := appsv1.DeploymentStatus {
1061+ Replicas : 1 ,
1062+ AvailableReplicas : 1 ,
1063+ }
1064+ err = deployments .WaitForDesiredDeploymentStatus (ctx , dp , testclient .DataPlaneClient , dp .Namespace , dp .Name , desiredStatus )
1065+ Expect (err ).ToNot (HaveOccurred (), "deployment did not reach desired status" )
1066+
1067+ By ("Getting the pod from the deployment" )
1068+ podList := & corev1.PodList {}
1069+ listOptions := & client.ListOptions {
1070+ Namespace : dp .Namespace ,
1071+ LabelSelector : labels .SelectorFromSet (dp .Spec .Selector .MatchLabels ),
1072+ }
1073+ err = testclient .DataPlaneClient .List (ctx , podList , listOptions )
1074+ Expect (err ).ToNot (HaveOccurred (), "failed to list pods from deployment" )
1075+ Expect (len (podList .Items )).To (Equal (1 ), "expected exactly one pod in deployment" )
1076+ testpod := & podList .Items [0 ]
1077+ Expect (testpod .Status .QOSClass ).To (Equal (corev1 .PodQOSGuaranteed ), "Test pod does not have QoS class of Guaranteed" )
1078+
1079+ By ("Verifying OPENSHIFT_HOUSEKEEPING_CPUS environment variable is set" )
1080+ initialHousekeepingCPUSet , err := getHousekeepingCPUsFromEnv (testpod )
1081+ Expect (err ).ToNot (HaveOccurred (), "failed to get OPENSHIFT_HOUSEKEEPING_CPUS from pod" )
1082+ Expect (initialHousekeepingCPUSet .Size ()).ToNot (BeZero (), "OPENSHIFT_HOUSEKEEPING_CPUS should not be empty" )
1083+
1084+ By ("Verifying initial IRQ affinity includes housekeeping CPUs" )
1085+ smpAffinitySet , err := nodes .GetDefaultSmpAffinitySet (ctx , targetNode )
1086+ Expect (err ).ToNot (HaveOccurred (), "failed to get default smp affinity" )
1087+ onlineCPUsSet , err := nodes .GetOnlineCPUsSet (ctx , targetNode )
1088+ Expect (err ).ToNot (HaveOccurred (), "failed to get online CPUs" )
1089+ smpAffinitySet = smpAffinitySet .Intersection (onlineCPUsSet )
1090+
1091+ Expect (initialHousekeepingCPUSet .IsSubsetOf (smpAffinitySet )).To (BeTrue (),
1092+ "Housekeeping CPUs %s should be subset of IRQ affinity %s" , initialHousekeepingCPUSet .String (), smpAffinitySet .String ())
1093+
1094+ By ("Modifying the performance profile to change reserved and isolated CPUs" )
1095+
1096+ // Move one isolated CPU to reserved to trigger housekeeping CPUs update
1097+ cpuToMove := cpuset .New (isolatedCPUSet .List ()[0 ])
1098+ newReservedSet := reservedCPUSet .Union (cpuToMove )
1099+ newIsolatedSet := isolatedCPUSet .Difference (cpuToMove )
1100+
1101+ profile .Spec .CPU .Reserved = ptr .To (performancev2 .CPUSet (newReservedSet .String ()))
1102+ profile .Spec .CPU .Isolated = ptr .To (performancev2 .CPUSet (newIsolatedSet .String ()))
1103+
1104+ By ("Updating the performance profile" )
1105+ profiles .UpdateWithRetry (profile )
1106+
1107+ By ("Waiting for tuning to start updating" )
1108+ profilesupdate .WaitForTuningUpdating (ctx , profile )
1109+
1110+ By ("Waiting for tuning to complete" )
1111+ profilesupdate .WaitForTuningUpdated (ctx , profile )
1112+
1113+ By ("Waiting for the deployment to be ready again after profile update and node reboot" )
1114+ Eventually (func () error {
1115+ return deployments .WaitForDesiredDeploymentStatus (ctx , dp , testclient .DataPlaneClient , dp .Namespace , dp .Name , desiredStatus )
1116+ }).WithTimeout (20 * time .Minute ).WithPolling (30 * time .Second ).Should (Succeed (), "deployment did not become ready after profile update" )
1117+
1118+ By ("Getting the updated pod from the deployment" )
1119+ err = testclient .DataPlaneClient .List (ctx , podList , listOptions )
1120+ Expect (err ).ToNot (HaveOccurred (), "failed to list pods from deployment after update" )
1121+ Expect (len (podList .Items )).To (Equal (1 ), "expected exactly one pod in deployment after update" )
1122+ testpod = & podList .Items [0 ]
1123+
1124+ By ("Verifying OPENSHIFT_HOUSEKEEPING_CPUS is updated after profile modification" )
1125+ updatedHousekeepingCPUSet , err := getHousekeepingCPUsFromEnv (testpod )
1126+ Expect (err ).ToNot (HaveOccurred (), "failed to get updated OPENSHIFT_HOUSEKEEPING_CPUS from pod" )
1127+ Expect (updatedHousekeepingCPUSet .Size ()).ToNot (BeZero (), "updated OPENSHIFT_HOUSEKEEPING_CPUS should not be empty" )
1128+
1129+ By ("Verifying updated IRQ affinity includes housekeeping CPUs" )
1130+ updatedSmpAffinitySet , err := nodes .GetDefaultSmpAffinitySet (ctx , targetNode )
1131+ Expect (err ).ToNot (HaveOccurred (), "failed to get updated default smp affinity" )
1132+ updatedOnlineCPUsSet , err := nodes .GetOnlineCPUsSet (ctx , targetNode )
1133+ Expect (err ).ToNot (HaveOccurred (), "failed to get updated online CPUs" )
1134+ updatedSmpAffinitySet = updatedSmpAffinitySet .Intersection (updatedOnlineCPUsSet )
1135+
1136+ Expect (updatedHousekeepingCPUSet .IsSubsetOf (updatedSmpAffinitySet )).To (BeTrue (),
1137+ "Updated housekeeping CPUs %s should be subset of IRQ affinity %s" , updatedHousekeepingCPUSet .String (), updatedSmpAffinitySet .String ())
1138+ })
1139+
1140+ AfterAll (func () {
1141+ By ("Reverting the profile to its initial state" )
1142+ profiles .UpdateWithRetry (initialProfile )
1143+
1144+ By (fmt .Sprintf ("Applying changes in performance profile and waiting until %s will start updating" , poolName ))
1145+ profilesupdate .WaitForTuningUpdating (context .TODO (), profile )
1146+
1147+ By (fmt .Sprintf ("Waiting when %s finishes updates" , poolName ))
1148+ profilesupdate .WaitForTuningUpdated (context .TODO (), profile )
1149+ })
1150+ })
1151+
9991152 Context ("[rfe_id:54374][rps_mask] Network Stack Pinning" , Label (string (label .RPSMask ), string (label .Tier1 )), func () {
10001153
10011154 BeforeEach (func () {
@@ -1435,3 +1588,50 @@ func copyNumaCoreSiblings(src map[int]map[int][]int) map[int]map[int][]int {
14351588 }
14361589 return dst
14371590}
1591+
1592+ // getHousekeepingCPUsFromEnv extracts the OPENSHIFT_HOUSEKEEPING_CPUS environment variable from the pod and returns it as a CPUSet.
1593+ func getHousekeepingCPUsFromEnv (pod * corev1.Pod ) (cpuset.CPUSet , error ) {
1594+ const housekeepingCpusEnv = "OPENSHIFT_HOUSEKEEPING_CPUS"
1595+
1596+ cmd := []string {"printenv" , housekeepingCpusEnv }
1597+ output , err := pods .ExecCommandOnPod (testclient .K8sClient , pod , "" , cmd )
1598+ if err != nil {
1599+ return cpuset .New (), fmt .Errorf ("failed to get %s from pod %s/%s: %v" , housekeepingCpusEnv , pod .Namespace , pod .Name , err )
1600+ }
1601+
1602+ value := strings .TrimSpace (string (output ))
1603+ if value == "" {
1604+ return cpuset .New (), fmt .Errorf ("%s environment variable not found or empty in pod %s/%s" , housekeepingCpusEnv , pod .Namespace , pod .Name )
1605+ }
1606+
1607+ cpuSet , err := cpuset .Parse (value )
1608+ if err != nil {
1609+ return cpuset .New (), fmt .Errorf ("failed to parse %s value %q from pod %s/%s: %v" , housekeepingCpusEnv , value , pod .Namespace , pod .Name , err )
1610+ }
1611+
1612+ return cpuSet , nil
1613+ }
1614+
1615+ // getTestPodWithProfileAndAnnotations creates a test pod with specified profile and annotations
1616+ func getTestPodWithProfileAndAnnotations (perfProf * performancev2.PerformanceProfile , annotations map [string ]string , cpus int ) * corev1.Pod {
1617+ testpod := pods .GetTestPod ()
1618+ if len (annotations ) > 0 {
1619+ testpod .Annotations = annotations
1620+ }
1621+ testpod .Namespace = testutils .NamespaceTesting
1622+
1623+ cpuCount := fmt .Sprintf ("%d" , cpus )
1624+ resCpu := resource .MustParse (cpuCount )
1625+ resMem := resource .MustParse ("256Mi" )
1626+ testpod .Spec .Containers [0 ].Resources = corev1.ResourceRequirements {
1627+ Limits : corev1.ResourceList {
1628+ corev1 .ResourceCPU : resCpu ,
1629+ corev1 .ResourceMemory : resMem ,
1630+ },
1631+ }
1632+ if perfProf != nil {
1633+ runtimeClassName := components .GetComponentName (perfProf .Name , components .ComponentNamePrefix )
1634+ testpod .Spec .RuntimeClassName = & runtimeClassName
1635+ }
1636+ return testpod
1637+ }
0 commit comments