Skip to content

Commit 2c79b90

Browse files
sutaakar and openshift-merge-bot[bot]
authored and committed
Remove GPU utilization verification
1 parent 5250144 commit 2c79b90

File tree

1 file changed

+0
-22
lines changed

1 file changed

+0
-22
lines changed

tests/kfto/kfto_training_test.go

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ package kfto
1919
import (
2020
"fmt"
2121
"testing"
22-
"time"
2322

2423
kftov1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1"
2524
. "github.com/onsi/gomega"
@@ -183,27 +182,6 @@ func runKFTOPyTorchJob(t *testing.T, image string, gpu Accelerator, numGpus, num
183182
test.Eventually(PyTorchJob(test, namespace, tuningJob.Name), TestTimeoutDouble).
184183
Should(WithTransform(PyTorchJobConditionRunning, Equal(corev1.ConditionTrue)))
185184

186-
// Verify GPU utilization
187-
if IsOpenShift(test) && gpu == NVIDIA {
188-
trainingPods := GetPods(test, namespace, metav1.ListOptions{LabelSelector: "training.kubeflow.org/job-name=" + tuningJob.GetName()})
189-
test.Expect(trainingPods).To(HaveLen(numberOfWorkerNodes + 1)) // +1 is a master node
190-
191-
for _, trainingPod := range trainingPods {
192-
// Check that GPUs for training pods were utilized recently
193-
test.Eventually(OpenShiftPrometheusGpuUtil(test, trainingPod, gpu), 10*time.Minute).
194-
Should(
195-
And(
196-
HaveLen(numGpus),
197-
ContainElement(
198-
// Check that at least some GPU was utilized on more than 10%
199-
HaveField("Value", BeNumerically(">", 10)),
200-
),
201-
),
202-
)
203-
}
204-
test.T().Log("All GPUs were successfully utilized")
205-
}
206-
207185
// Make sure the PyTorch job succeeded
208186
test.Eventually(PyTorchJob(test, namespace, tuningJob.Name), TestTimeoutLong).Should(WithTransform(PyTorchJobConditionSucceeded, Equal(corev1.ConditionTrue)))
209187
test.T().Logf("PytorchJob %s/%s ran successfully", tuningJob.Namespace, tuningJob.Name)

0 commit comments

Comments
 (0)