@@ -19,7 +19,6 @@ package kfto
19
19
import (
20
20
"fmt"
21
21
"testing"
22
- "time"
23
22
24
23
kftov1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1"
25
24
. "github.com/onsi/gomega"
@@ -183,27 +182,6 @@ func runKFTOPyTorchJob(t *testing.T, image string, gpu Accelerator, numGpus, num
183
182
test .Eventually (PyTorchJob (test , namespace , tuningJob .Name ), TestTimeoutDouble ).
184
183
Should (WithTransform (PyTorchJobConditionRunning , Equal (corev1 .ConditionTrue )))
185
184
186
- // Verify GPU utilization
187
- if IsOpenShift (test ) && gpu == NVIDIA {
188
- trainingPods := GetPods (test , namespace , metav1.ListOptions {LabelSelector : "training.kubeflow.org/job-name=" + tuningJob .GetName ()})
189
- test .Expect (trainingPods ).To (HaveLen (numberOfWorkerNodes + 1 )) // +1 is a master node
190
-
191
- for _ , trainingPod := range trainingPods {
192
- // Check that GPUs for training pods were utilized recently
193
- test .Eventually (OpenShiftPrometheusGpuUtil (test , trainingPod , gpu ), 10 * time .Minute ).
194
- Should (
195
- And (
196
- HaveLen (numGpus ),
197
- ContainElement (
198
- // Check that at least some GPU was utilized on more than 10%
199
- HaveField ("Value" , BeNumerically (">" , 10 )),
200
- ),
201
- ),
202
- )
203
- }
204
- test .T ().Log ("All GPUs were successfully utilized" )
205
- }
206
-
207
185
// Make sure the PyTorch job succeeded
208
186
test .Eventually (PyTorchJob (test , namespace , tuningJob .Name ), TestTimeoutLong ).Should (WithTransform (PyTorchJobConditionSucceeded , Equal (corev1 .ConditionTrue )))
209
187
test .T ().Logf ("PytorchJob %s/%s ran successfully" , tuningJob .Namespace , tuningJob .Name )
0 commit comments