Skip to content

Commit 6192483

Browse files
sutaakar authored and openshift-merge-bot[bot] committed
Use proper job name and label for Trainer v2 FMS tests
1 parent 371fdf2 commit 6192483

File tree

2 files changed

+16
-16
lines changed

2 files changed

+16
-16
lines changed

tests/fms/trainer/sft_trainjob_gpu_test.go

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -196,7 +196,7 @@ func createAlpacaTrainJob(test Test, namespace, runtimeName string, config corev
196196
PodTemplateOverrides: []trainerv1alpha1.PodTemplateOverride{
197197
{
198198
TargetJobs: []trainerv1alpha1.PodTemplateOverrideTargetJob{
199-
{Name: "trainer"},
199+
{Name: "node"},
200200
},
201201
Spec: &trainerv1alpha1.PodTemplateSpecOverride{
202202
Tolerations: []corev1.Toleration{
@@ -342,10 +342,10 @@ var mountModelVolumeIntoTrainer = ErrorOption[*trainerv1alpha1.TrainJob](func(to
342342
MountPath: "/mnt/model",
343343
}
344344

345-
// Find the trainer pod template override and add the volume and volume mount
345+
// Find the trainer/node pod template override and add the volume and volume mount
346346
for i := range to.Spec.PodTemplateOverrides {
347347
for _, target := range to.Spec.PodTemplateOverrides[i].TargetJobs {
348-
if target.Name == "trainer" && to.Spec.PodTemplateOverrides[i].Spec != nil {
348+
if target.Name == "node" && to.Spec.PodTemplateOverrides[i].Spec != nil {
349349
to.Spec.PodTemplateOverrides[i].Spec.Volumes = append(to.Spec.PodTemplateOverrides[i].Spec.Volumes, modelVolume)
350350

351351
// Find the node container and add the volume mount
@@ -387,15 +387,15 @@ func createMultiGpuTrainingRuntime(test Test, namespace string, numberOfGpus int
387387
Spec: jobsetv1alpha2.JobSetSpec{
388388
ReplicatedJobs: []jobsetv1alpha2.ReplicatedJob{
389389
{
390-
Name: "trainer",
390+
Name: "node",
391391
Template: batchv1.JobTemplateSpec{
392+
ObjectMeta: metav1.ObjectMeta{
393+
Labels: map[string]string{
394+
"trainer.kubeflow.org/trainjob-ancestor-step": "trainer",
395+
},
396+
},
392397
Spec: batchv1.JobSpec{
393398
Template: corev1.PodTemplateSpec{
394-
ObjectMeta: metav1.ObjectMeta{
395-
Labels: map[string]string{
396-
"trainer.kubeflow.org/trainjob-ancestor-step": "trainer",
397-
},
398-
},
399399
Spec: corev1.PodSpec{
400400
Tolerations: []corev1.Toleration{
401401
{

tests/fms/trainer/sft_trainjob_test.go

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -232,7 +232,7 @@ func createSftTrainJob(test Test, namespace, runtimeName, localQueueName string,
232232
PodTemplateOverrides: []trainerv1alpha1.PodTemplateOverride{
233233
{
234234
TargetJobs: []trainerv1alpha1.PodTemplateOverrideTargetJob{
235-
{Name: "trainer"},
235+
{Name: "node"},
236236
},
237237
Spec: &trainerv1alpha1.PodTemplateSpecOverride{
238238
Tolerations: []corev1.Toleration{
@@ -357,15 +357,15 @@ func createSingleGpuTrainingRuntime(test Test, namespace string) *trainerv1alpha
357357
Spec: jobsetv1alpha2.JobSetSpec{
358358
ReplicatedJobs: []jobsetv1alpha2.ReplicatedJob{
359359
{
360-
Name: "trainer",
360+
Name: "node",
361361
Template: batchv1.JobTemplateSpec{
362+
ObjectMeta: metav1.ObjectMeta{
363+
Labels: map[string]string{
364+
"trainer.kubeflow.org/trainjob-ancestor-step": "trainer",
365+
},
366+
},
362367
Spec: batchv1.JobSpec{
363368
Template: corev1.PodTemplateSpec{
364-
ObjectMeta: metav1.ObjectMeta{
365-
Labels: map[string]string{
366-
"trainer.kubeflow.org/trainjob-ancestor-step": "trainer",
367-
},
368-
},
369369
Spec: corev1.PodSpec{
370370
Tolerations: []corev1.Toleration{
371371
{

0 commit comments

Comments
 (0)