@@ -1024,14 +1024,16 @@ func TestResumeMPIJob(t *testing.T) {
 	// resume the MPIJob
 	mpiJob.Spec.RunPolicy.Suspend = ptr.To(false)
 
-	// expect creation of the pods
+	// expect creation of the worker pods
 	for i := 0; i < int(replicas); i++ {
 		worker := fmjc.newWorker(mpiJob, i)
 		f.kubeActions = append(f.kubeActions, core.NewCreateAction(schema.GroupVersionResource{Resource: "pods"}, mpiJob.Namespace, worker))
 	}
 
-	// expect the launcher update to resume it
+	// expect the launcher update to sync scheduling directives and resume it
 	launcherCopy := launcher.DeepCopy()
+	desiredPodTemplate := fmjc.newLauncherPodTemplate(mpiJob)
+	syncLauncherSchedulingDirectives(launcherCopy, &desiredPodTemplate)
 	launcherCopy.Spec.Suspend = ptr.To(false)
 	f.expectUpdateJobAction(launcherCopy)
 
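The `syncLauncherSchedulingDirectives` helper that this hunk starts exercising is not part of the excerpt. Below is a minimal sketch of the behavior the tests appear to assume, limited to the scheduling directives KEP-2926 allows to be mutated on a suspended Job (node selector, tolerations, scheduling gates, affinity) plus the pod-template labels and annotations the tests check; the package name and function body are assumptions for illustration, not the controller's actual implementation.

```go
package controller // hypothetical package name for this sketch

import (
	batchv1 "k8s.io/api/batch/v1"
	corev1 "k8s.io/api/core/v1"
)

// Sketch only (assumed behavior): copy the scheduling directives that a
// queueing controller such as Kueue may have injected into the MPIJob's
// launcher template after the launcher Job was created, so the suspended
// Job can be updated in place before it is resumed.
func syncLauncherSchedulingDirectives(launcher *batchv1.Job, desired *corev1.PodTemplateSpec) {
	launcher.Spec.Template.Spec.NodeSelector = desired.Spec.NodeSelector
	launcher.Spec.Template.Spec.Tolerations = desired.Spec.Tolerations
	launcher.Spec.Template.Spec.SchedulingGates = desired.Spec.SchedulingGates
	launcher.Spec.Template.Spec.Affinity = desired.Spec.Affinity

	// Pod-template labels and annotations are also mutable while the Job is
	// suspended; the tests assert the kueue.x-k8s.io/workload annotation is synced.
	if launcher.Spec.Template.Annotations == nil {
		launcher.Spec.Template.Annotations = map[string]string{}
	}
	for k, v := range desired.Annotations {
		launcher.Spec.Template.Annotations[k] = v
	}
	if launcher.Spec.Template.Labels == nil {
		launcher.Spec.Template.Labels = map[string]string{}
	}
	for k, v := range desired.Labels {
		launcher.Spec.Template.Labels[k] = v
	}
}
```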
@@ -1044,6 +1046,183 @@ func TestResumeMPIJob(t *testing.T) {
 	f.runWithClock(t.Context(), getKey(mpiJob, t), fakeClock)
 }
 
+func TestResumeMPIJobWithExistingLauncher(t *testing.T) {
+	// Tests the running→suspended→resumed path where a launcher already exists
+	// (from before suspension) with startTime == nil. The launcher should be
+	// updated in place with synced scheduling directives (KEP-2926).
+	fakeClock := clocktesting.NewFakeClock(time.Now().Truncate(time.Second))
+	f := newFixture(t, "")
+
+	var replicas int32 = 8
+	startTime := metav1.Now()
+	mpiJob := newMPIJob("test", &replicas, &startTime, nil)
+	mpiJob.Spec.RunPolicy.Suspend = ptr.To(true)
+	msg := fmt.Sprintf("MPIJob %s/%s is created.", mpiJob.Namespace, mpiJob.Name)
+	updateMPIJobConditions(mpiJob, kubeflow.JobCreated, corev1.ConditionTrue, mpiJobCreatedReason, msg)
+	updateMPIJobConditions(mpiJob, kubeflow.JobSuspended, corev1.ConditionTrue, mpiJobSuspendedReason, "MPIJob suspended")
+	msg = fmt.Sprintf("MPIJob %s/%s is suspended.", mpiJob.Namespace, mpiJob.Name)
+	updateMPIJobConditions(mpiJob, kubeflow.JobRunning, corev1.ConditionFalse, mpiJobSuspendedReason, msg)
+	mpiJob.Status.ReplicaStatuses = map[kubeflow.MPIReplicaType]*kubeflow.ReplicaStatus{
+		kubeflow.MPIReplicaTypeLauncher: {},
+		kubeflow.MPIReplicaTypeWorker:   {},
+	}
+	f.setUpMPIJob(mpiJob)
+
+	scheme.Scheme.Default(mpiJob)
+	f.expectCreateServiceAction(newJobService(mpiJob))
+	cfgMap := newConfigMap(mpiJob, replicas, "")
+	updateDiscoverHostsInConfigMap(cfgMap, mpiJob, nil, "")
+	f.setUpConfigMap(cfgMap)
+	secret, err := newSSHAuthSecret(mpiJob)
+	if err != nil {
+		t.Fatalf("Failed creating secret")
+	}
+	f.setUpSecret(secret)
+
+	// set up an existing suspended launcher (startTime == nil, never started)
+	fmjc := f.newFakeMPIJobController()
+	launcher := fmjc.newLauncherJob(mpiJob)
+	launcher.Spec.Suspend = ptr.To(true)
+	// Simulate Kueue injecting scheduling directives into the MPIJob template
+	// after the launcher was already created (so the launcher has stale templates).
+	launcherSpec := &mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeLauncher].Template
+	launcherSpec.Spec.NodeSelector = map[string]string{
+		"foo": "bar",
+	}
+	launcherSpec.Spec.Tolerations = []corev1.Toleration{
+		{Key: "gpu", Operator: corev1.TolerationOpEqual, Value: "true", Effect: corev1.TaintEffectNoSchedule},
+	}
+	launcherSpec.Spec.SchedulingGates = []corev1.PodSchedulingGate{
+		{Name: "kueue.x-k8s.io/topology"},
+	}
+	if launcherSpec.Annotations == nil {
+		launcherSpec.Annotations = make(map[string]string)
+	}
+	launcherSpec.Annotations["kueue.x-k8s.io/workload"] = "my-workload"
+	f.setUpLauncher(launcher)
+
+	fakeClock.Sleep(time.Second)
+
+	// resume the MPIJob
+	mpiJob.Spec.RunPolicy.Suspend = ptr.To(false)
+
+	// expect creation of the worker pods
+	for i := 0; i < int(replicas); i++ {
+		worker := fmjc.newWorker(mpiJob, i)
+		f.kubeActions = append(f.kubeActions, core.NewCreateAction(schema.GroupVersionResource{Resource: "pods"}, mpiJob.Namespace, worker))
+	}
+
+	// expect the launcher to be updated (scheduling directives synced + unsuspended)
+	launcherCopy := launcher.DeepCopy()
+	desiredPodTemplate := fmjc.newLauncherPodTemplate(mpiJob)
+	syncLauncherSchedulingDirectives(launcherCopy, &desiredPodTemplate)
+	launcherCopy.Spec.Suspend = ptr.To(false)
+
+	// Verify the synced launcher has the Kueue-injected scheduling directives.
+	tmpl := &launcherCopy.Spec.Template
+	if tmpl.Spec.NodeSelector["foo"] != "bar" {
+		t.Errorf("expected nodeSelector to be synced, got %v", tmpl.Spec.NodeSelector)
+	}
+	if len(tmpl.Spec.Tolerations) != 1 || tmpl.Spec.Tolerations[0].Key != "gpu" {
+		t.Errorf("expected tolerations to be synced, got %v", tmpl.Spec.Tolerations)
+	}
+	if len(tmpl.Spec.SchedulingGates) != 1 || tmpl.Spec.SchedulingGates[0].Name != "kueue.x-k8s.io/topology" {
+		t.Errorf("expected schedulingGates to be synced, got %v", tmpl.Spec.SchedulingGates)
+	}
+	if tmpl.Annotations["kueue.x-k8s.io/workload"] != "my-workload" {
+		t.Errorf("expected annotations to be synced, got %v", tmpl.Annotations)
+	}
+
+	f.expectUpdateJobAction(launcherCopy)
+
+	// expect status update
+	mpiJobCopy := mpiJob.DeepCopy()
+	mpiJobCopy.Status.StartTime = &metav1.Time{Time: fakeClock.Now()}
+	updateMPIJobConditions(mpiJobCopy, kubeflow.JobSuspended, corev1.ConditionFalse, "MPIJobResumed", "MPIJob resumed")
+	f.expectUpdateMPIJobStatusAction(mpiJobCopy)
+
+	f.runWithClock(t.Context(), getKey(mpiJob, t), fakeClock)
+}
+
+func TestResumeMPIJobClearsStartTime(t *testing.T) {
+	// Tests the re-admission case where the launcher has startTime != nil.
+	// The controller should clear StartTime via a status sub-resource update
+	// (consistent with JobSet), then sync scheduling directives and unsuspend.
+	fakeClock := clocktesting.NewFakeClock(time.Now().Truncate(time.Second))
+	f := newFixture(t, "")
+
+	var replicas int32 = 8
+	startTime := metav1.Now()
+	mpiJob := newMPIJob("test", &replicas, &startTime, nil)
+	mpiJob.Spec.RunPolicy.Suspend = ptr.To(true)
+	msg := fmt.Sprintf("MPIJob %s/%s is created.", mpiJob.Namespace, mpiJob.Name)
+	updateMPIJobConditions(mpiJob, kubeflow.JobCreated, corev1.ConditionTrue, mpiJobCreatedReason, msg)
+	updateMPIJobConditions(mpiJob, kubeflow.JobSuspended, corev1.ConditionTrue, mpiJobSuspendedReason, "MPIJob suspended")
+	msg = fmt.Sprintf("MPIJob %s/%s is suspended.", mpiJob.Namespace, mpiJob.Name)
+	updateMPIJobConditions(mpiJob, kubeflow.JobRunning, corev1.ConditionFalse, mpiJobSuspendedReason, msg)
+	mpiJob.Status.ReplicaStatuses = map[kubeflow.MPIReplicaType]*kubeflow.ReplicaStatus{
+		kubeflow.MPIReplicaTypeLauncher: {},
+		kubeflow.MPIReplicaTypeWorker:   {},
+	}
+	f.setUpMPIJob(mpiJob)
+
+	scheme.Scheme.Default(mpiJob)
+	f.expectCreateServiceAction(newJobService(mpiJob))
+	cfgMap := newConfigMap(mpiJob, replicas, "")
+	updateDiscoverHostsInConfigMap(cfgMap, mpiJob, nil, "")
+	f.setUpConfigMap(cfgMap)
+	secret, err := newSSHAuthSecret(mpiJob)
+	if err != nil {
+		t.Fatalf("Failed creating secret")
+	}
+	f.setUpSecret(secret)
+
+	// set up an existing suspended launcher that was previously started (startTime != nil)
+	fmjc := f.newFakeMPIJobController()
+	launcher := fmjc.newLauncherJob(mpiJob)
+	launcher.Spec.Suspend = ptr.To(true)
+	launcherStartTime := metav1.Now()
+	launcher.Status.StartTime = &launcherStartTime
+	f.setUpLauncher(launcher)
+
+	fakeClock.Sleep(time.Second)
+
+	// resume the MPIJob
+	mpiJob.Spec.RunPolicy.Suspend = ptr.To(false)
+
+	// expect creation of worker pods
+	for i := 0; i < int(replicas); i++ {
+		worker := fmjc.newWorker(mpiJob, i)
+		f.kubeActions = append(f.kubeActions, core.NewCreateAction(schema.GroupVersionResource{Resource: "pods"}, mpiJob.Namespace, worker))
+	}
+
+	// expect a status sub-resource update to clear launcher's StartTime
+	launcherStatusCleared := launcher.DeepCopy()
+	launcherStatusCleared.Status.StartTime = nil
+	f.kubeActions = append(f.kubeActions, core.NewUpdateSubresourceAction(
+		schema.GroupVersionResource{Resource: "jobs", Group: "batch", Version: "v1"},
+		"status",
+		mpiJob.Namespace,
+		launcherStatusCleared,
+	))
+
+	// expect the launcher to be updated (scheduling directives synced + unsuspended)
+	launcherCopy := launcher.DeepCopy()
+	launcherCopy.Status.StartTime = nil
+	desiredPodTemplate := fmjc.newLauncherPodTemplate(mpiJob)
+	syncLauncherSchedulingDirectives(launcherCopy, &desiredPodTemplate)
+	launcherCopy.Spec.Suspend = ptr.To(false)
+	f.expectUpdateJobAction(launcherCopy)
+
+	// expect MPIJob status update
+	mpiJobCopy := mpiJob.DeepCopy()
+	mpiJobCopy.Status.StartTime = &metav1.Time{Time: fakeClock.Now()}
+	updateMPIJobConditions(mpiJobCopy, kubeflow.JobSuspended, corev1.ConditionFalse, "MPIJobResumed", "MPIJob resumed")
+	f.expectUpdateMPIJobStatusAction(mpiJobCopy)
+
+	f.runWithClock(t.Context(), getKey(mpiJob, t), fakeClock)
+}
+
 func TestWorkerNotControlledByUs(t *testing.T) {
 	f := newFixture(t, "")
 	startTime := metav1.Now()
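TestResumeMPIJobClearsStartTime expects the controller to clear the launcher's `.status.startTime` through the status subresource before syncing scheduling directives and flipping `.spec.suspend`. The sketch below illustrates that call ordering against the typed batch/v1 client; the helper name, package name, and signature are illustrative assumptions (it reuses the `syncLauncherSchedulingDirectives` sketch above), not the controller's actual code.

```go
package controller // hypothetical package name for this sketch

import (
	"context"

	batchv1 "k8s.io/api/batch/v1"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	typedbatchv1 "k8s.io/client-go/kubernetes/typed/batch/v1"
	"k8s.io/utils/ptr"
)

// resumeLauncher is a hypothetical helper showing the ordering the test asserts:
// clear a stale startTime via the status subresource first (the approach the
// test attributes to JobSet), then sync scheduling directives and unsuspend the
// launcher Job in a single spec update.
func resumeLauncher(ctx context.Context, jobs typedbatchv1.JobInterface, launcher *batchv1.Job, desired *corev1.PodTemplateSpec) error {
	if launcher.Status.StartTime != nil {
		launcher.Status.StartTime = nil
		if _, err := jobs.UpdateStatus(ctx, launcher, metav1.UpdateOptions{}); err != nil {
			return err
		}
	}
	syncLauncherSchedulingDirectives(launcher, desired)
	launcher.Spec.Suspend = ptr.To(false)
	_, err := jobs.Update(ctx, launcher, metav1.UpdateOptions{})
	return err
}
```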