@@ -224,7 +224,7 @@ func (r *RayJobReconciler) Reconcile(ctx context.Context, request ctrl.Request)
224
224
// mode is not stuck in the `Running` status indefinitely.
225
225
namespacedName := common .RayJobK8sJobNamespacedName (rayJobInstance )
226
226
if err := r .Client .Get (ctx , namespacedName , job ); err != nil {
227
- logger .Error (err , "Failed to get the submitter Kubernetes Job" , "NamespacedName" , namespacedName )
227
+ logger .Error (err , "Failed to get the submitter Kubernetes Job for RayJob " , "NamespacedName" , namespacedName )
228
228
return ctrl.Result {RequeueAfter : RayJobDefaultRequeueDuration }, err
229
229
}
230
230
if shouldUpdate := r .checkK8sJobAndUpdateStatusIfNeeded (ctx , rayJobInstance , job ); shouldUpdate {
@@ -443,7 +443,7 @@ func (r *RayJobReconciler) createK8sJobIfNeed(ctx context.Context, rayJobInstanc
443
443
return err
444
444
}
445
445
446
- logger .Info ("Kubernetes Job already exists" , "RayJob" , rayJobInstance .Name , "Kubernetes Job" , job .Name )
446
+ logger .Info ("The submitter Kubernetes Job for RayJob already exists" , "RayJob" , rayJobInstance .Name , "Kubernetes Job" , job .Name )
447
447
return nil
448
448
}
449
449
@@ -528,12 +528,12 @@ func (r *RayJobReconciler) createNewK8sJob(ctx context.Context, rayJobInstance *
528
528
529
529
// Create the Kubernetes Job
530
530
if err := r .Client .Create (ctx , job ); err != nil {
531
- logger .Error (err , "Failed to create new Kubernetes Job" )
532
- r .Recorder .Eventf (rayJobInstance , corev1 .EventTypeWarning , "k8sJobCreationFailed" , "Failed to create new Kubernetes Job %s: %v" , job .Name , err )
531
+ logger .Error (err , "Failed to create new submitter Kubernetes Job for RayJob " )
532
+ r .Recorder .Eventf (rayJobInstance , corev1 .EventTypeWarning , string ( utils . FailedToCreateRayJobSubmitter ) , "Failed to create new Kubernetes Job %s/%s : %v" , job . Namespace , job .Name , err )
533
533
return err
534
534
}
535
- logger .Info ("Kubernetes Job created " , "RayJob" , rayJobInstance .Name , "Kubernetes Job" , job .Name )
536
- r .Recorder .Eventf (rayJobInstance , corev1 .EventTypeNormal , "k8sJobCreationCreated" , "Created Kubernetes Job %s" , job .Name )
535
+ logger .Info ("Created submitter Kubernetes Job for RayJob " , "RayJob" , rayJobInstance .Name , "Kubernetes Job" , job .Name )
536
+ r .Recorder .Eventf (rayJobInstance , corev1 .EventTypeNormal , string ( utils . CreatedRayJobSubmitter ) , "Created Kubernetes Job %s/%s" , job . Namespace , job .Name )
537
537
return nil
538
538
}
539
539
@@ -559,13 +559,14 @@ func (r *RayJobReconciler) deleteSubmitterJob(ctx context.Context, rayJobInstanc
559
559
}
560
560
} else {
561
561
if ! job .DeletionTimestamp .IsZero () {
562
- logger .Info ("The Job deletion is ongoing." , "RayJob" , rayJobInstance .Name , "Submitter K8s Job" , job .Name )
562
+ logger .Info ("The deletion of submitter Kubernetes Job for RayJob is ongoing." , "RayJob" , rayJobInstance .Name , "Submitter K8s Job" , job .Name )
563
563
} else {
564
564
if err := r .Client .Delete (ctx , job , client .PropagationPolicy (metav1 .DeletePropagationBackground )); err != nil {
565
+ r .Recorder .Eventf (rayJobInstance , corev1 .EventTypeWarning , string (utils .FailedToDeleteRayJobSubmitter ), "Failed to delete submitter K8s Job %s/%s: %v" , job .Namespace , job .Name , err )
565
566
return false , err
566
567
}
567
- logger .Info ("The associated submitter Job is deleted" , "RayJob" , rayJobInstance .Name , "Submitter K8s Job" , job .Name )
568
- r .Recorder .Eventf (rayJobInstance , corev1 .EventTypeNormal , "Deleted" , "Deleted submitter K8s Job %s" , job .Name )
568
+ logger .Info ("The associated submitter Kubernetes Job for RayJob is deleted" , "RayJob" , rayJobInstance .Name , "Submitter K8s Job" , job .Name )
569
+ r .Recorder .Eventf (rayJobInstance , corev1 .EventTypeNormal , string ( utils . DeletedRayJobSubmitter ) , "Deleted submitter K8s Job %s/%s" , job . Namespace , job .Name )
569
570
}
570
571
}
571
572
@@ -585,19 +586,20 @@ func (r *RayJobReconciler) deleteClusterResources(ctx context.Context, rayJobIns
585
586
// If the cluster is not found, it means the cluster has been already deleted.
586
587
// Don't return error to make this function idempotent.
587
588
isClusterDeleted = true
588
- logger .Info ("The associated cluster has been already deleted and it can not be found" , "RayCluster" , clusterIdentifier )
589
+ logger .Info ("The associated RayCluster for RayJob has been already deleted and it can not be found" , "RayCluster" , clusterIdentifier , "RayJob" , rayJobInstance . Name )
589
590
} else {
590
591
return false , err
591
592
}
592
593
} else {
593
594
if ! cluster .DeletionTimestamp .IsZero () {
594
- logger .Info ("The cluster deletion is ongoing." , "rayjob " , rayJobInstance .Name , "raycluster " , cluster .Name )
595
+ logger .Info ("The deletion of the associated RayCluster for RayJob is ongoing." , "RayJob " , rayJobInstance .Name , "RayCluster " , cluster .Name )
595
596
} else {
596
597
if err := r .Delete (ctx , & cluster ); err != nil {
598
+ r .Recorder .Eventf (rayJobInstance , corev1 .EventTypeWarning , string (utils .FailedToDeleteRayCluster ), "Failed to delete cluster %s/%s: %v" , cluster .Namespace , cluster .Name , err )
597
599
return false , err
598
600
}
599
- logger .Info ("The associated cluster is deleted" , "RayCluster" , clusterIdentifier )
600
- r .Recorder .Eventf (rayJobInstance , corev1 .EventTypeNormal , "Deleted" , "Deleted cluster %s" , rayJobInstance . Status . RayClusterName )
601
+ logger .Info ("The associated RayCluster for RayJob is deleted" , "RayCluster" , clusterIdentifier , "RayJob" , rayJobInstance . Name )
602
+ r .Recorder .Eventf (rayJobInstance , corev1 .EventTypeNormal , string ( utils . DeletedRayCluster ) , "Deleted cluster %s/%s " , cluster . Namespace , cluster . Name )
601
603
}
602
604
}
603
605
@@ -712,14 +714,15 @@ func (r *RayJobReconciler) getOrCreateRayClusterInstance(ctx context.Context, ra
712
714
return nil , err
713
715
}
714
716
if err := r .Create (ctx , rayClusterInstance ); err != nil {
717
+ r .Recorder .Eventf (rayJobInstance , corev1 .EventTypeWarning , string (utils .FailedToCreateRayCluster ), "Failed to create RayCluster %s/%s: %v" , rayClusterInstance .Namespace , rayClusterInstance .Name , err )
715
718
return nil , err
716
719
}
717
- r .Recorder .Eventf (rayJobInstance , corev1 .EventTypeNormal , "Created" , "Created RayCluster %s" , rayJobInstance . Status . RayClusterName )
720
+ r .Recorder .Eventf (rayJobInstance , corev1 .EventTypeNormal , string ( utils . CreatedRayCluster ) , "Created RayCluster %s/%s " , rayClusterInstance . Namespace , rayClusterInstance . Name )
718
721
} else {
719
722
return nil , err
720
723
}
721
724
}
722
- logger .Info ("Found associated RayCluster for RayJob" , "RayJob" , rayJobInstance .Name , "RayCluster" , rayClusterNamespacedName )
725
+ logger .Info ("Found the associated RayCluster for RayJob" , "RayJob" , rayJobInstance .Name , "RayCluster" , rayClusterNamespacedName )
723
726
724
727
// Verify that RayJob is not in cluster selector mode first to avoid nil pointer dereference error during spec comparison.
725
728
// This is checked by ensuring len(rayJobInstance.Spec.ClusterSelector) equals 0.
0 commit comments