@@ -514,12 +514,32 @@ func (r *AppWrapperReconciler) getPodStatus(ctx context.Context, aw *workloadv1b
514514 return summary , nil
515515}
516516
517+ //gocyclo:ignore
517518func (r * AppWrapperReconciler ) getComponentStatus (ctx context.Context , aw * workloadv1beta2.AppWrapper ) (* componentStatusSummary , error ) {
518519 summary := & componentStatusSummary {expected : int32 (len (aw .Status .ComponentStatus ))}
519520
520521 for componentIdx := range aw .Status .ComponentStatus {
521522 cs := & aw .Status .ComponentStatus [componentIdx ]
522523 switch cs .APIVersion + ":" + cs .Kind {
524+
525+ case "batch/v1:Job" :
526+ obj := & batchv1.Job {}
527+ if err := r .Get (ctx , types.NamespacedName {Name : cs .Name , Namespace : aw .Namespace }, obj ); err == nil {
528+ if obj .GetDeletionTimestamp ().IsZero () {
529+ summary .deployed += 1
530+
531+ // batch/v1 Jobs are failed when status.Conditions contains an entry with type "Failed" and status "True"
532+ for _ , jc := range obj .Status .Conditions {
533+ if jc .Type == batchv1 .JobFailed && jc .Status == v1 .ConditionTrue {
534+ summary .failed += 1
535+ }
536+ }
537+ }
538+
539+ } else if ! apierrors .IsNotFound (err ) {
540+ return nil , err
541+ }
542+
523543 case "kubeflow.org/v1:PyTorchJob" :
524544 obj := & unstructured.Unstructured {}
525545 obj .SetAPIVersion (cs .APIVersion )
@@ -555,20 +575,52 @@ func (r *AppWrapperReconciler) getComponentStatus(ctx context.Context, aw *workl
555575 return nil , err
556576 }
557577
558- case "batch/v1:Job" :
559- obj := & batchv1.Job {}
578+ case "ray.io/v1:RayCluster" :
579+ obj := & unstructured.Unstructured {}
580+ obj .SetAPIVersion (cs .APIVersion )
581+ obj .SetKind (cs .Kind )
560582 if err := r .Get (ctx , types.NamespacedName {Name : cs .Name , Namespace : aw .Namespace }, obj ); err == nil {
561583 if obj .GetDeletionTimestamp ().IsZero () {
562584 summary .deployed += 1
563585
564- // batch/v1 Jobs are failed when status.Conditions contains an entry with type "Failed" and status "True"
565- for _ , jc := range obj .Status .Conditions {
566- if jc .Type == batchv1 .JobFailed && jc .Status == v1 .ConditionTrue {
567- summary .failed += 1
568- }
586+ // RayCluster is failed if status.state is "failed"
587+ status , ok := obj .UnstructuredContent ()["status" ]
588+ if ! ok {
589+ continue
590+ }
591+ state , ok := status .(map [string ]interface {})["state" ]
592+ if ! ok {
593+ continue
594+ }
595+ if state .(string ) == "failed" {
596+ summary .failed += 1
569597 }
570598 }
599+ } else if ! apierrors .IsNotFound (err ) {
600+ return nil , err
601+ }
602+
603+ case "ray.io/v1:RayJob" :
604+ obj := & unstructured.Unstructured {}
605+ obj .SetAPIVersion (cs .APIVersion )
606+ obj .SetKind (cs .Kind )
607+ if err := r .Get (ctx , types.NamespacedName {Name : cs .Name , Namespace : aw .Namespace }, obj ); err == nil {
608+ if obj .GetDeletionTimestamp ().IsZero () {
609+ summary .deployed += 1
571610
611+ // RayJob is failed if status.jobStatus is "FAILED"
612+ status , ok := obj .UnstructuredContent ()["status" ]
613+ if ! ok {
614+ continue
615+ }
616+ jobStatus , ok := status .(map [string ]interface {})["jobStatus" ]
617+ if ! ok {
618+ continue
619+ }
620+ if jobStatus .(string ) == "FAILED" {
621+ summary .failed += 1
622+ }
623+ }
572624 } else if ! apierrors .IsNotFound (err ) {
573625 return nil , err
574626 }
0 commit comments