Skip to content

Commit f232b5b

Browse files
Add RayClusterProvisioned Condition Type (#2301)
1 parent 7b81970 commit f232b5b

File tree

4 files changed

+44
-52
lines changed

4 files changed

+44
-52
lines changed

ray-operator/apis/ray/v1/raycluster_types.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -168,17 +168,17 @@ type RayClusterConditionType string
168168

169169
// Custom Reason for RayClusterCondition
170170
const (
171-
AllPodRunningAndReady = "AllPodRunningAndReady"
172-
HeadPodNotFound = "HeadPodNotFound"
173-
HeadPodRunningAndReady = "HeadPodRunningAndReady"
171+
AllPodRunningAndReadyFirstTime = "AllPodRunningAndReadyFirstTime"
172+
HeadPodNotFound = "HeadPodNotFound"
173+
HeadPodRunningAndReady = "HeadPodRunningAndReady"
174174
// UnknownReason says that the reason for the condition is unknown.
175175
UnknownReason = "Unknown"
176176
)
177177

178178
const (
179-
// RayClusterReady indicates whether all Ray Pods are ready when the RayCluster is first created.
180-
// After RayClusterReady is set to true for the first time, it only indicates whether the RayCluster's head Pod is ready for requests.
181-
RayClusterReady RayClusterConditionType = "RayClusterReady"
179+
// RayClusterProvisioned indicates whether all Ray Pods are ready for the first time.
180+
// Once RayClusterProvisioned has been set to true, it never changes again.
181+
RayClusterProvisioned RayClusterConditionType = "RayClusterProvisioned"
182182
// HeadPodReady indicates whether RayCluster's head Pod is ready for requests.
183183
HeadPodReady RayClusterConditionType = "HeadPodReady"
184184
// RayClusterReplicaFailure is added in a RayCluster when one of its pods fails to be created or deleted.

ray-operator/controllers/ray/raycluster_controller.go

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1210,25 +1210,17 @@ func (r *RayClusterReconciler) calculateStatus(ctx context.Context, instance *ra
12101210
meta.SetStatusCondition(&newInstance.Status.Conditions, headPodReadyCondition)
12111211
}
12121212

1213-
if meta.FindStatusCondition(newInstance.Status.Conditions, string(rayv1.RayClusterReady)) == nil {
1214-
// RayClusterReady indicates whether all Ray Pods are ready when the RayCluster is first created.
1215-
// Note RayClusterReady StatusCondition will not be added to Raycluster until all Ray Pods are ready for the first time.
1213+
if meta.FindStatusCondition(newInstance.Status.Conditions, string(rayv1.RayClusterProvisioned)) == nil {
1214+
// RayClusterProvisioned indicates whether all Ray Pods are ready when the RayCluster is first created.
1215+
// Note: the RayClusterProvisioned StatusCondition will not be added to the RayCluster until all Ray Pods are ready for the first time.
12161216
if utils.CheckAllPodsRunning(ctx, runtimePods) {
12171217
meta.SetStatusCondition(&newInstance.Status.Conditions, metav1.Condition{
1218-
Type: string(rayv1.RayClusterReady),
1218+
Type: string(rayv1.RayClusterProvisioned),
12191219
Status: metav1.ConditionTrue,
1220-
Reason: rayv1.AllPodRunningAndReady,
1221-
Message: "All Ray Pods are ready. Future checks focus on the head",
1220+
Reason: rayv1.AllPodRunningAndReadyFirstTime,
1221+
Message: "All Ray Pods are ready for the first time",
12221222
})
12231223
}
1224-
} else { // After RayClusterReady is set to true for the first time, its meaning changes to be the same as HeadPodReady.
1225-
headPodReadyCondition := meta.FindStatusCondition(newInstance.Status.Conditions, string(rayv1.HeadPodReady))
1226-
meta.SetStatusCondition(&newInstance.Status.Conditions, metav1.Condition{
1227-
Type: string(rayv1.RayClusterReady),
1228-
Status: headPodReadyCondition.Status,
1229-
Reason: headPodReadyCondition.Reason,
1230-
Message: "Only check head after all Ray Pods are initially ready",
1231-
})
12321224
}
12331225

12341226
}

ray-operator/controllers/ray/raycluster_controller_test.go

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -834,7 +834,7 @@ var _ = Context("Inside the default namespace", func() {
834834
DeferCleanup(cleanUpFunc)
835835
})
836836

837-
It("Should handle HeadPodReady and RayClusterReady conditions correctly", func(ctx SpecContext) {
837+
It("Should handle HeadPodReady and RayClusterProvisioned conditions correctly", func(ctx SpecContext) {
838838
namespace := "default"
839839
rayCluster := rayClusterTemplate("raycluster-status-conditions-enabled", namespace)
840840
rayCluster.Spec.WorkerGroupSpecs[0].Replicas = ptr.To[int32](1)
@@ -873,7 +873,7 @@ var _ = Context("Inside the default namespace", func() {
873873
headPod = headPods.Items[0]
874874

875875
By("Check RayCluster conditions empty initially")
876-
// Initially, neither head Pod nor worker Pod are ready. The RayClusterReady condition should not be present.
876+
// Initially, neither the head Pod nor the worker Pod is ready, so the RayClusterProvisioned condition should not be present.
877877
Expect(testRayCluster.Status.Conditions).To(BeEmpty())
878878

879879
By("Update the head pod to Running and Ready")
@@ -887,7 +887,7 @@ var _ = Context("Inside the default namespace", func() {
887887
Expect(k8sClient.Status().Update(ctx, &headPod)).Should(Succeed())
888888

889889
By("Check RayCluster HeadPodReady condition is true")
890-
// The head pod is ready, so RayClusterReady condition should be added and set to True.
890+
// The head pod is ready, so HeadPodReady condition should be added and set to True.
891891
Eventually(
892892
func() bool {
893893
if err := getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster)(); err != nil {
@@ -897,14 +897,14 @@ var _ = Context("Inside the default namespace", func() {
897897
},
898898
time.Second*3, time.Millisecond*500).Should(BeTrue())
899899

900-
By("Check RayCluster RayClusterReady condition is false")
901-
// But the worker pod is not ready yet, RayClusterReady condition should still be absent.
900+
By("Check RayCluster RayClusterProvisioned condition is false")
901+
// But the worker pod is not ready yet, so the RayClusterProvisioned condition should still be absent.
902902
Consistently(
903903
func() *metav1.Condition {
904904
if err := getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster)(); err != nil {
905905
return nil
906906
}
907-
return meta.FindStatusCondition(rayCluster.Status.Conditions, string(rayv1.RayClusterReady))
907+
return meta.FindStatusCondition(rayCluster.Status.Conditions, string(rayv1.RayClusterProvisioned))
908908
},
909909
time.Second*3, time.Millisecond*500).Should(BeNil())
910910

@@ -918,14 +918,14 @@ var _ = Context("Inside the default namespace", func() {
918918
}
919919
Expect(k8sClient.Status().Update(ctx, &workerPod)).Should(Succeed())
920920

921-
By("Check RayCluster RayClusterReady condition is true")
922-
// All Ray Pods are ready for the first time, RayClusterReady condition should be added and set to True.
921+
By("Check RayCluster RayClusterProvisioned condition is true")
922+
// All Ray Pods are ready for the first time, RayClusterProvisioned condition should be added and set to True.
923923
Eventually(
924924
func() bool {
925925
if err := getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster)(); err != nil {
926926
return false
927927
}
928-
return meta.IsStatusConditionPresentAndEqual(rayCluster.Status.Conditions, string(rayv1.RayClusterReady), metav1.ConditionTrue)
928+
return meta.IsStatusConditionPresentAndEqual(rayCluster.Status.Conditions, string(rayv1.RayClusterProvisioned), metav1.ConditionTrue)
929929
},
930930
time.Second*3, time.Millisecond*500).Should(BeTrue())
931931

@@ -938,15 +938,15 @@ var _ = Context("Inside the default namespace", func() {
938938
}
939939
Expect(k8sClient.Status().Update(ctx, &workerPod)).Should(Succeed())
940940

941-
By("Check RayCluster RayClusterReady condition is true")
942-
// The worker pod fails readiness, but since RayClusterReady focuses solely on the headPod after all Ray Pods were initially ready,
943-
// RayClusterReady condition should still be True.
941+
By("Check RayCluster RayClusterProvisioned condition is true")
942+
// The worker pod fails readiness, but since RayClusterProvisioned focuses solely on whether all Ray Pods are ready for the first time,
943+
// RayClusterProvisioned condition should still be True.
944944
Consistently(
945945
func() bool {
946946
if err := getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster)(); err != nil {
947947
return false
948948
}
949-
return meta.IsStatusConditionPresentAndEqual(rayCluster.Status.Conditions, string(rayv1.RayClusterReady), metav1.ConditionTrue)
949+
return meta.IsStatusConditionPresentAndEqual(rayCluster.Status.Conditions, string(rayv1.RayClusterProvisioned), metav1.ConditionTrue)
950950
},
951951
time.Second*3, time.Millisecond*500).Should(BeTrue())
952952

@@ -970,14 +970,14 @@ var _ = Context("Inside the default namespace", func() {
970970
},
971971
time.Second*3, time.Millisecond*500).Should(BeTrue())
972972

973-
By("Check RayCluster RayClusterReady condition is false")
974-
// The head pod also fails readiness, RayClusterReady condition should set to False.
973+
By("Check RayCluster RayClusterProvisioned condition is still true")
974+
// The head pod also fails readiness, but the RayClusterProvisioned condition should remain unchanged (still True).
975975
Eventually(
976976
func() bool {
977977
if err := getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: namespace}, rayCluster)(); err != nil {
978978
return false
979979
}
980-
return meta.IsStatusConditionPresentAndEqual(rayCluster.Status.Conditions, string(rayv1.RayClusterReady), metav1.ConditionFalse)
980+
return meta.IsStatusConditionPresentAndEqual(rayCluster.Status.Conditions, string(rayv1.RayClusterProvisioned), metav1.ConditionTrue)
981981
},
982982
time.Second*3, time.Millisecond*500).Should(BeTrue())
983983
})

ray-operator/controllers/ray/raycluster_controller_unit_test.go

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1748,7 +1748,7 @@ func TestCalculateStatus(t *testing.T) {
17481748
assert.True(t, meta.IsStatusConditionPresentAndEqual(newInstance.Status.Conditions, string(rayv1.RayClusterReplicaFailure), metav1.ConditionTrue))
17491749
}
17501750

1751-
func TestRayClusterReadyCondition(t *testing.T) {
1751+
func TestRayClusterProvisionedCondition(t *testing.T) {
17521752
setupTest(t)
17531753
defer features.SetFeatureGateDuringTest(t, features.RayClusterStatusConditions, true)()
17541754

@@ -1808,40 +1808,40 @@ func TestRayClusterReadyCondition(t *testing.T) {
18081808
Scheme: scheme.Scheme,
18091809
}
18101810

1811-
// Initially, neither head Pod nor worker Pod are ready. The RayClusterReady condition should not be present.
1811+
// Initially, neither the head Pod nor the worker Pod is ready, so the RayClusterProvisioned condition should not be present.
18121812
headPod.Status = UnReadyStatus
18131813
workerPod.Status = UnReadyStatus
18141814
_ = fakeClient.Status().Update(ctx, headPod)
18151815
_ = fakeClient.Status().Update(ctx, workerPod)
18161816
testRayCluster, _ = r.calculateStatus(ctx, testRayCluster, nil)
1817-
assert.Nil(t, meta.FindStatusCondition(testRayCluster.Status.Conditions, string(rayv1.RayClusterReady)))
1817+
assert.Nil(t, meta.FindStatusCondition(testRayCluster.Status.Conditions, string(rayv1.RayClusterProvisioned)))
18181818

1819-
// After a while, all Ray Pods are ready for the first time, RayClusterReady condition should be added and set to True.
1819+
// After a while, all Ray Pods are ready for the first time, RayClusterProvisioned condition should be added and set to True.
18201820
headPod.Status = ReadyStatus
18211821
workerPod.Status = ReadyStatus
18221822
_ = fakeClient.Status().Update(ctx, headPod)
18231823
_ = fakeClient.Status().Update(ctx, workerPod)
18241824
testRayCluster, _ = r.calculateStatus(ctx, testRayCluster, nil)
1825-
rayClusterReadyCondition := meta.FindStatusCondition(testRayCluster.Status.Conditions, string(rayv1.RayClusterReady))
1826-
assert.Equal(t, rayClusterReadyCondition.Status, metav1.ConditionTrue)
1827-
assert.Equal(t, rayClusterReadyCondition.Reason, rayv1.AllPodRunningAndReady)
1825+
rayClusterProvisionedCondition := meta.FindStatusCondition(testRayCluster.Status.Conditions, string(rayv1.RayClusterProvisioned))
1826+
assert.Equal(t, rayClusterProvisionedCondition.Status, metav1.ConditionTrue)
1827+
assert.Equal(t, rayClusterProvisionedCondition.Reason, rayv1.AllPodRunningAndReadyFirstTime)
18281828

1829-
// After a while, worker Pod fails readiness, but since RayClusterReady focuses solely on the headPod after all Ray Pods were initially ready,
1830-
// RayClusterReady condition should still be True.
1829+
// After a while, worker Pod fails readiness, but since RayClusterProvisioned focuses solely on whether all Ray Pods are ready for the first time,
1830+
// RayClusterProvisioned condition should still be True.
18311831
workerPod.Status = UnReadyStatus
18321832
_ = fakeClient.Status().Update(ctx, workerPod)
18331833
testRayCluster, _ = r.calculateStatus(ctx, testRayCluster, nil)
1834-
rayClusterReadyCondition = meta.FindStatusCondition(testRayCluster.Status.Conditions, string(rayv1.RayClusterReady))
1835-
assert.Equal(t, rayClusterReadyCondition.Status, metav1.ConditionTrue)
1836-
assert.Equal(t, rayClusterReadyCondition.Reason, rayv1.HeadPodRunningAndReady)
1834+
rayClusterProvisionedCondition = meta.FindStatusCondition(testRayCluster.Status.Conditions, string(rayv1.RayClusterProvisioned))
1835+
assert.Equal(t, rayClusterProvisionedCondition.Status, metav1.ConditionTrue)
1836+
assert.Equal(t, rayClusterProvisionedCondition.Reason, rayv1.AllPodRunningAndReadyFirstTime)
18371837

1838-
// After a while, head Pod also fails readiness, RayClusterReady condition should set to False.
1838+
// After a while, the head Pod also fails readiness, but the RayClusterProvisioned condition should still be True.
18391839
headPod.Status = UnReadyStatus
18401840
_ = fakeClient.Status().Update(ctx, headPod)
18411841
testRayCluster, _ = r.calculateStatus(ctx, testRayCluster, nil)
1842-
rayClusterReadyCondition = meta.FindStatusCondition(testRayCluster.Status.Conditions, string(rayv1.RayClusterReady))
1843-
assert.Equal(t, rayClusterReadyCondition.Status, metav1.ConditionFalse)
1844-
assert.Equal(t, rayClusterReadyCondition.Reason, rayv1.UnknownReason)
1842+
rayClusterProvisionedCondition = meta.FindStatusCondition(testRayCluster.Status.Conditions, string(rayv1.RayClusterProvisioned))
1843+
assert.Equal(t, rayClusterProvisionedCondition.Status, metav1.ConditionTrue)
1844+
assert.Equal(t, rayClusterProvisionedCondition.Reason, rayv1.AllPodRunningAndReadyFirstTime)
18451845
}
18461846

18471847
func TestStateTransitionTimes_NoStateChange(t *testing.T) {

0 commit comments

Comments
 (0)