Skip to content

Commit 355de9a

Browse files
authored
[RayService] Add zero-downtime triggered test after rayVersion is updated (#2881)
1 parent 56cdfb6 commit 355de9a

File tree

1 file changed

+41
-2
lines changed

1 file changed

+41
-2
lines changed

ray-operator/controllers/ray/rayservice_controller_test.go

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ func endpointsTemplate(name string, namespace string) *corev1.Endpoints {
135135
}
136136

137137
var _ = Context("RayService env tests", func() {
138-
Describe("RayService happy path", Ordered, func() {
138+
Describe("Zero-downtime upgrade", Ordered, func() {
139139
// This test case simulates the most common scenario in the RayService code path:
140140
// (1) Create a RayService custom resource
141141
// (2) The RayService controller creates a pending RayCluster
@@ -145,7 +145,7 @@ var _ = Context("RayService env tests", func() {
145145
ctx := context.Background()
146146
namespace := "default"
147147
serveAppName := "app1"
148-
rayService := rayServiceTemplate("test-happy-path", namespace, serveAppName)
148+
rayService := rayServiceTemplate("test-zero-downtime-path", namespace, serveAppName)
149149
rayCluster := &rayv1.RayCluster{}
150150

151151
It("Create a RayService custom resource", func() {
@@ -230,6 +230,45 @@ var _ = Context("RayService env tests", func() {
230230
}, time.Second*3, time.Millisecond*500).Should(BeNumerically(">", 0), "RayService status: %v", rayService.Status)
231231
Expect(meta.IsStatusConditionTrue(rayService.Status.Conditions, string(rayv1.RayServiceReady))).Should(BeTrue())
232232
})
233+
234+
It("Should perform a zero-downtime update after rayVersion updated.", func() {
	// Record the RayCluster that is active before the upgrade so the test can
	// assert that it keeps serving until the new cluster is ready.
	initialClusterName, err := getRayClusterNameFunc(ctx, rayService)()
	Expect(err).NotTo(HaveOccurred(), "failed to get the active RayCluster name")
	const mockRayVersion = "2.40.0" // Current rayVersion is 2.41.0, so set the rayVersion to 2.40.0 to test if zero-downtime is triggered.
	rayServiceKey := client.ObjectKey{Name: rayService.Name, Namespace: namespace}

	// The cluster shouldn't switch until deployments are finished updating
	updatingStatus := generateServeStatus(rayv1.DeploymentStatusEnum.UPDATING, rayv1.ApplicationStatusEnum.DEPLOYING)
	fakeRayDashboardClient.SetMultiApplicationStatuses(map[string]*utils.ServeApplicationStatus{serveAppName: &updatingStatus})

	// Re-read the RayService on every attempt so that a conflicting write by
	// the controller is resolved by retrying against the latest object.
	err = retry.RetryOnConflict(retry.DefaultRetry, func() error {
		Eventually(
			getResourceFunc(ctx, rayServiceKey, rayService),
			time.Second*3, time.Millisecond*500).Should(BeNil(), "My myRayService = %v", rayService.Name)
		rayService.Spec.RayClusterSpec.RayVersion = mockRayVersion
		return k8sClient.Update(ctx, rayService)
	})
	Expect(err).NotTo(HaveOccurred(), "failed to update test RayService resource")

	// Changing rayVersion must cause a pending RayCluster to be prepared.
	Eventually(
		getPreparingRayClusterNameFunc(ctx, rayService),
		time.Second*60, time.Millisecond*500).Should(Not(BeEmpty()), "My new RayCluster name = %v", rayService.Status.PendingServiceStatus.RayClusterName)

	pendingRayClusterName := rayService.Status.PendingServiceStatus.RayClusterName

	// While the serve application is still DEPLOYING, the active cluster must
	// remain the original one.
	Consistently(
		getRayClusterNameFunc(ctx, rayService),
		time.Second*5, time.Millisecond*500).Should(Equal(initialClusterName), "My current RayCluster name = %v", rayService.Status.ActiveServiceStatus.RayClusterName)

	// The cluster should switch once the deployments are finished updating
	healthyStatus := generateServeStatus(rayv1.DeploymentStatusEnum.HEALTHY, rayv1.ApplicationStatusEnum.RUNNING)
	fakeRayDashboardClient.SetMultiApplicationStatuses(map[string]*utils.ServeApplicationStatus{serveAppName: &healthyStatus})
	updateHeadPodToRunningAndReady(ctx, pendingRayClusterName, namespace)

	Eventually(
		getRayClusterNameFunc(ctx, rayService),
		time.Second*60, time.Millisecond*500).Should(Equal(pendingRayClusterName), "My current RayCluster name = %v", rayService.Status.ActiveServiceStatus.RayClusterName)

	// Eventually only polls when it is handed a function. Fetch the RayService
	// from the API server on each attempt so the assertion checks the stored
	// spec rather than a one-time snapshot of the in-memory object.
	Eventually(
		func() (string, error) {
			if err := k8sClient.Get(ctx, rayServiceKey, rayService); err != nil {
				return "", err
			}
			return rayService.Spec.RayClusterSpec.RayVersion, nil
		},
		time.Second*60, time.Millisecond*500).Should(Equal(mockRayVersion), "My current RayVersion = %v", rayService.Spec.RayClusterSpec.RayVersion)
})
233272
})
234273

235274
Describe("Autoscaler updates RayCluster should not trigger zero downtime upgrade", Ordered, func() {

0 commit comments

Comments
 (0)