Skip to content

Commit 6687955

Browse files
authored
[Feature] Add e2e test for setting RayCluster deletion delay in RayService (#3912)
* test: for deletion delay in ray service
  Signed-off-by: machichima <[email protected]>
* feat: extract apply rayService YAML and wait to util function
  Signed-off-by: machichima <[email protected]>
* refactor: applyRayServiceYAMLAndWaitReady to private
  Signed-off-by: machichima <[email protected]>
* refactor: rename test-rayservice with suffix
  Signed-off-by: machichima <[email protected]>
* fix: ensure old raycluster exists during deletion delay
  Signed-off-by: machichima <[email protected]>
* refactor: fix comment text
  Signed-off-by: machichima <[email protected]>
* refactor: fix comment
  Signed-off-by: machichima <[email protected]>
---------
Signed-off-by: machichima <[email protected]>
1 parent 9b6b053 commit 6687955

File tree

3 files changed

+149
-36
lines changed

3 files changed

+149
-36
lines changed

ray-operator/test/e2erayservice/rayservice_ha_test.go

Lines changed: 38 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,8 @@ func TestStaticRayService(t *testing.T) {
3030
LogWithTimestamp(test.T(), "Created ConfigMap %s/%s successfully", configMap.Namespace, configMap.Name)
3131

3232
// Create the RayService for testing
33-
KubectlApplyYAML(test, rayserviceYamlFile, namespace.Name)
34-
rayService, err := GetRayService(test, namespace.Name, "test-rayservice")
35-
g.Expect(err).NotTo(HaveOccurred())
36-
LogWithTimestamp(test.T(), "Created RayService %s/%s successfully", rayService.Namespace, rayService.Name)
37-
38-
LogWithTimestamp(test.T(), "Waiting for RayService %s/%s to be ready", rayService.Namespace, rayService.Name)
39-
g.Eventually(RayService(test, rayService.Namespace, rayService.Name), TestTimeoutMedium).
40-
Should(WithTransform(IsRayServiceReady, BeTrue()))
33+
rayServiceName := "test-rayservice"
34+
applyRayServiceYAMLAndWaitReady(g, test, rayserviceYamlFile, namespace.Name, rayServiceName)
4135

4236
// Create Locust RayCluster
4337
KubectlApplyYAML(test, locustYamlFile, namespace.Name)
@@ -80,17 +74,11 @@ func TestAutoscalingRayService(t *testing.T) {
8074
LogWithTimestamp(test.T(), "Created ConfigMap %s/%s successfully", configMap.Namespace, configMap.Name)
8175

8276
// Create the RayService for testing
83-
KubectlApplyYAML(test, rayserviceYamlFile, namespace.Name)
84-
rayService, err := GetRayService(test, namespace.Name, "test-rayservice")
85-
g.Expect(err).NotTo(HaveOccurred())
86-
LogWithTimestamp(test.T(), "Created RayService %s/%s successfully", rayService.Namespace, rayService.Name)
87-
88-
LogWithTimestamp(test.T(), "Waiting for RayService %s/%s to be ready", rayService.Namespace, rayService.Name)
89-
g.Eventually(RayService(test, rayService.Namespace, rayService.Name), TestTimeoutMedium).
90-
Should(WithTransform(IsRayServiceReady, BeTrue()))
77+
rayServiceName := "test-rayservice"
78+
applyRayServiceYAMLAndWaitReady(g, test, rayserviceYamlFile, namespace.Name, rayServiceName)
9179

9280
// Get the underlying RayCluster of the RayService
93-
rayService, err = GetRayService(test, namespace.Name, rayService.Name)
81+
rayService, err := GetRayService(test, namespace.Name, rayServiceName)
9482
g.Expect(err).NotTo(HaveOccurred())
9583
rayServiceUnderlyingRayCluster, err := GetRayCluster(test, namespace.Name, rayService.Status.ActiveServiceStatus.RayClusterName)
9684
g.Expect(err).NotTo(HaveOccurred())
@@ -150,14 +138,8 @@ func TestRayServiceZeroDowntimeUpgrade(t *testing.T) {
150138
LogWithTimestamp(test.T(), "Created ConfigMap %s/%s successfully", configMap.Namespace, configMap.Name)
151139

152140
// Create the RayService for testing
153-
KubectlApplyYAML(test, rayserviceYamlFile, namespace.Name)
154-
rayService, err := GetRayService(test, namespace.Name, "test-rayservice")
155-
g.Expect(err).NotTo(HaveOccurred())
156-
LogWithTimestamp(test.T(), "Created RayService %s/%s successfully", rayService.Namespace, rayService.Name)
157-
158-
LogWithTimestamp(test.T(), "Waiting for RayService %s/%s to be ready", rayService.Namespace, rayService.Name)
159-
g.Eventually(RayService(test, rayService.Namespace, rayService.Name), TestTimeoutMedium).
160-
Should(WithTransform(IsRayServiceReady, BeTrue()))
141+
rayServiceName := "test-rayservice"
142+
applyRayServiceYAMLAndWaitReady(g, test, rayserviceYamlFile, namespace.Name, rayServiceName)
161143

162144
// Create Locust RayCluster
163145
KubectlApplyYAML(test, locustYamlFile, namespace.Name)
@@ -187,7 +169,7 @@ func TestRayServiceZeroDowntimeUpgrade(t *testing.T) {
187169
time.Sleep(30 * time.Second)
188170

189171
LogWithTimestamp(test.T(), "Updating RayService")
190-
rayService, err := GetRayService(test, namespace.Name, "test-rayservice")
172+
rayService, err := GetRayService(test, namespace.Name, rayServiceName)
191173
g.Expect(err).NotTo(HaveOccurred())
192174
rayClusterName := rayService.Status.ActiveServiceStatus.RayClusterName
193175

@@ -224,20 +206,14 @@ func TestRayServiceGCSFaultTolerance(t *testing.T) {
224206
LogWithTimestamp(test.T(), "Created ConfigMap %s/%s successfully", configMap.Namespace, configMap.Name)
225207

226208
// Create the RayService for testing
227-
KubectlApplyYAML(test, rayserviceYamlFile, namespace.Name)
228-
rayService, err := GetRayService(test, namespace.Name, "test-rayservice")
229-
g.Expect(err).NotTo(HaveOccurred())
230-
LogWithTimestamp(test.T(), "Created RayService %s/%s successfully", rayService.Namespace, rayService.Name)
231-
232-
LogWithTimestamp(test.T(), "Waiting for RayService %s/%s to be ready", rayService.Namespace, rayService.Name)
233-
g.Eventually(RayService(test, rayService.Namespace, rayService.Name), TestTimeoutMedium).
234-
Should(WithTransform(IsRayServiceReady, BeTrue()))
209+
rayServiceName := "test-rayservice"
210+
applyRayServiceYAMLAndWaitReady(g, test, rayserviceYamlFile, namespace.Name, rayServiceName)
235211

236-
g.Eventually(RayService(test, rayService.Namespace, rayService.Name), TestTimeoutShort).
212+
g.Eventually(RayService(test, namespace.Name, rayServiceName), TestTimeoutShort).
237213
Should(WithTransform(RayServicesNumEndPoints, Equal(int32(1))))
238214

239215
// Get the underlying RayCluster of the RayService
240-
rayService, err = GetRayService(test, namespace.Name, rayService.Name)
216+
rayService, err := GetRayService(test, namespace.Name, rayServiceName)
241217
g.Expect(err).NotTo(HaveOccurred())
242218
rayServiceUnderlyingRayCluster, err := GetRayCluster(test, namespace.Name, rayService.Status.ActiveServiceStatus.RayClusterName)
243219
g.Expect(err).NotTo(HaveOccurred())
@@ -287,3 +263,29 @@ func TestRayServiceGCSFaultTolerance(t *testing.T) {
287263
g.Expect(GetHeadPod(test, rayServiceUnderlyingRayCluster)).Should(WithTransform(IsPodRunningAndReady, BeTrue()))
288264
g.Expect(GetWorkerPods(test, rayServiceUnderlyingRayCluster)).Should(WithTransform(AllPodsRunningAndReady, BeTrue()))
289265
}
266+
267+
func TestRayServiceRayClusterDeletionDelaySeconds(t *testing.T) {
268+
rayserviceYamlFile := "testdata/rayservice.deletiondelay.yaml"
269+
270+
test := With(t)
271+
g := NewWithT(t)
272+
namespace := test.NewTestNamespace()
273+
274+
// Apply the RayService YAML with deletion delay set to 10 seconds
275+
rayServiceName := "test-rayservice-deletion-delay"
276+
applyRayServiceYAMLAndWaitReady(g, test, rayserviceYamlFile, namespace.Name, rayServiceName)
277+
278+
// Save the current RayCluster name
279+
rayService, err := GetRayService(test, namespace.Name, rayServiceName)
280+
g.Expect(err).NotTo(HaveOccurred())
281+
oldClusterName := rayService.Status.ActiveServiceStatus.RayClusterName
282+
283+
// Try updating and see if the new cluster created
284+
LogWithTimestamp(test.T(), "Updating RayService")
285+
newRayService := rayService.DeepCopy()
286+
newRayService.Spec.RayClusterSpec.RayVersion = ""
287+
newRayService, err = test.Client().Ray().RayV1().RayServices(newRayService.Namespace).Update(test.Ctx(), newRayService, metav1.UpdateOptions{})
288+
g.Expect(err).NotTo(HaveOccurred())
289+
290+
waitingForRayClusterSwitchWithDeletionDelay(g, test, newRayService, oldClusterName, 10*time.Second)
291+
}

ray-operator/test/e2erayservice/support.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"bytes"
55
"embed"
66
"fmt"
7+
"time"
78

89
. "github.com/onsi/gomega"
910
"github.com/stretchr/testify/require"
@@ -180,6 +181,21 @@ func RayServiceSampleYamlApplyConfiguration() *rayv1ac.RayServiceSpecApplyConfig
180181
})))))))
181182
}
182183

184+
func applyRayServiceYAMLAndWaitReady(g *WithT, t Test, filename string, namespace string, name string) {
185+
t.T().Helper()
186+
187+
// Apply the RayService YAML
188+
KubectlApplyYAML(t, filename, namespace)
189+
rayService, err := GetRayService(t, namespace, name)
190+
g.Expect(err).NotTo(HaveOccurred())
191+
LogWithTimestamp(t.T(), "Created RayService %s/%s successfully", rayService.Namespace, rayService.Name)
192+
193+
// Wait for RayService to be ready
194+
LogWithTimestamp(t.T(), "Waiting for RayService %s/%s to be ready", rayService.Namespace, rayService.Name)
195+
g.Eventually(RayService(t, rayService.Namespace, rayService.Name), TestTimeoutMedium).
196+
Should(WithTransform(IsRayServiceReady, BeTrue()))
197+
}
198+
183199
func waitingForRayClusterSwitch(g *WithT, test Test, rayService *rayv1.RayService, oldRayClusterName string) {
184200
LogWithTimestamp(test.T(), "Waiting for RayService %s/%s UpgradeInProgress condition to be true", rayService.Namespace, rayService.Name)
185201
g.Eventually(RayService(test, rayService.Namespace, rayService.Name), TestTimeoutShort).Should(WithTransform(IsRayServiceUpgrading, BeTrue()))
@@ -195,3 +211,33 @@ func waitingForRayClusterSwitch(g *WithT, test Test, rayService *rayv1.RayServic
195211
g.Expect(err).NotTo(HaveOccurred())
196212
g.Expect(IsRayServiceUpgrading(rayService)).To(BeFalse())
197213
}
214+
215+
func waitingForRayClusterSwitchWithDeletionDelay(g *WithT, test Test, rayService *rayv1.RayService, oldRayClusterName string, deletionDelayDuration time.Duration) {
216+
LogWithTimestamp(test.T(), "Waiting for RayService %s/%s UpgradeInProgress condition to be true", rayService.Namespace, rayService.Name)
217+
g.Eventually(RayService(test, rayService.Namespace, rayService.Name), TestTimeoutShort).Should(WithTransform(IsRayServiceUpgrading, BeTrue()))
218+
219+
// Assert that the active RayCluster is eventually different
220+
LogWithTimestamp(test.T(), "Waiting for RayService %s/%s to switch to a new cluster", rayService.Namespace, rayService.Name)
221+
g.Eventually(RayService(test, rayService.Namespace, rayService.Name), TestTimeoutLong).Should(WithTransform(func(rayService *rayv1.RayService) string {
222+
return rayService.Status.ActiveServiceStatus.RayClusterName
223+
}, Not(Equal(oldRayClusterName))))
224+
225+
// Ensure the old RayCluster still exists during the deletion delay
226+
LogWithTimestamp(test.T(), "Ensuring old RayCluster %s/%s still exists for deletionDelayDuration (%v)", rayService.Namespace, oldRayClusterName, deletionDelayDuration)
227+
g.Consistently(func() error {
228+
_, err := GetRayCluster(test, rayService.Namespace, oldRayClusterName)
229+
return err
230+
}, deletionDelayDuration, time.Second).Should(Not(HaveOccurred()))
231+
232+
// Verify that the old RayCluster is eventually deleted with the grace period of 5 seconds
233+
LogWithTimestamp(test.T(), "Checking that old RayCluster %s/%s is eventually deleted", rayService.Namespace, oldRayClusterName)
234+
g.Eventually(func() error {
235+
_, err := GetRayCluster(test, rayService.Namespace, oldRayClusterName)
236+
return err
237+
}, 5*time.Second).Should(HaveOccurred())
238+
239+
LogWithTimestamp(test.T(), "Verifying RayService %s/%s UpgradeInProgress condition to be false", rayService.Namespace, rayService.Name)
240+
rayService, err := GetRayService(test, rayService.Namespace, rayService.Name)
241+
g.Expect(err).NotTo(HaveOccurred())
242+
g.Expect(IsRayServiceUpgrading(rayService)).To(BeFalse())
243+
}
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
apiVersion: ray.io/v1
2+
kind: RayService
3+
metadata:
4+
name: test-rayservice-deletion-delay
5+
spec:
6+
rayClusterDeletionDelaySeconds: 10
7+
serveConfigV2: |
8+
proxy_location: EveryNode
9+
applications:
10+
- name: no_ops
11+
route_prefix: /
12+
import_path: microbenchmarks.no_ops:app_builder
13+
args:
14+
num_forwards: 0
15+
runtime_env:
16+
working_dir: https://github.com/ray-project/serve_workloads/archive/a9f184f4d9ddb7f9a578502ae106470f87a702ef.zip
17+
deployments:
18+
- name: NoOp
19+
num_replicas: 2
20+
max_replicas_per_node: 1
21+
ray_actor_options:
22+
num_cpus: 1
23+
rayClusterConfig:
24+
rayVersion: '2.46.0'
25+
headGroupSpec:
26+
template:
27+
spec:
28+
containers:
29+
- name: ray-head
30+
image: rayproject/ray:2.46.0
31+
resources:
32+
requests:
33+
cpu: 300m
34+
memory: 1G
35+
limits:
36+
cpu: 500m
37+
memory: 2G
38+
ports:
39+
- containerPort: 6379
40+
name: gcs-server
41+
- containerPort: 8265
42+
name: dashboard
43+
- containerPort: 10001
44+
name: client
45+
- containerPort: 8000
46+
name: serve
47+
workerGroupSpecs:
48+
- replicas: 1
49+
minReplicas: 1
50+
maxReplicas: 1
51+
groupName: small-group
52+
rayStartParams:
53+
num-cpus: "1"
54+
template:
55+
spec:
56+
containers:
57+
- name: ray-worker
58+
image: rayproject/ray:2.46.0
59+
resources:
60+
requests:
61+
cpu: 300m
62+
memory: 1G
63+
limits:
64+
cpu: 500m
65+
memory: 1G

0 commit comments

Comments
 (0)