Skip to content

Commit a851490

Browse files
authored
Add a test util function for killing the head Pod and wait (#3890)
* update Signed-off-by: You-Cheng Lin <[email protected]> * update Signed-off-by: You-Cheng Lin <[email protected]> * update Signed-off-by: You-Cheng Lin <[email protected]> * update Signed-off-by: You-Cheng Lin <[email protected]> --------- Signed-off-by: You-Cheng Lin <[email protected]>
1 parent a099da3 commit a851490

File tree

3 files changed

+41
-21
lines changed

3 files changed

+41
-21
lines changed

helm-chart/ray-cluster/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ helm uninstall raycluster
7878
| nameOverride | string | `"kuberay"` | String to partially override release name. |
7979
| fullnameOverride | string | `""` | String to fully override release name. |
8080
| imagePullSecrets | list | `[]` | Secrets with credentials to pull images from a private registry |
81+
| gcsFaultTolerance.enabled | bool | `false` | |
8182
| common.containerEnv | list | `[]` | containerEnv specifies environment variables for the Ray head and worker containers. Follows standard K8s container env schema. |
8283
| head.initContainers | list | `[]` | Init containers to add to the head pod |
8384
| head.labels | object | `{}` | Labels for the head pod |

ray-operator/test/e2eautoscaler/raycluster_autoscaler_part2_test.go

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ import (
77
"time"
88

99
"github.com/onsi/gomega"
10-
. "github.com/onsi/gomega"
1110
corev1 "k8s.io/api/core/v1"
1211
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1312
corev1ac "k8s.io/client-go/applyconfigurations/core/v1"
@@ -518,7 +517,7 @@ func TestRayClusterAutoscalerGCSFT(t *testing.T) {
518517
LogWithTimestamp(test.T(), "Created ConfigMap %s/%s successfully", scripts.Namespace, scripts.Name)
519518

520519
checkRedisDBSize := DeployRedis(test, namespace.Name, RedisPassword)
521-
defer g.Eventually(checkRedisDBSize, time.Second*60, time.Second).Should(BeEquivalentTo("0"))
520+
defer g.Eventually(checkRedisDBSize, time.Second*60, time.Second).Should(gomega.BeEquivalentTo("0"))
522521

523522
rayClusterSpecAC := rayv1ac.RayClusterSpec().
524523
WithEnableInTreeAutoscaling(true).
@@ -570,19 +569,10 @@ func TestRayClusterAutoscalerGCSFT(t *testing.T) {
570569
g.Eventually(RayCluster(test, rayCluster.Namespace, rayCluster.Name), TestTimeoutMedium).
571570
Should(gomega.WithTransform(RayClusterDesiredWorkerReplicas, gomega.Equal(int32(0))))
572571

573-
// Delete the head Pod
574-
err = test.Client().Core().CoreV1().Pods(namespace.Name).Delete(test.Ctx(), headPod.Name, metav1.DeleteOptions{})
575-
g.Expect(err).NotTo(HaveOccurred())
576-
577-
PodUID := func(p *corev1.Pod) string { return string(p.UID) }
578-
g.Eventually(HeadPod(test, rayCluster), TestTimeoutMedium).
579-
ShouldNot(WithTransform(PodUID, Equal(string(headPod.UID)))) // Use UID to check if the new head pod is created.
580-
581-
g.Eventually(HeadPod(test, rayCluster), TestTimeoutMedium).
582-
Should(WithTransform(func(p *corev1.Pod) string { return string(p.Status.Phase) }, Equal("Running")))
583-
584-
headPod, err = GetHeadPod(test, rayCluster) // Replace the old head pod
585-
g.Expect(err).NotTo(HaveOccurred())
572+
// Delete the head Pod and wait for the new head pod to be ready.
573+
newHeadPod, err := DeletePodAndWait(test, rayCluster, namespace, headPod)
574+
g.Expect(err).NotTo(gomega.HaveOccurred())
575+
headPod = newHeadPod
586576

587577
// Create a detached actor, and a worker should be created after the new head pod is ready.
588578
ExecPodCmd(test, headPod, common.RayHeadContainer, []string{"python", "/home/ray/test_scripts/create_detached_actor.py", "actor1"})
@@ -595,7 +585,7 @@ func TestRayClusterAutoscalerGCSFT(t *testing.T) {
595585
Should(gomega.WithTransform(RayClusterDesiredWorkerReplicas, gomega.Equal(int32(0))))
596586

597587
err = test.Client().Ray().RayV1().RayClusters(namespace.Name).Delete(test.Ctx(), rayCluster.Name, metav1.DeleteOptions{})
598-
g.Expect(err).NotTo(HaveOccurred())
588+
g.Expect(err).NotTo(gomega.HaveOccurred())
599589
})
600590
}
601591
}

ray-operator/test/support/support.go

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@ import (
77

88
"github.com/onsi/gomega"
99
"github.com/onsi/gomega/format"
10-
v1 "k8s.io/api/core/v1"
10+
corev1 "k8s.io/api/core/v1"
1111
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
12+
13+
rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
1214
)
1315

1416
var (
@@ -51,23 +53,50 @@ func init() {
5153
format.MaxLength = 0
5254
}
5355

54-
func IsPodRunningAndReady(pod *v1.Pod) bool {
55-
if pod.Status.Phase != v1.PodRunning {
56+
func IsPodRunningAndReady(pod *corev1.Pod) bool {
57+
if pod.Status.Phase != corev1.PodRunning {
5658
return false
5759
}
5860
for _, condition := range pod.Status.Conditions {
59-
if condition.Type == v1.PodReady && condition.Status == v1.ConditionTrue {
61+
if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue {
6062
return true
6163
}
6264
}
6365
return false
6466
}
6567

66-
func AllPodsRunningAndReady(pods []v1.Pod) bool {
68+
func AllPodsRunningAndReady(pods []corev1.Pod) bool {
6769
for _, pod := range pods {
6870
if !IsPodRunningAndReady(&pod) {
6971
return false
7072
}
7173
}
7274
return true
7375
}
76+
77+
// DeletePodAndWait deletes currentHeadPod (by name, in namespace.Name) and
// waits for the RayCluster's head Pod to be replaced.
//
// After the delete call succeeds it polls HeadPod(test, rayCluster), each wait
// bounded by TestTimeoutMedium, until:
//  1. the head Pod's UID differs from currentHeadPod.UID, and then
//  2. the head Pod's Status.Phase equals "Running".
//
// It then fetches the head Pod again with GetHeadPod and returns it.
//
// A non-nil error is returned only when the delete call or the final
// GetHeadPod call fails. The two waits are gomega assertions bound to
// test.T(), so a wait that does not succeed is reported through that gomega
// instance rather than through the returned error.
//
// NOTE(review): readiness is judged by Pod phase only; the Ready condition
// that IsPodRunningAndReady also checks is not consulted here. Confirm that
// callers which exec into the returned Pod do not need the stronger check.
func DeletePodAndWait(test Test, rayCluster *rayv1.RayCluster, namespace *corev1.Namespace, currentHeadPod *corev1.Pod) (*corev1.Pod, error) {
78+
g := gomega.NewWithT(test.T())
79+
80+
err := test.Client().Core().CoreV1().Pods(namespace.Name).Delete(test.Ctx(), currentHeadPod.Name, metav1.DeleteOptions{})
81+
if err != nil {
82+
return nil, fmt.Errorf("failed to delete head pod %s: %w", currentHeadPod.Name, err)
83+
}
84+
85+
PodUID := func(p *corev1.Pod) string { return string(p.UID) }
86+
87+
// Wait for a new head pod to be created (different UID)
88+
g.Eventually(HeadPod(test, rayCluster), TestTimeoutMedium).
89+
ShouldNot(gomega.WithTransform(PodUID, gomega.Equal(string(currentHeadPod.UID))),
90+
"New head pod should have different UID than the deleted one")
91+
92+
g.Eventually(HeadPod(test, rayCluster), TestTimeoutMedium).
93+
Should(gomega.WithTransform(func(p *corev1.Pod) string { return string(p.Status.Phase) }, gomega.Equal("Running")),
94+
"New head pod should be in Running state")
95+
96+
newHeadPod, err := GetHeadPod(test, rayCluster)
97+
if err != nil {
98+
return nil, fmt.Errorf("failed to get new head pod: %w", err)
99+
}
100+
101+
return newHeadPod, nil
102+
}

0 commit comments

Comments
 (0)