Skip to content

Commit c78bdcf

Browse files
rueian and machichima authored
[RayCluster] Make headpod name back to non-deterministic (#3872) (#3876)
Signed-off-by: Rueian <[email protected]>
Co-authored-by: Nary Yeh <[email protected]>
1 parent 3d138cf commit c78bdcf

File tree

5 files changed

+18 -21 lines changed

5 files changed

+18 -21 lines changed

kubectl-plugin/test/e2e/kubectl_ray_log_test.go

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ var _ = Describe("Calling ray plugin `log` command on Ray Cluster", func() {
2929

3030
It("succeed in retrieving all ray cluster logs", func() {
3131
expectedDirPath := "./raycluster-kuberay"
32-
expectedOutputStringFormat := `No output directory specified, creating dir under current directory using resource name\.\nCommand set to retrieve both head and worker node logs\.\nDownloading log for Ray Node raycluster-kuberay-head\nDownloading log for Ray Node raycluster-kuberay-workergroup-worker-\w+`
32+
expectedOutputStringFormat := `No output directory specified, creating dir under current directory using resource name\.\nCommand set to retrieve both head and worker node logs\.\nDownloading log for Ray Node raycluster-kuberay-head-\w+\nDownloading log for Ray Node raycluster-kuberay-workergroup-worker-\w+`
3333

3434
cmd := exec.Command("kubectl", "ray", "log", "--namespace", namespace, "raycluster-kuberay", "--node-type", "all")
3535
output, err := cmd.CombinedOutput()
@@ -84,7 +84,7 @@ var _ = Describe("Calling ray plugin `log` command on Ray Cluster", func() {
8484

8585
It("succeed in retrieving ray cluster head logs", func() {
8686
expectedDirPath := "./raycluster-kuberay"
87-
expectedOutputStringFormat := `No output directory specified, creating dir under current directory using resource name\.\nCommand set to retrieve only head node logs\.\nDownloading log for Ray Node raycluster-kuberay-head`
87+
expectedOutputStringFormat := `No output directory specified, creating dir under current directory using resource name\.\nCommand set to retrieve only head node logs\.\nDownloading log for Ray Node raycluster-kuberay-head-\w+`
8888

8989
cmd := exec.Command("kubectl", "ray", "log", "--namespace", namespace, "raycluster-kuberay", "--node-type", "head")
9090
output, err := cmd.CombinedOutput()
@@ -191,7 +191,7 @@ var _ = Describe("Calling ray plugin `log` command on Ray Cluster", func() {
191191

192192
It("succeed in retrieving ray cluster logs within designated directory", func() {
193193
expectedDirPath := "./temporary-directory"
194-
expectedOutputStringFormat := `Command set to retrieve both head and worker node logs\.\nDownloading log for Ray Node raycluster-kuberay-head\nDownloading log for Ray Node raycluster-kuberay-workergroup-worker-\w+`
194+
expectedOutputStringFormat := `Command set to retrieve both head and worker node logs\.\nDownloading log for Ray Node raycluster-kuberay-head-\w+\nDownloading log for Ray Node raycluster-kuberay-workergroup-worker-\w+`
195195

196196
err := os.MkdirAll(expectedDirPath, 0o755)
197197
Expect(err).NotTo(HaveOccurred())

kubectl-plugin/test/e2e/kubectl_ray_session_test.go

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -90,33 +90,33 @@ var _ = Describe("Calling ray plugin `session` command", Ordered, func() {
9090
}, 3*time.Second, 500*time.Millisecond).ShouldNot(HaveOccurred())
9191

9292
// Get the current head pod name
93-
cmd := exec.Command("kubectl", "get", "--namespace", namespace, "pod/raycluster-kuberay-head", "-o", "jsonpath={.metadata.uid}")
93+
cmd := exec.Command("kubectl", "get", "--namespace", namespace, "raycluster/raycluster-kuberay", "-o", "jsonpath={.status.head.podName}")
9494
output, err := cmd.CombinedOutput()
9595
Expect(err).NotTo(HaveOccurred())
96-
oldPodUID := string(output)
97-
var newPodUID string
96+
oldPodName := string(output)
97+
var newPodName string
9898

9999
// Delete the pod
100-
cmd = exec.Command("kubectl", "delete", "--namespace", namespace, "pod/raycluster-kuberay-head")
100+
cmd = exec.Command("kubectl", "delete", "--namespace", namespace, "pod", oldPodName)
101101
err = cmd.Run()
102102
Expect(err).NotTo(HaveOccurred())
103103

104104
// Wait for the new pod to be created
105105
Eventually(func() error {
106-
cmd := exec.Command("kubectl", "get", "--namespace", namespace, "pod/raycluster-kuberay-head", "-o", "jsonpath={.metadata.uid}")
106+
cmd := exec.Command("kubectl", "get", "--namespace", namespace, "raycluster/raycluster-kuberay", "-o", "jsonpath={.status.head.podName}")
107107
output, err := cmd.CombinedOutput()
108-
newPodUID = string(output)
108+
newPodName = string(output)
109109
if err != nil {
110110
return err
111111
}
112-
if newPodUID == oldPodUID {
113-
return fmt.Errorf("head pod has not changed (UID still %s)", oldPodUID)
112+
if newPodName == oldPodName {
113+
return fmt.Errorf("head pod has not changed (Name still %s)", oldPodName)
114114
}
115115
return nil
116116
}, 60*time.Second, 1*time.Second).ShouldNot(HaveOccurred())
117117

118118
// Wait for the new pod to be ready
119-
cmd = exec.Command("kubectl", "wait", "--namespace", namespace, "pod/raycluster-kuberay-head", "--for=condition=Ready", "--timeout=120s")
119+
cmd = exec.Command("kubectl", "wait", "--namespace", namespace, "pod", newPodName, "--for=condition=Ready", "--timeout=120s")
120120
err = cmd.Run()
121121
Expect(err).NotTo(HaveOccurred())
122122

ray-operator/controllers/ray/common/pod.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ func DefaultHeadPodTemplate(ctx context.Context, instance rayv1.RayCluster, head
164164
// headPort is passed into setMissingRayStartParams but unused there for the head pod.
165165
// To mitigate this awkwardness and reduce code redundancy, unify head and worker pod configuration logic.
166166
podTemplate := headSpec.Template
167-
podTemplate.Name = podName
167+
podTemplate.GenerateName = podName
168168
// Pods created by RayCluster should be restricted to the namespace of the RayCluster.
169169
// This ensures privilege of KubeRay users are contained within the namespace of the RayCluster.
170170
podTemplate.ObjectMeta.Namespace = instance.Namespace

ray-operator/controllers/ray/raycluster_controller.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -689,17 +689,15 @@ func (r *RayClusterReconciler) reconcilePods(ctx context.Context, instance *rayv
689689
return errstd.Join(utils.ErrFailedCreateHeadPod, err)
690690
}
691691
} else if len(headPods.Items) > 1 { // This should never happen. This protects against the case that users manually create headpod.
692-
correctHeadPodName := instance.Name + "-head"
693692
headPodNames := make([]string, len(headPods.Items))
694693
for i, pod := range headPods.Items {
695694
headPodNames[i] = pod.Name
696695
}
697696

698697
logger.Info("Multiple head pods found, it should only exist one head pod. Please delete extra head pods.",
699698
"found pods", headPodNames,
700-
"should only leave", correctHeadPodName,
701699
)
702-
return fmt.Errorf("%d head pods found %v. Please delete extra head pods and leave only the head pod with name %s", len(headPods.Items), headPodNames, correctHeadPodName)
700+
return fmt.Errorf("%d head pods found %v. Please delete extra head pods", len(headPods.Items), headPodNames)
703701
}
704702

705703
// Reconcile worker pods now
@@ -1038,7 +1036,7 @@ func (r *RayClusterReconciler) createWorkerPod(ctx context.Context, instance ray
10381036
// Build head instance pod(s).
10391037
func (r *RayClusterReconciler) buildHeadPod(ctx context.Context, instance rayv1.RayCluster) corev1.Pod {
10401038
logger := ctrl.LoggerFrom(ctx)
1041-
podName := utils.PodName(instance.Name, rayv1.HeadNode, false)
1039+
podName := utils.PodName(instance.Name, rayv1.HeadNode, true)
10421040
fqdnRayIP := utils.GenerateFQDNServiceName(ctx, instance, instance.Namespace) // Fully Qualified Domain Name
10431041

10441042
// The Ray head port used by workers to connect to the cluster (GCS server port for Ray >= 1.11.0, Redis port for older Ray.)

ray-operator/controllers/ray/utils/util_test.go

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ func TestPodName(t *testing.T) {
115115
name: "short cluster name, head pod",
116116
prefix: "ray-cluster-01",
117117
nodeType: rayv1.HeadNode,
118-
expected: "ray-cluster-01-head",
118+
expected: "ray-cluster-01-head-",
119119
},
120120
{
121121
name: "short cluster name, worker pod",
@@ -127,7 +127,7 @@ func TestPodName(t *testing.T) {
127127
name: "long cluster name, head pod",
128128
prefix: "ray-cluster-0000000000000000000000011111111122222233333333333333",
129129
nodeType: rayv1.HeadNode,
130-
expected: "ray-cluster-00000000000000000000000111111111222222-head",
130+
expected: "ray-cluster-00000000000000000000000111111111222222-head-",
131131
},
132132
{
133133
name: "long cluster name, worker pod",
@@ -139,8 +139,7 @@ func TestPodName(t *testing.T) {
139139

140140
for _, test := range tests {
141141
t.Run(test.name, func(t *testing.T) {
142-
isPodNameGenerated := test.nodeType == rayv1.WorkerNode // HeadPod name is now fixed
143-
str := PodName(test.prefix, test.nodeType, isPodNameGenerated)
142+
str := PodName(test.prefix, test.nodeType, true)
144143
if str != test.expected {
145144
t.Logf("expected: %q", test.expected)
146145
t.Logf("actual: %q", str)

0 commit comments

Comments (0)