Skip to content

Commit b8f6d06

Browse files
authored
RayCluster Headless Worker Service Should PublishNotReadyAddresses (#2375)
1 parent 0e1c248 commit b8f6d06

File tree

4 files changed

+56
-14
lines changed

4 files changed

+56
-14
lines changed

ray-operator/controllers/ray/common/association_test.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,7 @@ func TestRayClusterHeadlessServiceListOptions(t *testing.T) {
130130
Namespace: "test-ns",
131131
},
132132
}
133-
headlessSvc, err := BuildHeadlessServiceForRayCluster(*instance)
134-
assert.Nil(t, err)
133+
headlessSvc := BuildHeadlessServiceForRayCluster(*instance)
135134

136135
rayClusterName := ""
137136
for k, v := range headlessSvc.Labels {

ray-operator/controllers/ray/common/service.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,7 @@ func BuildServeService(ctx context.Context, rayService rayv1.RayService, rayClus
288288
}
289289

290290
// BuildHeadlessServiceForRayCluster builds the headless service that workers in multi-host worker groups use to communicate with each other.
291-
func BuildHeadlessServiceForRayCluster(rayCluster rayv1.RayCluster) (*corev1.Service, error) {
291+
func BuildHeadlessServiceForRayCluster(rayCluster rayv1.RayCluster) *corev1.Service {
292292
name := rayCluster.Name + utils.DashSymbol + utils.HeadlessServiceSuffix
293293
namespace := rayCluster.Namespace
294294

@@ -310,10 +310,13 @@ func BuildHeadlessServiceForRayCluster(rayCluster rayv1.RayCluster) (*corev1.Ser
310310
ClusterIP: "None",
311311
Selector: selectorLabels,
312312
Type: corev1.ServiceTypeClusterIP,
313+
// The headless worker service is used for peer communication between multi-host workers and should not be
314+
// dependent on Proxy Actor placement to publish DNS addresses.
315+
PublishNotReadyAddresses: true,
313316
},
314317
}
315318

316-
return headlessService, nil
319+
return headlessService
317320
}
318321

319322
func setServiceTypeForUserProvidedService(ctx context.Context, service *corev1.Service, defaultType corev1.ServiceType) {

ray-operator/controllers/ray/common/service_test.go

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ var (
9797
},
9898
},
9999
}
100-
instanceForServeSvc = &rayv1.RayCluster{
100+
instanceForSvc = &rayv1.RayCluster{
101101
ObjectMeta: metav1.ObjectMeta{
102102
Name: "raycluster-sample-svc",
103103
Namespace: "default",
@@ -450,6 +450,49 @@ func TestBuildServiceForHeadPodPortsOrder(t *testing.T) {
450450
}
451451
}
452452

453+
func TestBuildHeadlessServiceForRayCluster(t *testing.T) {
454+
svc := BuildHeadlessServiceForRayCluster(*instanceForSvc)
455+
456+
actualSelector := svc.Spec.Selector[utils.RayClusterLabelKey]
457+
expectedSelector := instanceForSvc.Name
458+
if !reflect.DeepEqual(expectedSelector, actualSelector) {
459+
t.Fatalf("Expected `%v` but got `%v`", expectedSelector, actualSelector)
460+
}
461+
462+
actualSelector = svc.Spec.Selector[utils.RayNodeTypeLabelKey]
463+
expectedSelector = string(rayv1.WorkerNode)
464+
if !reflect.DeepEqual(expectedSelector, actualSelector) {
465+
t.Fatalf("Expected `%v` but got `%v`", expectedSelector, actualSelector)
466+
}
467+
468+
actualLabel := svc.Labels[utils.RayClusterHeadlessServiceLabelKey]
469+
expectedLabel := instanceForSvc.Name
470+
if !reflect.DeepEqual(expectedLabel, actualLabel) {
471+
t.Fatalf("Expected `%v` but got `%v`", expectedLabel, actualLabel)
472+
}
473+
474+
actualType := svc.Spec.Type
475+
expectedType := corev1.ServiceTypeClusterIP
476+
if !reflect.DeepEqual(expectedType, actualType) {
477+
t.Fatalf("Expected `%v` but got `%v`", expectedType, actualType)
478+
}
479+
480+
actualClusterIP := svc.Spec.ClusterIP
481+
expectedClusterIP := corev1.ClusterIPNone
482+
if !reflect.DeepEqual(expectedClusterIP, actualClusterIP) {
483+
t.Fatalf("Expected `%v` but got `%v`", expectedClusterIP, actualClusterIP)
484+
}
485+
486+
actualPublishNotReadyAddresses := svc.Spec.PublishNotReadyAddresses
487+
expectedPublishNotReadyAddresses := true
488+
if !reflect.DeepEqual(expectedClusterIP, actualClusterIP) {
489+
t.Fatalf("Expected `%v` but got `%v`", expectedPublishNotReadyAddresses, actualPublishNotReadyAddresses)
490+
}
491+
492+
expectedName := fmt.Sprintf("%s-%s", instanceForSvc.Name, utils.HeadlessServiceSuffix)
493+
validateNameAndNamespaceForUserSpecifiedService(svc, serviceInstance.ObjectMeta.Namespace, expectedName, t)
494+
}
495+
453496
func TestBuildServeServiceForRayService(t *testing.T) {
454497
svc, err := BuildServeServiceForRayService(context.Background(), *serviceInstance, *instanceWithWrongSvc)
455498
assert.Nil(t, err)
@@ -483,30 +526,30 @@ func TestBuildServeServiceForRayService(t *testing.T) {
483526
}
484527

485528
func TestBuildServeServiceForRayCluster(t *testing.T) {
486-
svc, err := BuildServeServiceForRayCluster(context.Background(), *instanceForServeSvc)
529+
svc, err := BuildServeServiceForRayCluster(context.Background(), *instanceForSvc)
487530
assert.Nil(t, err)
488531

489532
actualResult := svc.Spec.Selector[utils.RayClusterLabelKey]
490-
expectedResult := instanceForServeSvc.Name
533+
expectedResult := instanceForSvc.Name
491534
if !reflect.DeepEqual(expectedResult, actualResult) {
492535
t.Fatalf("Expected `%v` but got `%v`", expectedResult, actualResult)
493536
}
494537

495538
actualLabel := svc.Labels[utils.RayOriginatedFromCRNameLabelKey]
496-
expectedLabel := instanceForServeSvc.Name
539+
expectedLabel := instanceForSvc.Name
497540
assert.Equal(t, expectedLabel, actualLabel)
498541

499542
actualLabel = svc.Labels[utils.RayOriginatedFromCRDLabelKey]
500543
expectedLabel = utils.RayOriginatedFromCRDLabelValue(utils.RayClusterCRD)
501544
assert.Equal(t, expectedLabel, actualLabel)
502545

503546
actualType := svc.Spec.Type
504-
expectedType := instanceForServeSvc.Spec.HeadGroupSpec.ServiceType
547+
expectedType := instanceForSvc.Spec.HeadGroupSpec.ServiceType
505548
if !reflect.DeepEqual(expectedType, actualType) {
506549
t.Fatalf("Expected `%v` but got `%v`", expectedType, actualType)
507550
}
508551

509-
expectedName := fmt.Sprintf("%s-%s-%s", instanceForServeSvc.Name, "serve", "svc")
552+
expectedName := fmt.Sprintf("%s-%s-%s", instanceForSvc.Name, "serve", "svc")
510553
validateNameAndNamespaceForUserSpecifiedService(svc, serviceInstance.ObjectMeta.Namespace, expectedName, t)
511554
}
512555

ray-operator/controllers/ray/raycluster_controller.go

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -597,10 +597,7 @@ func (r *RayClusterReconciler) reconcileHeadlessService(ctx context.Context, ins
597597
return nil
598598
}
599599
// Create headless tpu worker service if there's no existing one in the cluster.
600-
headlessSvc, err := common.BuildHeadlessServiceForRayCluster(*instance)
601-
if err != nil {
602-
return err
603-
}
600+
headlessSvc := common.BuildHeadlessServiceForRayCluster(*instance)
604601

605602
if err := r.createService(ctx, headlessSvc, instance); err != nil {
606603
return err

0 commit comments

Comments
 (0)