Skip to content

Commit 77a1023

Browse files
authored
[RayService] Add unit tests for isZeroDowntimeUpgradeEnabled (#2871)
1 parent 2f8ee7f commit 77a1023

File tree

3 files changed

+81
-45
lines changed

3 files changed

+81
-45
lines changed

ray-operator/controllers/ray/rayservice_controller.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -463,14 +463,13 @@ func (r *RayServiceReconciler) getRayServiceInstance(ctx context.Context, reques
463463
return rayServiceInstance, nil
464464
}
465465

466-
func isZeroDowntimeUpgradeEnabled(ctx context.Context, rayService *rayv1.RayService) bool {
466+
func isZeroDowntimeUpgradeEnabled(ctx context.Context, upgradeStrategy *rayv1.RayServiceUpgradeStrategy) bool {
467467
// For LLM serving, some users might not have sufficient GPU resources to run two RayClusters simultaneously.
468468
// Therefore, KubeRay offers ENABLE_ZERO_DOWNTIME as a feature flag for zero-downtime upgrades.
469469
// There are two ways to enable zero downtime upgrade. Through ENABLE_ZERO_DOWNTIME env var or setting Spec.UpgradeStrategy.Type.
470470
// If no fields are set, zero downtime upgrade by default is enabled.
471471
// Spec.UpgradeStrategy.Type takes precedence over ENABLE_ZERO_DOWNTIME.
472472
logger := ctrl.LoggerFrom(ctx)
473-
upgradeStrategy := rayService.Spec.UpgradeStrategy
474473
if upgradeStrategy != nil {
475474
upgradeType := upgradeStrategy.Type
476475
if upgradeType != nil {
@@ -691,7 +690,7 @@ func shouldPrepareNewCluster(ctx context.Context, rayServiceInstance *rayv1.RayS
691690
// KubeRay should update the RayCluster instead of creating a new one.
692691
return false
693692
}
694-
return isZeroDowntimeUpgradeEnabled(ctx, rayServiceInstance)
693+
return isZeroDowntimeUpgradeEnabled(ctx, rayServiceInstance.Spec.UpgradeStrategy)
695694
}
696695

697696
// updateRayClusterInstance updates the RayCluster instance.

ray-operator/controllers/ray/rayservice_controller_test.go

Lines changed: 0 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ package ray
1818
import (
1919
"context"
2020
"fmt"
21-
"os"
2221
"time"
2322

2423
"github.com/ray-project/kuberay/ray-operator/controllers/ray/common"
@@ -344,47 +343,6 @@ var _ = Context("RayService env tests", func() {
344343
Expect(meta.IsStatusConditionFalse(rayService.Status.Conditions, string(rayv1.UpgradeInProgress))).Should(BeTrue())
345344
})
346345

347-
It("Disable zero-downtime upgrade", func() {
348-
// Disable zero-downtime upgrade.
349-
os.Setenv("ENABLE_ZERO_DOWNTIME", "false")
350-
351-
// Try to trigger a zero-downtime upgrade.
352-
oldRayVersion := rayService.Spec.RayClusterSpec.RayVersion
353-
newRayVersion := "2.198.0"
354-
Expect(oldRayVersion).ShouldNot(Equal(newRayVersion))
355-
err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
356-
Eventually(
357-
getResourceFunc(ctx, client.ObjectKey{Name: rayService.Name, Namespace: "default"}, rayService),
358-
time.Second*3, time.Millisecond*500).Should(BeNil(), "My myRayService = %v", rayService.Name)
359-
rayService.Spec.RayClusterSpec.RayVersion = newRayVersion
360-
return k8sClient.Update(ctx, rayService)
361-
})
362-
Expect(err).NotTo(HaveOccurred(), "failed to update test RayService resource")
363-
364-
// Because the zero-downtime upgrade is disabled, the RayService controller will not prepare a new RayCluster.
365-
Consistently(
366-
getPreparingRayClusterNameFunc(ctx, rayService),
367-
time.Second*5, time.Millisecond*500).Should(BeEmpty(), "Pending RayCluster name = %v", rayService.Status.PendingServiceStatus.RayClusterName)
368-
369-
// Set the RayVersion back to the old value to avoid triggering the zero-downtime upgrade.
370-
err = retry.RetryOnConflict(retry.DefaultRetry, func() error {
371-
Eventually(
372-
getResourceFunc(ctx, client.ObjectKey{Name: rayService.Name, Namespace: "default"}, rayService),
373-
time.Second*3, time.Millisecond*500).Should(BeNil(), "My myRayService = %v", rayService.Name)
374-
rayService.Spec.RayClusterSpec.RayVersion = oldRayVersion
375-
return k8sClient.Update(ctx, rayService)
376-
})
377-
Expect(err).NotTo(HaveOccurred(), "failed to update test RayService resource")
378-
379-
// Enable zero-downtime upgrade again.
380-
os.Unsetenv("ENABLE_ZERO_DOWNTIME")
381-
382-
// Zero-downtime upgrade should not be triggered.
383-
Consistently(
384-
getPreparingRayClusterNameFunc(ctx, rayService),
385-
time.Second*5, time.Millisecond*500).Should(BeEmpty(), "Pending RayCluster name = %v", rayService.Status.PendingServiceStatus.RayClusterName)
386-
})
387-
388346
It("Autoscaler updates the active RayCluster and should not switch to a new RayCluster", func() {
389347
// Simulate autoscaler by updating the active RayCluster directly. Note that the autoscaler
390348
// will not update the RayService directly.

ray-operator/controllers/ray/rayservice_controller_unit_test.go

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package ray
33
import (
44
"context"
55
"fmt"
6+
"os"
67
"reflect"
78
"strconv"
89
"testing"
@@ -1155,3 +1156,81 @@ func TestShouldPrepareNewCluster_ZeroDowntimeUpgrade(t *testing.T) {
11551156
shouldPrepareNewCluster := shouldPrepareNewCluster(ctx, &rayService, activeCluster, nil)
11561157
assert.True(t, shouldPrepareNewCluster)
11571158
}
1159+
1160+
func TestIsZeroDowntimeUpgradeEnabled(t *testing.T) {
1161+
tests := []struct {
1162+
name string
1163+
upgradeStrategy *rayv1.RayServiceUpgradeStrategy
1164+
enableZeroDowntimeEnvVar string // "true" or "false" or "" (not set)
1165+
expected bool
1166+
}{
1167+
{
1168+
// The most common case.
1169+
name: "both upgrade strategy and env var are not set",
1170+
upgradeStrategy: nil,
1171+
enableZeroDowntimeEnvVar: "",
1172+
expected: true,
1173+
},
1174+
{
1175+
name: "upgrade strategy is not set, but env var is set to true",
1176+
upgradeStrategy: nil,
1177+
enableZeroDowntimeEnvVar: "true",
1178+
expected: true,
1179+
},
1180+
{
1181+
name: "upgrade strategy is not set, but env var is set to false",
1182+
upgradeStrategy: nil,
1183+
enableZeroDowntimeEnvVar: "false",
1184+
expected: false,
1185+
},
1186+
{
1187+
name: "upgrade strategy is set to NewCluster",
1188+
upgradeStrategy: &rayv1.RayServiceUpgradeStrategy{Type: ptr.To(rayv1.NewCluster)},
1189+
enableZeroDowntimeEnvVar: "",
1190+
expected: true,
1191+
},
1192+
{
1193+
name: "upgrade strategy is set to NewCluster, and env var is not set",
1194+
upgradeStrategy: &rayv1.RayServiceUpgradeStrategy{Type: ptr.To(rayv1.NewCluster)},
1195+
enableZeroDowntimeEnvVar: "true",
1196+
expected: true,
1197+
},
1198+
{
1199+
name: "upgrade strategy is set to NewCluster, and env var is set to false",
1200+
upgradeStrategy: &rayv1.RayServiceUpgradeStrategy{Type: ptr.To(rayv1.NewCluster)},
1201+
enableZeroDowntimeEnvVar: "false",
1202+
expected: true,
1203+
},
1204+
{
1205+
name: "upgrade strategy is set to None, and env var is not set",
1206+
upgradeStrategy: &rayv1.RayServiceUpgradeStrategy{Type: ptr.To(rayv1.None)},
1207+
enableZeroDowntimeEnvVar: "",
1208+
expected: false,
1209+
},
1210+
{
1211+
name: "upgrade strategy is set to None, and env var is set to true",
1212+
upgradeStrategy: &rayv1.RayServiceUpgradeStrategy{Type: ptr.To(rayv1.None)},
1213+
enableZeroDowntimeEnvVar: "true",
1214+
expected: false,
1215+
},
1216+
{
1217+
name: "upgrade strategy is set to None, and env var is set to false",
1218+
upgradeStrategy: &rayv1.RayServiceUpgradeStrategy{Type: ptr.To(rayv1.None)},
1219+
enableZeroDowntimeEnvVar: "false",
1220+
expected: false,
1221+
},
1222+
}
1223+
1224+
ctx := context.TODO()
1225+
for _, tt := range tests {
1226+
t.Run(tt.name, func(t *testing.T) {
1227+
defer func() {
1228+
os.Unsetenv(ENABLE_ZERO_DOWNTIME)
1229+
}()
1230+
1231+
os.Setenv(ENABLE_ZERO_DOWNTIME, tt.enableZeroDowntimeEnvVar)
1232+
isEnabled := isZeroDowntimeUpgradeEnabled(ctx, tt.upgradeStrategy)
1233+
assert.Equal(t, tt.expected, isEnabled)
1234+
})
1235+
}
1236+
}

0 commit comments

Comments
 (0)