Integration: KAI Scheduler #3886

Open
wants to merge 10 commits into master
8 changes: 6 additions & 2 deletions helm-chart/kuberay-operator/values.yaml
@@ -70,12 +70,16 @@ logging:
# 4. Use PodGroup
# batchScheduler:
# name: scheduler-plugins
#

# 5. Use KAI Scheduler
# batchScheduler:
# name: kai-scheduler

batchScheduler:
  # Deprecated. This option will be removed in the future.
  # Note: kept for backwards compatibility. When set to true, it enables Volcano scheduler integration.
  enabled: false
  # Set the customized scheduler name, supported values are "volcano", "yunikorn" or "scheduler-plugins", do not set
  # Set the customized scheduler name, supported values are "volcano", "yunikorn", "kai-scheduler" or "scheduler-plugins", do not set
  # "batchScheduler.enabled=true" at the same time as it will override this option.
  name: ""

3 changes: 2 additions & 1 deletion ray-operator/apis/config/v1alpha1/config_utils.go
@@ -5,6 +5,7 @@ import (

"github.com/go-logr/logr"

kaischeduler "github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/kai-scheduler"
schedulerplugins "github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/scheduler-plugins"
"github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/volcano"
"github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/yunikorn"
@@ -23,7 +24,7 @@ func ValidateBatchSchedulerConfig(logger logr.Logger, config Configuration) error

	if len(config.BatchScheduler) > 0 {
		// if a customized scheduler is configured, check it is supported
		if config.BatchScheduler == volcano.GetPluginName() || config.BatchScheduler == yunikorn.GetPluginName() || config.BatchScheduler == schedulerplugins.GetPluginName() {
		if config.BatchScheduler == volcano.GetPluginName() || config.BatchScheduler == yunikorn.GetPluginName() || config.BatchScheduler == schedulerplugins.GetPluginName() || config.BatchScheduler == kaischeduler.GetPluginName() {
			logger.Info("Feature flag batch-scheduler is enabled",
				"scheduler name", config.BatchScheduler)
		} else {
11 changes: 11 additions & 0 deletions ray-operator/apis/config/v1alpha1/config_utils_test.go
@@ -6,6 +6,7 @@ import (
"github.com/go-logr/logr"
"github.com/go-logr/logr/testr"

kaischeduler "github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/kai-scheduler"
schedulerPlugins "github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/scheduler-plugins"
"github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/volcano"
"github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/yunikorn"
@@ -71,6 +72,16 @@ func TestValidateBatchSchedulerConfig(t *testing.T) {
			},
			wantErr: false,
		},
		{
			name: "valid option, batch-scheduler=kai-scheduler",
			args: args{
				logger: testr.New(t),
				config: Configuration{
					BatchScheduler: kaischeduler.GetPluginName(),
				},
			},
			wantErr: false,
		},
		{
			name: "invalid option, invalid scheduler name",
			args: args{
2 changes: 1 addition & 1 deletion ray-operator/apis/config/v1alpha1/configuration_types.go
@@ -44,7 +44,7 @@ type Configuration struct {
	LogStdoutEncoder string `json:"logStdoutEncoder,omitempty"`

	// BatchScheduler enables the batch scheduler integration with a specific scheduler
	// based on the given name, currently, supported values are volcano and yunikorn.
	// based on the given name, currently supported values are volcano, yunikorn, and kai-scheduler.
	BatchScheduler string `json:"batchScheduler,omitempty"`

	// HeadSidecarContainers includes specification for a sidecar container
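
For completeness, the same selection can be expressed through the operator's structured configuration. A hypothetical sketch, assuming the operator is started with a config file for this Configuration type (the apiVersion/kind shown are assumptions based on the apis/config/v1alpha1 package, not something this PR adds):

apiVersion: config.ray.io/v1alpha1   # assumed group/version for the operator Configuration
kind: Configuration
# json tag of the BatchScheduler field above
batchScheduler: kai-scheduler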
35 changes: 35 additions & 0 deletions ray-operator/config/samples/ray-cluster.kai-gpu-sharing.yaml
@@ -0,0 +1,35 @@
apiVersion: ray.io/v1
kind: RayCluster
metadata:
  name: raycluster-half-gpu
  labels:
    kai.scheduler/queue: team-a
spec:
  headGroupSpec:
    template:
      spec:
        containers:
        - name: head
          image: rayproject/ray:2.46.0
          resources:
            limits:
              cpu: "1"
              memory: "2Gi"

  # ---- Two workers share one GPU (0.5 each) ----
  workerGroupSpecs:
  - groupName: shared-gpu
Member:

Could you share what the Pod looks like after it's created, using kubectl describe pod ...?

Author:

Here we go:



Name:             raycluster-half-gpu-shared-gpu-worker-6sx5d
Namespace:        default
Priority:         0
Service Account:  default
Node:             ip-xxxxx
Start Time:       Wed, 06 Aug 2025 21:01:54 +0200
Labels:           app.kubernetes.io/created-by=kuberay-operator
                  app.kubernetes.io/name=kuberay
                  kai.scheduler/queue=team-a
                  ray.io/cluster=raycluster-half-gpu
                  ray.io/group=shared-gpu
                  ray.io/identifier=raycluster-half-gpu-worker
                  ray.io/is-ray-node=yes
                  ray.io/node-type=worker
                  runai-gpu-group=518b1881-bd3c-4593-9bf3-2e59e98d6cb9
Annotations:      gpu-fraction: 0.5
                  pod-group-name: pg-raycluster-half-gpu-b1ee6048-1369-4ee3-a5a5-a66a377e769f
                  received-resource-type: Fraction
                  runai/shared-gpu-configmap: raycluster-half-gpu-6hl5xvs-shared-gpu
Status:           Running
IP:               xxxxxx
IPs:
  IP:           xxxxxx
Controlled By:  RayCluster/raycluster-half-gpu
Init Containers:
  wait-gcs-ready:
    Container ID:  containerd://27bf1b6c4f5723594b77658697c8a9be3bf9f72579e2f230e2e8ae28d2d74459
    Image:         rayproject/ray:2.46.0
    Image ID:      docker.io/rayproject/ray@sha256:764d7d4bf276143fac2fe322fe41593bb36bbd4dbe7fe9a2d94b67acb736eae3
    Port:          <none>
    Host Port:     <none>
    Command:
      /bin/bash
      -c
      --
    Args:

                            SECONDS=0
                            while true; do
                              if (( SECONDS <= 120 )); then
                                if ray health-check --address raycluster-half-gpu-head-svc.default.svc.cluster.local:6379 > /dev/null 2>&1; then
                                  echo "GCS is ready."
                                  break
                                fi
                                echo "$SECONDS seconds elapsed: Waiting for GCS to be ready."
                              else
                                if ray health-check --address raycluster-half-gpu-head-svc.default.svc.cluster.local:6379; then
                                  echo "GCS is ready. Any error messages above can be safely ignored."
                                  break
                                fi
                                echo "$SECONDS seconds elapsed: Still waiting for GCS to be ready. For troubleshooting, refer to the FAQ at https://github.com/ray-project/kuberay/blob/master/docs/guidance/FAQ.md."
                              fi
                              sleep 5
                            done

    State:          Terminated
      Reason:       Completed
      Exit Code:    0
      Started:      Wed, 06 Aug 2025 21:01:55 +0200
      Finished:     Wed, 06 Aug 2025 21:02:20 +0200
    Ready:          True
    Restart Count:  0
    Limits:
      cpu:     200m
      memory:  256Mi
    Requests:
      cpu:     200m
      memory:  256Mi
    Environment:
      FQ_RAY_IP:  raycluster-half-gpu-head-svc.default.svc.cluster.local
      RAY_IP:     raycluster-half-gpu-head-svc
    Mounts:
      /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-5wz6g (ro)
Containers:
  worker:
    Container ID:  containerd://ae2297c4dd07bfb89a4a2795915cd0b7d8aadd0aacd84f8859c448ab95927f86
    Image:         rayproject/ray:2.46.0
    Image ID:      docker.io/rayproject/ray@sha256:764d7d4bf276143fac2fe322fe41593bb36bbd4dbe7fe9a2d94b67acb736eae3
    Port:          8080/TCP
    Host Port:     0/TCP
    Command:
      /bin/bash
      -c
      --
    Args:
      ulimit -n 65536; ray start  --address=raycluster-half-gpu-head-svc.default.svc.cluster.local:6379  --block  --dashboard-agent-listen-port=52365  --memory=2147483648  --metrics-export-port=8080  --num-cpus=1
    State:          Running
      Started:      Wed, 06 Aug 2025 21:02:21 +0200
    Ready:          True
    Restart Count:  0
    Limits:
      cpu:     1
      memory:  2Gi
    Requests:
      cpu:      1
      memory:   2Gi
    Liveness:   exec [bash -c wget --tries 1 -T 2 -q -O- http://localhost:52365/api/local_raylet_healthz | grep success] delay=30s timeout=2s period=5s #success=1 #failure=120
    Readiness:  exec [bash -c wget --tries 1 -T 2 -q -O- http://localhost:52365/api/local_raylet_healthz | grep success] delay=10s timeout=2s period=5s #success=1 #failure=10
    Environment Variables from:
      raycluster-half-gpu-6hl5xvs-shared-gpu-0-evar  ConfigMap  Optional: false
    Environment:
      FQ_RAY_IP:                            raycluster-half-gpu-head-svc.default.svc.cluster.local
      RAY_IP:                               raycluster-half-gpu-head-svc
      RAY_CLUSTER_NAME:                      (v1:metadata.labels['ray.io/cluster'])
      RAY_CLOUD_INSTANCE_ID:                raycluster-half-gpu-shared-gpu-worker-6sx5d (v1:metadata.name)
      RAY_NODE_TYPE_NAME:                    (v1:metadata.labels['ray.io/group'])
      KUBERAY_GEN_RAY_START_CMD:            ray start  --address=raycluster-half-gpu-head-svc.default.svc.cluster.local:6379  --block  --dashboard-agent-listen-port=52365  --memory=2147483648  --metrics-export-port=8080  --num-cpus=1
      RAY_PORT:                             6379
      RAY_ADDRESS:                          raycluster-half-gpu-head-svc.default.svc.cluster.local:6379
      RAY_USAGE_STATS_KUBERAY_IN_USE:       1
      RAY_DASHBOARD_ENABLE_K8S_DISK_USAGE:  1
      NVIDIA_VISIBLE_DEVICES:               <set to the key 'NVIDIA_VISIBLE_DEVICES' of config map 'raycluster-half-gpu-6hl5xvs-shared-gpu-0'>  Optional: false
      RUNAI_NUM_OF_GPUS:                    <set to the key 'RUNAI_NUM_OF_GPUS' of config map 'raycluster-half-gpu-6hl5xvs-shared-gpu-0'>       Optional: false
    Mounts:
      /dev/shm from shared-mem (rw)
      /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-5wz6g (ro)
Conditions:
  Type              Status
  PodBound          True
  Initialized       True
  Ready             True
  ContainersReady   True
  PodScheduled      True
Volumes:
  shared-mem:
    Type:       EmptyDir (a temporary directory that shares a pod's lifetime)
    Medium:     Memory
    SizeLimit:  2Gi
  kube-api-access-5wz6g:
    Type:                    Projected (a volume that contains injected data from multiple sources)
    TokenExpirationSeconds:  3607
    ConfigMapName:           kube-root-ca.crt
    ConfigMapOptional:       <nil>
    DownwardAPI:             true
  raycluster-half-gpu-6hl5xvs-shared-gpu-0-vol:
    Type:        ConfigMap (a volume populated by a ConfigMap)
    Name:        raycluster-half-gpu-6hl5xvs-shared-gpu-0
    Optional:    false
QoS Class:       Guaranteed
Node-Selectors:  <none>
Tolerations:     node.kubernetes.io/not-ready:NoExecute op=Exists for 300s
                 node.kubernetes.io/unreachable:NoExecute op=Exists for 300s
Events:
  Type    Reason     Age   From           Message
  ----    ------     ----  ----           -------
  Normal  Scheduled  100s  kai-scheduler  Successfully assigned pod default/raycluster-half-gpu-shared-gpu-worker-6sx5d to node ip-xxxxxxx at node-pool default
  Normal  Bound      100s  binder         Pod bound successfully to node ip-xxxxxxx
  Normal  Pulled     99s   kubelet        Container image "rayproject/ray:2.46.0" already present on machine
  Normal  Created    99s   kubelet        Created container wait-gcs-ready
  Normal  Started    99s   kubelet        Started container wait-gcs-ready
  Normal  Pulled     73s   kubelet        Container image "rayproject/ray:2.46.0" already present on machine
  Normal  Created    73s   kubelet        Created container worker
  Normal  Started    73s   kubelet        Started container worker

    replicas: 2
    minReplicas: 2
    template:
      metadata:
        annotations:
          gpu-fraction: "0.5"
Member:

What does this mean? Are you using DRA to mount the same GPU to two different Pods?

Additionally, do we need to specify GPUs in the resource requests and limits? If not, KubeRay won’t pass GPU information to Ray, and Ray will be unable to map physical GPU resources in Kubernetes to logical resources within Ray.
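
For context, the conventional whole-GPU path the reviewer is contrasting with would put the device-plugin resource in the container limits, which is what lets KubeRay pass GPU information to Ray. A hedged sketch, not part of this sample:

resources:
  limits:
    cpu: "1"
    memory: "2Gi"
    nvidia.com/gpu: "1"   # whole GPU; KubeRay can map this to Ray's logical GPU count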

Member:

Can you add comments for the KAI Scheduler–specific configuration so that users can understand what this YAML is for?

Author:

The example uses KAI Scheduler's native GPU sharing feature, which works through time slicing. I've made that clear in the comments in the changes I just pushed.

We do not need to specify it. When using gpu-fraction, KAI Scheduler manages the GPU allocation internally.

I've added comments to the YAML files explaining the KAI-specific configuration. Let me know what you think.
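
For readers following the thread, the KAI-specific pieces in the GPU-sharing sample come down to the queue label on the RayCluster and the gpu-fraction annotation on the worker pod template. An annotated sketch of just those parts (comments added here for illustration):

metadata:
  labels:
    # KAI queue the cluster's pods are submitted to; the plugin copies it onto every pod
    kai.scheduler/queue: team-a
spec:
  workerGroupSpecs:
  - groupName: shared-gpu
    template:
      metadata:
        annotations:
          # KAI native GPU sharing (time slicing): each worker is allotted half a GPU
          gpu-fraction: "0.5"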

Member:

We do not need to specify it. When using gpu-fraction, KAI Scheduler manages the GPU allocation internally.

Can you try testing whether Ray tasks or actors are actually using the GPUs? Since the CR doesn't specify nvidia.com/gpu, KubeRay doesn't automatically map physical resources to Ray's logical resources. You may need to specify num-gpus in rayStartParams.

import ray

@ray.remote(num_gpus=0.5)
def f():
    # check which GPU(s) Ray assigned to this task
    return ray.get_gpu_ids()

ref = f.remote()
print(ray.get(ref))
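
A sketch of what the rayStartParams suggestion could look like on the worker group, so Ray's logical GPU count matches the fraction requested from KAI (hedged; not part of the PR as submitted):

workerGroupSpecs:
- groupName: shared-gpu
  rayStartParams:
    # tell Ray about the fractional GPU, since no nvidia.com/gpu limit is set
    num-gpus: "0.5"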

Author:

Thanks for the pointer. My tests showed the GPU getting allocated, but the test cluster has since been removed, so I have no quick way to try it again right now. I will try again later. If you have a way to test it in the meantime, please let me know if you find anything.

Author:

I tested this now.

It's working fine. Ray tasks can access GPUs and the workers are sharing the GPU correctly.
Both worker pods see the same Tesla T4:

=== Worker Pod 1 GPU Visibility ===
Defaulted container "worker" out of: worker, wait-gcs-ready (init)
GPU 0: Tesla T4 (UUID: GPU-82b4fb2e-d25b-0fb8-480c-7e61e49760f3)

=== Worker Pod 2 GPU Visibility ===
Defaulted container "worker" out of: worker, wait-gcs-ready (init)
GPU 0: Tesla T4 (UUID: GPU-82b4fb2e-d25b-0fb8-480c-7e61e49760f3)

The test pattern you suggested also works.

One quirk I noticed: Ray shows 2.0 GPUs in total (1 per worker) instead of recognizing the fractional allocations. I think this happens because, as you pointed out, without nvidia.com/gpu in the resource limits, Ray just auto-detects the physical GPU on each worker node (correct me if I'm wrong). The actual sharing still works fine since KAI Scheduler handles it.
I think this is fine to document as expected behavior: users need to manage GPU memory anyway, so if they request 0.5 of a GPU from KAI in their deployment, they should not use more than that in their Ray workloads. Let me know what you think.

Contributor:

If GPU sharing is achieved by time slicing, does that mean each worker thinks it owns the entire GPU? Is that why Ray shows 2.0 GPUs?

      spec:
        containers:
        - name: worker
          image: rayproject/ray:2.46.0
          resources:
            limits:
              cpu: "1"
              memory: "2Gi"
38 changes: 38 additions & 0 deletions ray-operator/config/samples/ray-cluster.kai-scheduler-queues.yaml
@@ -0,0 +1,38 @@
apiVersion: scheduling.run.ai/v2
kind: Queue
metadata:
  name: department-1
spec:
  resources:
    cpu:
      quota: -1
      limit: -1
      overQuotaWeight: 1
    gpu:
      quota: -1
      limit: -1
      overQuotaWeight: 1
    memory:
      quota: -1
      limit: -1
      overQuotaWeight: 1
---
apiVersion: scheduling.run.ai/v2
kind: Queue
metadata:
  name: team-a
spec:
  parentQueue: department-1
  resources:
    cpu:
      quota: -1
      limit: -1
      overQuotaWeight: 1
    gpu:
      quota: -1
      limit: -1
      overQuotaWeight: 1
    memory:
      quota: -1
      limit: -1
      overQuotaWeight: 1
31 changes: 31 additions & 0 deletions ray-operator/config/samples/ray-cluster.kai-scheduler.yaml
@@ -0,0 +1,31 @@
# A simple example RayCluster scheduled with KAI
apiVersion: ray.io/v1
kind: RayCluster
metadata:
  name: raycluster-sample
  labels:
    kai.scheduler/queue: team-a
spec:
  headGroupSpec:
    template:
      spec:
        containers:
        - name: ray-head
          image: rayproject/ray:2.46.0
          resources:
            requests:
              cpu: "1"
              memory: "2Gi"
  workerGroupSpecs:
  - groupName: worker
    replicas: 2
    minReplicas: 2
    template:
      spec:
        containers:
        - name: ray-worker
          image: rayproject/ray:2.46.0
          resources:
            requests:
              cpu: "1"
              memory: "1Gi"
@@ -0,0 +1,66 @@
package kaischeduler

// This KAI plugin relies on KAI-Scheduler's
// built-in PodGrouper to create PodGroups at
// runtime, so the plugin itself only needs to:
// 1. expose the scheduler name,
// 2. stamp pods with schedulerName + queue label.
// No PodGroup create/patch logic is included.

import (
	"context"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/client-go/rest"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/builder"
	"sigs.k8s.io/controller-runtime/pkg/client"

	rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
	schedulerinterface "github.com/ray-project/kuberay/ray-operator/controllers/ray/batchscheduler/interface"
)

const (
	QueueLabelName = "kai.scheduler/queue"
)

type KaiScheduler struct{}

type KaiSchedulerFactory struct{}

func GetPluginName() string { return "kai-scheduler" }

func (k *KaiScheduler) Name() string { return GetPluginName() }

func (k *KaiScheduler) DoBatchSchedulingOnSubmission(_ context.Context, _ *rayv1.RayCluster) error {
	return nil
}

func (k *KaiScheduler) AddMetadataToPod(ctx context.Context, app *rayv1.RayCluster, _ string, pod *corev1.Pod) {
	pod.Spec.SchedulerName = k.Name()

	queue, ok := app.Labels[QueueLabelName]
	if !ok || queue == "" {
		logger := ctrl.LoggerFrom(ctx).WithName("kai-scheduler")
		logger.Info("Queue label missing from RayCluster; pods will remain pending",
			"requiredLabel", QueueLabelName,
			"rayCluster", app.Name)
		return
	}
	if pod.Labels == nil {
		pod.Labels = make(map[string]string)
	}
	pod.Labels[QueueLabelName] = queue
}

func (kf *KaiSchedulerFactory) New(_ context.Context, _ *rest.Config, _ client.Client) (schedulerinterface.BatchScheduler, error) {
	return &KaiScheduler{}, nil
}

func (kf *KaiSchedulerFactory) AddToScheme(_ *runtime.Scheme) {
}

func (kf *KaiSchedulerFactory) ConfigureReconciler(b *builder.Builder) *builder.Builder {
	return b
}
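
To illustrate what AddMetadataToPod ends up producing, here is a sketch of the relevant fields on a worker pod created for a RayCluster labeled kai.scheduler/queue: team-a (consistent with the kubectl describe output earlier in the thread):

apiVersion: v1
kind: Pod
metadata:
  labels:
    kai.scheduler/queue: team-a   # copied from the RayCluster by the plugin
spec:
  schedulerName: kai-scheduler    # set unconditionally by the plugin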
@@ -0,0 +1,141 @@
package kaischeduler

import (
	"context"
	"testing"

	"github.com/stretchr/testify/assert"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
)

func createTestRayCluster(labels map[string]string) *rayv1.RayCluster {
	return &rayv1.RayCluster{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "test-cluster",
			Namespace: "default",
			Labels:    labels,
		},
	}
}

func createTestPod() *corev1.Pod {
	return &corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "test-pod",
			Namespace: "default",
			Labels: map[string]string{
				"ray.io/cluster":   "test-cluster",
				"ray.io/node-type": "worker",
				"app":              "ray",
			},
		},
		Spec: corev1.PodSpec{
			Containers: []corev1.Container{{
				Name:  "ray-worker",
				Image: "rayproject/ray:latest",
			}},
		},
	}
}

func TestAddMetadataToPod_WithQueueLabel(t *testing.T) {
	a := assert.New(t)
	scheduler := &KaiScheduler{}
	ctx := context.Background()

	// Create RayCluster with queue label
	rayCluster := createTestRayCluster(map[string]string{
		QueueLabelName: "test-queue",
	})
	pod := createTestPod()

	// Call AddMetadataToPod
	scheduler.AddMetadataToPod(ctx, rayCluster, "test-group", pod)

	// Assert scheduler name is set to kai-scheduler
	a.Equal("kai-scheduler", pod.Spec.SchedulerName)

	// Assert queue label is propagated to pod
	a.NotNil(pod.Labels)
	a.Equal("test-queue", pod.Labels[QueueLabelName])
}

func TestAddMetadataToPod_WithoutQueueLabel(t *testing.T) {
	a := assert.New(t)
	scheduler := &KaiScheduler{}
	ctx := context.Background()

	// Create RayCluster without queue label
	rayCluster := createTestRayCluster(map[string]string{})
	pod := createTestPod()

	// Call AddMetadataToPod
	scheduler.AddMetadataToPod(ctx, rayCluster, "test-group", pod)

	// Assert scheduler name is still set (always required)
	a.Equal("kai-scheduler", pod.Spec.SchedulerName)

	// Assert queue label is not added to pod when missing from RayCluster
	if pod.Labels != nil {
		_, exists := pod.Labels[QueueLabelName]
		a.False(exists)
	}
}

func TestAddMetadataToPod_WithEmptyQueueLabel(t *testing.T) {
	a := assert.New(t)
	scheduler := &KaiScheduler{}
	ctx := context.Background()

	// Create RayCluster with empty queue label
	rayCluster := createTestRayCluster(map[string]string{
		QueueLabelName: "",
	})
	pod := createTestPod()

	// Call AddMetadataToPod
	scheduler.AddMetadataToPod(ctx, rayCluster, "test-group", pod)

	// Assert scheduler name is still set
	a.Equal("kai-scheduler", pod.Spec.SchedulerName)

	// Assert empty queue label is treated as missing
	if pod.Labels != nil {
		_, exists := pod.Labels[QueueLabelName]
		a.False(exists)
	}
}

func TestAddMetadataToPod_PreservesExistingPodLabels(t *testing.T) {
	a := assert.New(t)
	scheduler := &KaiScheduler{}
	ctx := context.Background()

	// Create RayCluster with queue label
	rayCluster := createTestRayCluster(map[string]string{
		QueueLabelName: "test-queue",
	})

	// Create pod with existing labels
	pod := createTestPod()
	pod.Labels = map[string]string{
		"existing-label": "existing-value",
		"app":            "ray",
	}

	// Call AddMetadataToPod
	scheduler.AddMetadataToPod(ctx, rayCluster, "test-group", pod)

	// Assert scheduler name is set
	a.Equal("kai-scheduler", pod.Spec.SchedulerName)

	// Assert queue label is added
	a.Equal("test-queue", pod.Labels[QueueLabelName])

	// Assert existing labels are preserved
	a.Equal("existing-value", pod.Labels["existing-label"])
	a.Equal("ray", pod.Labels["app"])
}