diff --git a/docs/api-reference/operator-api.md b/docs/api-reference/operator-api.md
index ed22aab08..a46d3f98d 100644
--- a/docs/api-reference/operator-api.md
+++ b/docs/api-reference/operator-api.md
@@ -742,6 +742,10 @@ _Appears in:_
| `enableProfiling` _boolean_ | EnableProfiling enables profiling via host:port/debug/pprof/ endpoints. | | |
+
+
+
+
#### LeaderElectionConfiguration
@@ -865,6 +869,57 @@ _Appears in:_
| `concurrentSyncs` _integer_ | ConcurrentSyncs is the number of workers used for the controller to concurrently work on events. | | |
+#### SchedulerConfiguration
+
+
+
+SchedulerConfiguration configures scheduler profiles and which is the default.
+
+
+
+_Appears in:_
+- [OperatorConfiguration](#operatorconfiguration)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `profiles` _[SchedulerProfile](#schedulerprofile) array_ | Profiles is the list of scheduler profiles. Each profile has a backend name and optional config.
The kube-scheduler backend is always enabled; use profile name "kube-scheduler" to configure or set it as default.
Valid profile names: "kube-scheduler", "kai-scheduler". Use defaultProfileName to designate the default backend. If not set, defaulting sets it to "kube-scheduler". | | |
+| `defaultProfileName` _string_ | DefaultProfileName is the name of the default scheduler profile. If unset, defaulting sets it to "kube-scheduler". | | |
+
+
+#### SchedulerName
+
+_Underlying type:_ _string_
+
+SchedulerName defines the name of the scheduler backend (used in OperatorConfiguration scheduler.profiles[].name).
+
+
+
+_Appears in:_
+- [SchedulerProfile](#schedulerprofile)
+
+| Field | Description |
+| --- | --- |
+| `kai-scheduler` | SchedulerNameKai is the KAI scheduler backend.
|
+| `kube-scheduler` | SchedulerNameKube is the profile name for the Kubernetes default scheduler in OperatorConfiguration.
|
+
+
+#### SchedulerProfile
+
+
+
+SchedulerProfile defines a scheduler backend profile with optional backend-specific config.
+
+
+
+_Appears in:_
+- [SchedulerConfiguration](#schedulerconfiguration)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `name` _[SchedulerName](#schedulername)_ | Name is the scheduler profile name. Valid values: "kube-scheduler", "kai-scheduler".
For the Kubernetes default scheduler use "kube-scheduler"; Pod.Spec.SchedulerName will be set to "default-scheduler". | | Enum: [kai-scheduler kube-scheduler]
Required: \{\}
|
+| `config` _[RawExtension](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.33/#rawextension-runtime-pkg)_ | Config holds backend-specific options. The operator unmarshals it into the config type for this backend (see backend config types). | | |
+
+
#### Server
diff --git a/docs/proposals/375-scheduler-backend-framework/README.md b/docs/proposals/375-scheduler-backend-framework/README.md
index 1a6576f27..95ac8e42a 100644
--- a/docs/proposals/375-scheduler-backend-framework/README.md
+++ b/docs/proposals/375-scheduler-backend-framework/README.md
@@ -215,7 +215,7 @@ func Initialize(client client.Client, scheme *runtime.Scheme, eventRecorder reco
// Get returns the backend for the given name. kube-scheduler is always available; other backends return nil if not enabled via a profile.
func Get(name string) SchedulerBackend
-// GetDefault returns the backend designated as default in OperatorConfiguration (the profile with default: true; if none, kube-scheduler). The manager does not define the default; it exposes the one from config.
+// GetDefault returns the backend designated as default in OperatorConfiguration (scheduler.defaultProfileName).
func GetDefault() SchedulerBackend
```
@@ -241,12 +241,15 @@ type OperatorConfiguration struct {
// SchedulerConfiguration configures scheduler profiles and which is the default.
type SchedulerConfiguration struct {
- // Profiles is the list of scheduler profiles. Each profile has a backend name, optional config, and whether it is the default.
+ // Profiles is the list of scheduler profiles. Each profile has a backend name and optional config.
// The kube-scheduler backend is always enabled and active even if not listed here. Listing "kube-scheduler" in profiles
- // only adds a profile (e.g. with config like GangScheduling: false) and allows marking it as default.
- // Valid backend names: "kube-scheduler", "kai-scheduler". Exactly one profile should have default: true; if none, kube-scheduler is the default.
+ // only adds a profile (e.g. with config like GangScheduling: false). Use defaultProfileName to designate the default backend.
+ // Valid backend names: "kube-scheduler", "kai-scheduler". If defaultProfileName is unset, defaulting sets it to "kube-scheduler".
// +optional
Profiles []SchedulerProfile `json:"profiles,omitempty"`
+	// DefaultProfileName is the name of the default scheduler profile. If unset, defaulting sets it to "kube-scheduler".
+ // +optional
+ DefaultProfileName string `json:"defaultProfileName,omitempty"`
}
// SchedulerName is the name for a supported scheduler backend.
@@ -270,7 +273,7 @@ var SupportedSchedulerNames = []SchedulerName {
//
}
-// SchedulerProfile defines a scheduler backend profile with optional backend-specific config and default flag.
+// SchedulerProfile defines a scheduler backend profile with optional backend-specific config.
type SchedulerProfile struct {
// Name is the scheduler backend name. Valid values: "kube-scheduler", "kai-scheduler".
// +kubebuilder:validation:Enum=kai-scheduler;kube-scheduler
@@ -279,10 +282,6 @@ type SchedulerProfile struct {
// Config holds backend-specific options. The operator unmarshals it into the config type for this backend (see backend config types below).
// +optional
Config *runtime.RawExtension `json:"config,omitempty"`
-
- // Default indicates this profile is the default backend when a workload does not specify one. Exactly one profile should have default: true.
- // +optional
- Default bool `json:"default,omitempty"`
}
```
@@ -290,7 +289,8 @@ The `OperatorConfiguration` provides a way to enable and configure one or more s
- **Name:** This is the name of the scheduler backend. This must be one of the supported schedulers.
- **Config:** Optional scheduler-specific configuration as `runtime.RawExtension`. It is the responsibility of the scheduler backend implementation to interpret and possibly deserialize it to type.
-- **Default:** Indicates if this scheduler should be the default. In case no scheduler name is set in any `PodSpec` across all `PodCliqueTemplateSpec` then the default scheduler as indicated via this field will be set.
+
+`SchedulerConfiguration.defaultProfileName` designates which profile is the default. When no scheduler name is set in any `PodSpec` across all `PodCliqueTemplateSpec`, the default scheduler indicated by `defaultProfileName` will be used.
**Backend Enabling Behavior:**
@@ -300,22 +300,20 @@ The kube-scheduler backend has special behavior compared to other scheduler back
2. **Explicit Configuration Optional**: You only need to add kube-scheduler to `profiles` if you want to:
- Configure it with specific options (e.g., `gangScheduling: true`)
- - Explicitly mark it as the default (though it's already the default if no other profile sets `default: true`)
+ - Set it as the default via `defaultProfileName` (defaulting sets kube-scheduler as default when `defaultProfileName` is unset)
3. **Other Schedulers Require Explicit Enablement**: All non-kube-scheduler backends (kai-scheduler, third-party schedulers) must be explicitly listed in `profiles` to be enabled. If a workload references a scheduler that is not in the profiles list, the validating webhook will reject the PodCliqueSet.
4. **Default Selection Logic**:
- - If `profiles` is empty → kube-scheduler is the default
- - If exactly one profile has `default: true` → that backend is the default
- - If multiple profiles have `default: true` → operator startup fails with validation error
- - If no profile has `default: true` → kube-scheduler is the default (even if not in the list)
+ - If `profiles` is empty → defaulting adds kube-scheduler and sets `defaultProfileName: "kube-scheduler"`
+ - `defaultProfileName` must be one of the configured profile names; validation rejects invalid or missing default profile name
If no `SchedulerProfile` has been set, then Grove operator behaves as if you specified:
```yaml
scheduler:
+ defaultProfileName: kube-scheduler
profiles:
- - name: "kube-scheduler"
- default: true
+ - name: kube-scheduler
```
> NOTE: If you as a workload operator wish to use a specific scheduler, please ensure that it has been enabled and properly configured as part of `OperatorConfiguration`. If PodCliqueSet uses a scheduler which has not been enabled, then the validating webhook will reject any creation request for this PodCliqueSet.
@@ -336,46 +334,46 @@ type KubeSchedulerConfig struct {
```yaml
# --- Omit scheduler profiles completely ---
-# Same as profiles: [{ name: "kube-scheduler", default: true }]
+# Same as defaultProfileName: kube-scheduler, profiles: [{ name: "kube-scheduler" }]
```
```yaml
# --- Single scheduler profile, no specific configuration ---
scheduler:
+ defaultProfileName: kube-scheduler
profiles:
- - name: "kube-scheduler"
- default: true
+ - name: kube-scheduler
# In this configuration Gang Scheduling will not be enabled
```
```yaml
# --- Single scheduler profile with configuration ---
scheduler:
+ defaultProfileName: kube-scheduler
profiles:
- - name: "kube-scheduler"
+ - name: kube-scheduler
config:
gangScheduling: true
- default: true
```
```yaml
# --- Multiple scheduler profiles; default is kube-scheduler ---
scheduler:
+ defaultProfileName: kube-scheduler
profiles:
- - name: "kube-scheduler"
+ - name: kube-scheduler
config:
gangScheduling: true
- default: true
- - name: "kai-scheduler" # no scheduler-specific configuration is defined
+ - name: kai-scheduler # no scheduler-specific configuration is defined
```
```yaml
# --- Only kai-scheduler profile; kube-scheduler is still implicitly available but kai-scheduler is the default ---
scheduler:
+ defaultProfileName: kai-scheduler
profiles:
- - name: "kai-scheduler"
+ - name: kai-scheduler
config: {}
- default: true
```
diff --git a/operator/api/common/labels.go b/operator/api/common/labels.go
index b453fea33..5aa81a194 100644
--- a/operator/api/common/labels.go
+++ b/operator/api/common/labels.go
@@ -43,6 +43,8 @@ const (
LabelPodCliqueScalingGroupReplicaIndex = "grove.io/podcliquescalinggroup-replica-index"
// LabelPodTemplateHash is a key for a label that sets the hash of the PodSpec. This label will be set on a PodClique and will be shared by all pods in the PodClique.
LabelPodTemplateHash = "grove.io/pod-template-hash"
+ // LabelSchedulerName is a label on PodGang that indicates which scheduler backend should sync this PodGang.
+ LabelSchedulerName = "grove.io/scheduler-name"
)
// Labels for setting component names for all managed resources whose lifecycle
diff --git a/operator/api/config/v1alpha1/defaults.go b/operator/api/config/v1alpha1/defaults.go
index ec91bc3aa..7652e1032 100644
--- a/operator/api/config/v1alpha1/defaults.go
+++ b/operator/api/config/v1alpha1/defaults.go
@@ -69,6 +69,37 @@ func SetDefaults_OperatorConfiguration(operatorConfig *OperatorConfiguration) {
}
}
+// SetDefaults_SchedulerConfiguration sets defaults for scheduler configuration.
+// Principle: respect all user-explicit values first.
+//
+// 1. If user did not include kube in profiles, add kube.
+// 2. If defaultProfileName is unset, set it to "kube-scheduler". Validation will reject invalid cases.
+func SetDefaults_SchedulerConfiguration(cfg *SchedulerConfiguration) {
+ if len(cfg.Profiles) == 0 {
+ cfg.Profiles = []SchedulerProfile{
+ {Name: SchedulerNameKube},
+ }
+ cfg.DefaultProfileName = string(SchedulerNameKube)
+ return
+ }
+ // 1. If user didn't add kube, add it.
+ hasKube := false
+ for i := range cfg.Profiles {
+ if cfg.Profiles[i].Name == SchedulerNameKube {
+ hasKube = true
+ break
+ }
+ }
+ if !hasKube {
+ cfg.Profiles = append(cfg.Profiles, SchedulerProfile{Name: SchedulerNameKube})
+ }
+
+ // 2. No default profile name → set kube as default.
+ if cfg.DefaultProfileName == "" {
+ cfg.DefaultProfileName = string(SchedulerNameKube)
+ }
+}
+
// SetDefaults_ServerConfiguration sets defaults for the server configuration.
func SetDefaults_ServerConfiguration(serverConfig *ServerConfiguration) {
if serverConfig.Webhooks.Port == 0 {
diff --git a/operator/api/config/v1alpha1/defaults_test.go b/operator/api/config/v1alpha1/defaults_test.go
new file mode 100644
index 000000000..b8fa6cc91
--- /dev/null
+++ b/operator/api/config/v1alpha1/defaults_test.go
@@ -0,0 +1,128 @@
+// /*
+// Copyright 2026 The Grove Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// */
+
+package v1alpha1
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+func TestSetDefaults_SchedulerConfiguration(t *testing.T) {
+ tests := []struct {
+ name string
+ cfg *SchedulerConfiguration
+ wantProfiles []SchedulerProfile
+ wantDefaultProfile string
+ }{
+ {
+ name: "empty profiles: add kube and set defaultProfileName",
+ cfg: &SchedulerConfiguration{},
+ wantProfiles: []SchedulerProfile{{Name: SchedulerNameKube}},
+ wantDefaultProfile: string(SchedulerNameKube),
+ },
+ {
+ name: "nil profiles (len 0): add kube and set defaultProfileName",
+ cfg: &SchedulerConfiguration{
+ Profiles: nil,
+ DefaultProfileName: "",
+ },
+ wantProfiles: []SchedulerProfile{{Name: SchedulerNameKube}},
+ wantDefaultProfile: string(SchedulerNameKube),
+ },
+ {
+ name: "only kai in profiles: append kube and set defaultProfileName",
+ cfg: &SchedulerConfiguration{
+ Profiles: []SchedulerProfile{{Name: SchedulerNameKai}},
+ DefaultProfileName: "",
+ },
+ wantProfiles: []SchedulerProfile{{Name: SchedulerNameKai}, {Name: SchedulerNameKube}},
+ wantDefaultProfile: string(SchedulerNameKube),
+ },
+ {
+ name: "only kube in profiles, defaultProfileName unset: set defaultProfileName",
+ cfg: &SchedulerConfiguration{
+ Profiles: []SchedulerProfile{{Name: SchedulerNameKube}},
+ DefaultProfileName: "",
+ },
+ wantProfiles: []SchedulerProfile{{Name: SchedulerNameKube}},
+ wantDefaultProfile: string(SchedulerNameKube),
+ },
+ {
+ name: "kube and kai in profiles, defaultProfileName unset: set defaultProfileName to kube",
+ cfg: &SchedulerConfiguration{
+ Profiles: []SchedulerProfile{
+ {Name: SchedulerNameKube},
+ {Name: SchedulerNameKai},
+ },
+ DefaultProfileName: "",
+ },
+ wantProfiles: []SchedulerProfile{
+ {Name: SchedulerNameKube},
+ {Name: SchedulerNameKai},
+ },
+ wantDefaultProfile: string(SchedulerNameKube),
+ },
+ {
+ name: "kube and kai in profiles, defaultProfileName already set to kube: no change",
+ cfg: &SchedulerConfiguration{
+ Profiles: []SchedulerProfile{
+ {Name: SchedulerNameKube},
+ {Name: SchedulerNameKai},
+ },
+ DefaultProfileName: string(SchedulerNameKube),
+ },
+ wantProfiles: []SchedulerProfile{
+ {Name: SchedulerNameKube},
+ {Name: SchedulerNameKai},
+ },
+ wantDefaultProfile: string(SchedulerNameKube),
+ },
+ {
+ name: "kube and kai in profiles, defaultProfileName already set to kai: no change",
+ cfg: &SchedulerConfiguration{
+ Profiles: []SchedulerProfile{
+ {Name: SchedulerNameKube},
+ {Name: SchedulerNameKai},
+ },
+ DefaultProfileName: string(SchedulerNameKai),
+ },
+ wantProfiles: []SchedulerProfile{
+ {Name: SchedulerNameKube},
+ {Name: SchedulerNameKai},
+ },
+ wantDefaultProfile: string(SchedulerNameKai),
+ },
+ {
+ name: "only kai in profiles, defaultProfileName already kai: append kube only",
+ cfg: &SchedulerConfiguration{
+ Profiles: []SchedulerProfile{{Name: SchedulerNameKai}},
+ DefaultProfileName: string(SchedulerNameKai),
+ },
+ wantProfiles: []SchedulerProfile{{Name: SchedulerNameKai}, {Name: SchedulerNameKube}},
+ wantDefaultProfile: string(SchedulerNameKai),
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ SetDefaults_SchedulerConfiguration(tt.cfg)
+ assert.Equal(t, tt.wantProfiles, tt.cfg.Profiles, "Profiles after defaulting")
+ assert.Equal(t, tt.wantDefaultProfile, tt.cfg.DefaultProfileName, "DefaultProfileName after defaulting")
+ })
+ }
+}
diff --git a/operator/api/config/v1alpha1/types.go b/operator/api/config/v1alpha1/types.go
index 9420783df..fea869cff 100644
--- a/operator/api/config/v1alpha1/types.go
+++ b/operator/api/config/v1alpha1/types.go
@@ -20,6 +20,7 @@ import (
corev1alpha1 "github.com/ai-dynamo/grove/operator/api/core/v1alpha1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/runtime"
)
// LogFormat defines the format of the log.
@@ -51,6 +52,59 @@ var (
AllLogFormats = []LogFormat{LogFormatJSON, LogFormatText}
)
+// SchedulerName defines the name of the scheduler backend (used in OperatorConfiguration scheduler.profiles[].name).
+type SchedulerName string
+
+const (
+ // SchedulerNameKai is the KAI scheduler backend.
+ SchedulerNameKai SchedulerName = "kai-scheduler"
+ // SchedulerNameKube is the profile name for the Kubernetes default scheduler in OperatorConfiguration.
+ SchedulerNameKube SchedulerName = "kube-scheduler"
+)
+
+var (
+ // SupportedSchedulerNames is the list of profile names allowed in scheduler.profiles[].name.
+ SupportedSchedulerNames = []SchedulerName{SchedulerNameKai, SchedulerNameKube}
+)
+
+// SchedulerConfiguration configures scheduler profiles and which is the default.
+type SchedulerConfiguration struct {
+ // Profiles is the list of scheduler profiles. Each profile has a backend name and optional config.
+ // The kube-scheduler backend is always enabled; use profile name "kube-scheduler" to configure or set it as default.
+ // Valid profile names: "kube-scheduler", "kai-scheduler". Use defaultProfileName to designate the default backend. If not set, defaulting sets it to "kube-scheduler".
+ // +optional
+ Profiles []SchedulerProfile `json:"profiles,omitempty"`
+ // DefaultProfileName is the name of the default scheduler profile. If unset, defaulting sets it to "kube-scheduler".
+ // +optional
+ DefaultProfileName string `json:"defaultProfileName,omitempty"`
+}
+
+// SchedulerProfile defines a scheduler backend profile with optional backend-specific config.
+type SchedulerProfile struct {
+ // Name is the scheduler profile name. Valid values: "kube-scheduler", "kai-scheduler".
+ // For the Kubernetes default scheduler use "kube-scheduler"; Pod.Spec.SchedulerName will be set to "default-scheduler".
+ // +kubebuilder:validation:Required
+ // +kubebuilder:validation:Enum=kai-scheduler;kube-scheduler
+ Name SchedulerName `json:"name"`
+
+ // Config holds backend-specific options. The operator unmarshals it into the config type for this backend (see backend config types).
+ // +optional
+ Config *runtime.RawExtension `json:"config,omitempty"`
+}
+
+// KaiSchedulerConfiguration defines the configuration for the kai-scheduler backend.
+type KaiSchedulerConfiguration struct {
+ // Reserved for future kai-scheduler-specific options.
+}
+
+// KubeSchedulerConfig holds the configuration for the kube-scheduler backend.
+// Used when unmarshalling SchedulerProfile.Config for the "kube-scheduler" profile.
+type KubeSchedulerConfig struct {
+ // GangScheduling indicates if Gang scheduling capability is enabled.
+ // +optional
+ GangScheduling bool `json:"gangScheduling,omitempty"`
+}
+
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// OperatorConfiguration defines the configuration for the Grove operator.
@@ -67,6 +121,8 @@ type OperatorConfiguration struct {
TopologyAwareScheduling TopologyAwareSchedulingConfiguration `json:"topologyAwareScheduling"`
// +optional
Network NetworkAcceleration `json:"network,omitempty"` // Network is the configuration for network acceleration features like MNNVL.
+ // Scheduler configures which scheduler backends are active and their per-backend options.
+ Scheduler SchedulerConfiguration `json:"scheduler"`
}
// LeaderElectionConfiguration defines the configuration for the leader election.
diff --git a/operator/api/config/v1alpha1/zz_generated.deepcopy.go b/operator/api/config/v1alpha1/zz_generated.deepcopy.go
index fa34b9f1e..6a4804b23 100644
--- a/operator/api/config/v1alpha1/zz_generated.deepcopy.go
+++ b/operator/api/config/v1alpha1/zz_generated.deepcopy.go
@@ -103,6 +103,38 @@ func (in *DebuggingConfiguration) DeepCopy() *DebuggingConfiguration {
return out
}
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *KaiSchedulerConfiguration) DeepCopyInto(out *KaiSchedulerConfiguration) {
+ *out = *in
+ return
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KaiSchedulerConfiguration.
+func (in *KaiSchedulerConfiguration) DeepCopy() *KaiSchedulerConfiguration {
+ if in == nil {
+ return nil
+ }
+ out := new(KaiSchedulerConfiguration)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *KubeSchedulerConfig) DeepCopyInto(out *KubeSchedulerConfig) {
+ *out = *in
+ return
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeSchedulerConfig.
+func (in *KubeSchedulerConfig) DeepCopy() *KubeSchedulerConfig {
+ if in == nil {
+ return nil
+ }
+ out := new(KubeSchedulerConfig)
+ in.DeepCopyInto(out)
+ return out
+}
+
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *LeaderElectionConfiguration) DeepCopyInto(out *LeaderElectionConfiguration) {
*out = *in
@@ -154,6 +186,7 @@ func (in *OperatorConfiguration) DeepCopyInto(out *OperatorConfiguration) {
in.Authorizer.DeepCopyInto(&out.Authorizer)
in.TopologyAwareScheduling.DeepCopyInto(&out.TopologyAwareScheduling)
out.Network = in.Network
+ in.Scheduler.DeepCopyInto(&out.Scheduler)
return
}
@@ -238,6 +271,50 @@ func (in *PodCliqueSetControllerConfiguration) DeepCopy() *PodCliqueSetControlle
return out
}
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *SchedulerConfiguration) DeepCopyInto(out *SchedulerConfiguration) {
+ *out = *in
+ if in.Profiles != nil {
+ in, out := &in.Profiles, &out.Profiles
+ *out = make([]SchedulerProfile, len(*in))
+ for i := range *in {
+ (*in)[i].DeepCopyInto(&(*out)[i])
+ }
+ }
+ return
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulerConfiguration.
+func (in *SchedulerConfiguration) DeepCopy() *SchedulerConfiguration {
+ if in == nil {
+ return nil
+ }
+ out := new(SchedulerConfiguration)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *SchedulerProfile) DeepCopyInto(out *SchedulerProfile) {
+ *out = *in
+ if in.Config != nil {
+ in, out := &in.Config, &out.Config
+ *out = new(runtime.RawExtension)
+ (*in).DeepCopyInto(*out)
+ }
+ return
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulerProfile.
+func (in *SchedulerProfile) DeepCopy() *SchedulerProfile {
+ if in == nil {
+ return nil
+ }
+ out := new(SchedulerProfile)
+ in.DeepCopyInto(out)
+ return out
+}
+
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Server) DeepCopyInto(out *Server) {
*out = *in
diff --git a/operator/api/config/v1alpha1/zz_generated.defaults.go b/operator/api/config/v1alpha1/zz_generated.defaults.go
index 25590c97a..45365fa50 100644
--- a/operator/api/config/v1alpha1/zz_generated.defaults.go
+++ b/operator/api/config/v1alpha1/zz_generated.defaults.go
@@ -41,4 +41,5 @@ func SetObjectDefaults_OperatorConfiguration(in *OperatorConfiguration) {
SetDefaults_PodCliqueSetControllerConfiguration(&in.Controllers.PodCliqueSet)
SetDefaults_PodCliqueControllerConfiguration(&in.Controllers.PodClique)
SetDefaults_PodCliqueScalingGroupControllerConfiguration(&in.Controllers.PodCliqueScalingGroup)
+ SetDefaults_SchedulerConfiguration(&in.Scheduler)
}
diff --git a/operator/api/config/validation/validation.go b/operator/api/config/validation/validation.go
index 32f46758e..057780579 100644
--- a/operator/api/config/validation/validation.go
+++ b/operator/api/config/validation/validation.go
@@ -34,6 +34,7 @@ import (
func ValidateOperatorConfiguration(config *configv1alpha1.OperatorConfiguration) field.ErrorList {
allErrs := field.ErrorList{}
allErrs = append(allErrs, validateLogConfiguration(config)...)
+ allErrs = append(allErrs, validateSchedulerConfiguration(&config.Scheduler, field.NewPath("scheduler"))...)
allErrs = append(allErrs, validateLeaderElectionConfiguration(config.LeaderElection, field.NewPath("leaderElection"))...)
allErrs = append(allErrs, validateClientConnectionConfiguration(config.ClientConnection, field.NewPath("clientConnection"))...)
allErrs = append(allErrs, validateControllerConfiguration(config.Controllers, field.NewPath("controllers"))...)
@@ -52,6 +53,32 @@ func validateLogConfiguration(config *configv1alpha1.OperatorConfiguration) fiel
return allErrs
}
+func validateSchedulerConfiguration(scheduler *configv1alpha1.SchedulerConfiguration, fldPath *field.Path) field.ErrorList {
+ allErrs := field.ErrorList{}
+ profilesPath := fldPath.Child("profiles")
+ defaultProfileNamePath := fldPath.Child("defaultProfileName")
+ seenNames := sets.New[configv1alpha1.SchedulerName]()
+ for i, p := range scheduler.Profiles {
+ idxPath := profilesPath.Index(i)
+ if len(strings.TrimSpace(string(p.Name))) == 0 {
+ allErrs = append(allErrs, field.Required(idxPath.Child("name"), "scheduler profile name is required"))
+ } else if !slices.Contains(configv1alpha1.SupportedSchedulerNames, p.Name) {
+ allErrs = append(allErrs, field.NotSupported(idxPath.Child("name"), p.Name, configv1alpha1.SupportedSchedulerNames))
+ } else {
+ if seenNames.Has(p.Name) {
+ allErrs = append(allErrs, field.Duplicate(idxPath.Child("name"), p.Name))
+ }
+ seenNames.Insert(p.Name)
+ }
+ }
+ if strings.TrimSpace(scheduler.DefaultProfileName) == "" {
+ allErrs = append(allErrs, field.Required(defaultProfileNamePath, "default scheduler profile name is required"))
+ } else if !seenNames.Has(configv1alpha1.SchedulerName(scheduler.DefaultProfileName)) {
+ allErrs = append(allErrs, field.Invalid(defaultProfileNamePath, scheduler.DefaultProfileName, "default profile must be one of the configured profiles"))
+ }
+ return allErrs
+}
+
func validateLeaderElectionConfiguration(cfg configv1alpha1.LeaderElectionConfiguration, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
if !cfg.Enabled {
diff --git a/operator/api/config/validation/validation_test.go b/operator/api/config/validation/validation_test.go
index d802ead67..116dbcd0d 100644
--- a/operator/api/config/validation/validation_test.go
+++ b/operator/api/config/validation/validation_test.go
@@ -208,3 +208,164 @@ func TestValidateTopologyAwareSchedulingConfiguration(t *testing.T) {
})
}
}
+
+func TestValidateSchedulerConfiguration(t *testing.T) {
+ fldPath := field.NewPath("scheduler")
+ tests := []struct {
+ name string
+ scheduler *configv1alpha1.SchedulerConfiguration
+ expectErrors int
+ expectedFields []string
+ expectedTypes []field.ErrorType
+ }{
+ // Here we test pre-defaulting: empty profiles + empty defaultProfileName → Required for defaultProfileName
+ {
+ name: "invalid: empty profiles and empty defaultProfileName",
+ scheduler: &configv1alpha1.SchedulerConfiguration{
+ Profiles: []configv1alpha1.SchedulerProfile{},
+ DefaultProfileName: "",
+ },
+ expectErrors: 1,
+ expectedFields: []string{"scheduler.defaultProfileName"},
+ expectedTypes: []field.ErrorType{field.ErrorTypeRequired},
+ },
+ // single kube
+ {
+ name: "valid: single kube default",
+ scheduler: &configv1alpha1.SchedulerConfiguration{
+ Profiles: []configv1alpha1.SchedulerProfile{{Name: configv1alpha1.SchedulerNameKube}},
+ DefaultProfileName: string(configv1alpha1.SchedulerNameKube),
+ },
+ expectErrors: 0,
+ },
+ // single kai
+ {
+ name: "valid: single kai default",
+ scheduler: &configv1alpha1.SchedulerConfiguration{
+ Profiles: []configv1alpha1.SchedulerProfile{{Name: configv1alpha1.SchedulerNameKai}},
+ DefaultProfileName: string(configv1alpha1.SchedulerNameKai),
+ },
+ expectErrors: 0,
+ },
+ // multiple schedulers, kube default
+ {
+ name: "valid: multiple schedulers kube default",
+ scheduler: &configv1alpha1.SchedulerConfiguration{
+ Profiles: []configv1alpha1.SchedulerProfile{
+ {Name: configv1alpha1.SchedulerNameKube},
+ {Name: configv1alpha1.SchedulerNameKai},
+ },
+ DefaultProfileName: string(configv1alpha1.SchedulerNameKube),
+ },
+ expectErrors: 0,
+ },
+ // multiple schedulers, kai default
+ {
+ name: "valid: multiple schedulers kai default",
+ scheduler: &configv1alpha1.SchedulerConfiguration{
+ Profiles: []configv1alpha1.SchedulerProfile{
+ {Name: configv1alpha1.SchedulerNameKube},
+ {Name: configv1alpha1.SchedulerNameKai},
+ },
+ DefaultProfileName: string(configv1alpha1.SchedulerNameKai),
+ },
+ expectErrors: 0,
+ },
+ // defaultProfileName omitted (pre-defaulting → Required)
+ {
+ name: "invalid: defaultProfileName omitted",
+ scheduler: &configv1alpha1.SchedulerConfiguration{
+ Profiles: []configv1alpha1.SchedulerProfile{
+ {Name: configv1alpha1.SchedulerNameKube},
+ {Name: configv1alpha1.SchedulerNameKai},
+ },
+ DefaultProfileName: "",
+ },
+ expectErrors: 1,
+ expectedFields: []string{"scheduler.defaultProfileName"},
+ expectedTypes: []field.ErrorType{field.ErrorTypeRequired},
+ },
+ // invalid defaultProfileName (not in supported list; not in profiles → Invalid)
+ {
+ name: "invalid: defaultProfileName not in profiles (e.g. invalid-scheduler)",
+ scheduler: &configv1alpha1.SchedulerConfiguration{
+ Profiles: []configv1alpha1.SchedulerProfile{
+ {Name: configv1alpha1.SchedulerNameKube},
+ {Name: configv1alpha1.SchedulerNameKai},
+ },
+ DefaultProfileName: "invalid-scheduler",
+ },
+ expectErrors: 1,
+ expectedFields: []string{"scheduler.defaultProfileName"},
+ expectedTypes: []field.ErrorType{field.ErrorTypeInvalid},
+ },
+ // defaultProfileName is kube but kube not in profiles
+ {
+ name: "invalid: defaultProfileName not in profiles (kube-scheduler but only kai in profiles)",
+ scheduler: &configv1alpha1.SchedulerConfiguration{
+ Profiles: []configv1alpha1.SchedulerProfile{{Name: configv1alpha1.SchedulerNameKai}},
+ DefaultProfileName: string(configv1alpha1.SchedulerNameKube),
+ },
+ expectErrors: 1,
+ expectedFields: []string{"scheduler.defaultProfileName"},
+ expectedTypes: []field.ErrorType{field.ErrorTypeInvalid},
+ },
+ // empty name in profile
+ {
+ name: "invalid: profile with empty name",
+ scheduler: &configv1alpha1.SchedulerConfiguration{
+ Profiles: []configv1alpha1.SchedulerProfile{
+ {Name: ""},
+ },
+ DefaultProfileName: "kube-scheduler",
+ },
+ expectErrors: 2,
+ expectedFields: []string{"scheduler.profiles[0].name", "scheduler.defaultProfileName"},
+ expectedTypes: []field.ErrorType{field.ErrorTypeRequired, field.ErrorTypeInvalid},
+ },
+ // unsupported profile name
+ {
+ name: "invalid: unsupported profile name",
+ scheduler: &configv1alpha1.SchedulerConfiguration{
+ Profiles: []configv1alpha1.SchedulerProfile{
+ {Name: configv1alpha1.SchedulerName("volcano")},
+ },
+ DefaultProfileName: "volcano",
+ },
+ expectErrors: 2,
+ expectedFields: []string{"scheduler.profiles[0].name", "scheduler.defaultProfileName"},
+ expectedTypes: []field.ErrorType{field.ErrorTypeNotSupported, field.ErrorTypeInvalid},
+ },
+ // duplicate profile names
+ {
+ name: "invalid: duplicate profile names",
+ scheduler: &configv1alpha1.SchedulerConfiguration{
+ Profiles: []configv1alpha1.SchedulerProfile{
+ {Name: configv1alpha1.SchedulerNameKube},
+ {Name: configv1alpha1.SchedulerNameKube},
+ },
+ DefaultProfileName: string(configv1alpha1.SchedulerNameKube),
+ },
+ expectErrors: 1,
+ expectedFields: []string{"scheduler.profiles[1].name"},
+ expectedTypes: []field.ErrorType{field.ErrorTypeDuplicate},
+ },
+ }
+
+ for _, test := range tests {
+ t.Run(test.name, func(t *testing.T) {
+ errs := validateSchedulerConfiguration(test.scheduler, fldPath)
+
+ assert.Len(t, errs, test.expectErrors, "expected %d validation errors but got %d: %v", test.expectErrors, len(errs), errs)
+
+ if test.expectErrors > 0 {
+ for i, expectedField := range test.expectedFields {
+ assert.Equal(t, expectedField, errs[i].Field, "error %d: expected field %s but got %s", i, expectedField, errs[i].Field)
+ if i < len(test.expectedTypes) {
+ assert.Equal(t, test.expectedTypes[i], errs[i].Type, "error %d: expected type %s but got %s", i, test.expectedTypes[i], errs[i].Type)
+ }
+ }
+ }
+ })
+ }
+}
diff --git a/operator/charts/templates/_helpers.tpl b/operator/charts/templates/_helpers.tpl
index 2337e3e39..7d5298d16 100644
--- a/operator/charts/templates/_helpers.tpl
+++ b/operator/charts/templates/_helpers.tpl
@@ -30,6 +30,19 @@ config.yaml: |
concurrentSyncs: {{ .Values.config.controllers.podClique.concurrentSyncs }}
podCliqueScalingGroup:
concurrentSyncs: {{ .Values.config.controllers.podCliqueScalingGroup.concurrentSyncs }}
+ {{- if and .Values.config.scheduler .Values.config.scheduler.profiles }}
+ scheduler:
+ {{- if .Values.config.scheduler.defaultProfileName }}
+ defaultProfileName: {{ .Values.config.scheduler.defaultProfileName }}
+ {{- end }}
+ profiles:
+ {{- range .Values.config.scheduler.profiles }}
+ - name: {{ .name }}
+ {{- if hasKey . "config" }}
+ config: {{ toYaml .config | nindent 4 }}
+ {{- end }}
+ {{- end }}
+ {{- end }}
{{- if .Values.config.debugging }}
debugging:
enableProfiling: {{ .Values.config.debugging.enableProfiling }}
diff --git a/operator/charts/templates/clusterrole.yaml b/operator/charts/templates/clusterrole.yaml
index af0661fc0..d6da405f6 100644
--- a/operator/charts/templates/clusterrole.yaml
+++ b/operator/charts/templates/clusterrole.yaml
@@ -9,6 +9,7 @@ rules:
- scheduler.grove.io
resources:
- podgangs
+ - podgangs/status
verbs:
- create
- get
diff --git a/operator/charts/values.yaml b/operator/charts/values.yaml
index 1cbb88993..5c2c59492 100644
--- a/operator/charts/values.yaml
+++ b/operator/charts/values.yaml
@@ -83,6 +83,12 @@ config:
concurrentSyncs: 3
podCliqueScalingGroup:
concurrentSyncs: 3
+  # Scheduler configures which scheduler backends are active. kube-scheduler is always available.
+ # List profiles to enable backends; set defaultProfileName to the profile that is the default backend.
+ scheduler:
+ profiles:
+ - name: kube-scheduler
+ - name: kai-scheduler
logLevel: info
logFormat: json
topologyAwareScheduling:
diff --git a/operator/cmd/cli/cli.go b/operator/cmd/cli/cli.go
index e5f24f1fd..56aa4730f 100644
--- a/operator/cmd/cli/cli.go
+++ b/operator/cmd/cli/cli.go
@@ -41,6 +41,8 @@ const (
// ExitErrInitializeManager indicates that the application exited due to an error initializing the manager.
// This includes registration of controllers and webhooks and setting up probes.
ExitErrInitializeManager
+ // ExitErrInitializeSchedulerBackend indicates that the application exited due to an error initializing the scheduler backend.
+ ExitErrInitializeSchedulerBackend
// ExitErrStart indicates that the application exited due to an error when starting the application.
ExitErrStart
// ExitErrMNNVLPrerequisites indicates that the application exited because MNNVL prerequisites are not met.
diff --git a/operator/cmd/cli/testdata/valid-config-mnnvl-enabled.yaml b/operator/cmd/cli/testdata/valid-config-mnnvl-enabled.yaml
index b7c06f7a1..a9af417e7 100644
--- a/operator/cmd/cli/testdata/valid-config-mnnvl-enabled.yaml
+++ b/operator/cmd/cli/testdata/valid-config-mnnvl-enabled.yaml
@@ -29,6 +29,9 @@ controllers:
concurrentSyncs: 3
podCliqueScalingGroup:
concurrentSyncs: 2
+scheduler:
+ profiles:
+ - name: kai-scheduler
logLevel: info
logFormat: json
authorizer:
diff --git a/operator/cmd/cli/testdata/valid-config.yaml b/operator/cmd/cli/testdata/valid-config.yaml
index 2fe57fc4c..d928a9e9a 100644
--- a/operator/cmd/cli/testdata/valid-config.yaml
+++ b/operator/cmd/cli/testdata/valid-config.yaml
@@ -29,6 +29,9 @@ controllers:
concurrentSyncs: 3
podCliqueScalingGroup:
concurrentSyncs: 2
+scheduler:
+ profiles:
+ - name: kai-scheduler
logLevel: info
logFormat: json
authorizer:
diff --git a/operator/cmd/main.go b/operator/cmd/main.go
index 309e0abcf..25702e725 100644
--- a/operator/cmd/main.go
+++ b/operator/cmd/main.go
@@ -31,6 +31,7 @@ import (
"github.com/ai-dynamo/grove/operator/internal/controller/cert"
grovelogger "github.com/ai-dynamo/grove/operator/internal/logger"
"github.com/ai-dynamo/grove/operator/internal/mnnvl"
+ "github.com/ai-dynamo/grove/operator/internal/schedulerbackend"
groveversion "github.com/ai-dynamo/grove/operator/internal/version"
"github.com/spf13/pflag"
@@ -87,6 +88,18 @@ func main() {
handleErrorAndExit(err, cli.ExitErrInitializeManager)
}
+ // Initialize scheduler backends with the configured schedulers.
+ if err := schedulerbackend.Initialize(
+ mgr.GetClient(),
+ mgr.GetScheme(),
+ mgr.GetEventRecorderFor("scheduler-backend"),
+ operatorConfig.Scheduler,
+ ); err != nil {
+ logger.Error(err, "failed to initialize scheduler backend")
+ handleErrorAndExit(err, cli.ExitErrInitializeSchedulerBackend)
+ }
+
+ // TODO: Move this to the proper scheduler backend.
// Initialize or clean up ClusterTopology based on operator configuration.
// This must be done before starting the controllers that may depend on the ClusterTopology resource.
// NOTE: In this version of the operator the synchronization will additionally ensure that the KAI Topology resource
diff --git a/operator/go.mod b/operator/go.mod
index f138b4eb7..6358dd004 100644
--- a/operator/go.mod
+++ b/operator/go.mod
@@ -131,7 +131,7 @@ require (
go.opentelemetry.io/otel/trace v1.35.0 // indirect
go.uber.org/atomic v1.11.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
- go.yaml.in/yaml/v2 v2.4.2 // indirect
+ go.yaml.in/yaml/v2 v2.4.3 // indirect
go.yaml.in/yaml/v3 v3.0.4 // indirect
golang.org/x/crypto v0.45.0 // indirect
golang.org/x/net v0.47.0 // indirect
diff --git a/operator/go.sum b/operator/go.sum
index 8fc19746b..a2a83fcb7 100644
--- a/operator/go.sum
+++ b/operator/go.sum
@@ -365,8 +365,8 @@ go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8=
go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
-go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI=
-go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU=
+go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0=
+go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8=
go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
diff --git a/operator/internal/controller/manager.go b/operator/internal/controller/manager.go
index 8191c32c1..e0cc00fbc 100644
--- a/operator/internal/controller/manager.go
+++ b/operator/internal/controller/manager.go
@@ -59,10 +59,10 @@ func RegisterControllersAndWebhooks(mgr ctrl.Manager, logger logr.Logger, operat
// Controllers will not work unless the webhooks are fully configured and operational.
// For webhooks to work cert-controller should finish its work of generating and injecting certificates.
waitTillWebhookCertsReady(logger, certsReady)
- if err := registerControllersWithMgr(mgr, operatorCfg.Controllers, operatorCfg.TopologyAwareScheduling, operatorCfg.Network); err != nil {
+ if err := registerControllersWithMgr(mgr, operatorCfg); err != nil {
return err
}
- if err := registerWebhooksWithMgr(mgr, operatorCfg.Authorizer, operatorCfg.TopologyAwareScheduling, operatorCfg.Network); err != nil {
+ if err := registerWebhooksWithMgr(mgr, operatorCfg); err != nil {
return err
}
return nil
diff --git a/operator/internal/controller/manager_test.go b/operator/internal/controller/manager_test.go
index 6c55daa18..b17ef6467 100644
--- a/operator/internal/controller/manager_test.go
+++ b/operator/internal/controller/manager_test.go
@@ -552,11 +552,12 @@ func TestRegisterControllersAndWebhooks(t *testing.T) {
tc.waitFn(logger, ch)
}
}
- registerControllersWithMgr = func(_ ctrl.Manager, _ configv1alpha1.ControllerConfiguration, _ configv1alpha1.TopologyAwareSchedulingConfiguration, _ configv1alpha1.NetworkAcceleration) error {
+
+ registerControllersWithMgr = func(_ ctrl.Manager, _ *configv1alpha1.OperatorConfiguration) error {
controllersCalled = true
return tc.controllerErr
}
- registerWebhooksWithMgr = func(_ ctrl.Manager, _ configv1alpha1.AuthorizerConfig, _ configv1alpha1.TopologyAwareSchedulingConfiguration, _ configv1alpha1.NetworkAcceleration) error {
+ registerWebhooksWithMgr = func(_ ctrl.Manager, _ *configv1alpha1.OperatorConfiguration) error {
webhooksCalled = true
return tc.webhookErr
}
diff --git a/operator/internal/controller/podclique/components/pod/pod.go b/operator/internal/controller/podclique/components/pod/pod.go
index 61d434bee..fff56f9da 100644
--- a/operator/internal/controller/podclique/components/pod/pod.go
+++ b/operator/internal/controller/podclique/components/pod/pod.go
@@ -28,6 +28,7 @@ import (
componentutils "github.com/ai-dynamo/grove/operator/internal/controller/common/component/utils"
groveerr "github.com/ai-dynamo/grove/operator/internal/errors"
"github.com/ai-dynamo/grove/operator/internal/expect"
+ "github.com/ai-dynamo/grove/operator/internal/schedulerbackend"
"github.com/ai-dynamo/grove/operator/internal/utils"
k8sutils "github.com/ai-dynamo/grove/operator/internal/utils/kubernetes"
@@ -160,6 +161,20 @@ func (r _resource) buildResource(pcs *grovecorev1alpha1.PodCliqueSet, pclq *grov
}
pod.Spec = *pclq.Spec.PodSpec.DeepCopy()
pod.Spec.SchedulingGates = []corev1.PodSchedulingGate{{Name: podGangSchedulingGate}}
+
+ // Resolve scheduler: from template or default backend; then prepare pod (schedulerName, annotations, etc.)
+ schedulerName := pclq.Spec.PodSpec.SchedulerName
+ backend := schedulerbackend.Get(schedulerName)
+ if backend == nil {
+ return groveerr.WrapError(
+ fmt.Errorf("scheduler backend not found or not initialized: %q", schedulerName),
+ errCodeBuildPodResource,
+ component.OperationSync,
+ "failed to prepare pod spec with scheduler backend",
+ )
+ }
+ backend.PreparePod(pod)
+
// Add GROVE specific Pod environment variables
addEnvironmentVariables(pod, pclq, pcsName, pcsReplicaIndex, podIndex)
// Configure hostname and subdomain for service discovery
diff --git a/operator/internal/controller/podclique/components/pod/syncflow.go b/operator/internal/controller/podclique/components/pod/syncflow.go
index 575bcc642..cc540cb6d 100644
--- a/operator/internal/controller/podclique/components/pod/syncflow.go
+++ b/operator/internal/controller/podclique/components/pod/syncflow.go
@@ -238,7 +238,7 @@ func selectExcessPodsToDelete(sc *syncContext, logger logr.Logger) []*corev1.Pod
return candidatePodsToDelete
}
-// checkAndRemovePodSchedulingGates removes scheduling gates from pods when their dependencies are satisfied
+// checkAndRemovePodSchedulingGates removes scheduling gates from pods when PodGang is initialized
func (r _resource) checkAndRemovePodSchedulingGates(sc *syncContext, logger logr.Logger) ([]string, error) {
tasks := make([]utils.Task, 0, len(sc.existingPCLQPods))
skippedScheduleGatedPods := make([]string, 0, len(sc.existingPCLQPods))
diff --git a/operator/internal/controller/podclique/register.go b/operator/internal/controller/podclique/register.go
index e77fef883..c5839cd22 100644
--- a/operator/internal/controller/podclique/register.go
+++ b/operator/internal/controller/podclique/register.go
@@ -31,6 +31,7 @@ import (
groveschedulerv1alpha1 "github.com/ai-dynamo/grove/scheduler/api/core/v1alpha1"
"github.com/samber/lo"
corev1 "k8s.io/api/core/v1"
+ "k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
ctrl "sigs.k8s.io/controller-runtime"
@@ -288,16 +289,49 @@ func extractPCLQNameFromPodName(podName string) string {
return podName[:endIndex]
}
-// podGangPredicate allows all PodGang create and update events to trigger PodClique reconciliation
+// podGangPredicate filters PodGang events to trigger on initialization and spec updates
func podGangPredicate() predicate.Predicate {
return predicate.Funcs{
- CreateFunc: func(_ event.CreateEvent) bool { return true },
- DeleteFunc: func(_ event.DeleteEvent) bool { return false },
- UpdateFunc: func(_ event.UpdateEvent) bool { return true },
+ CreateFunc: func(_ event.CreateEvent) bool { return false },
+ DeleteFunc: func(_ event.DeleteEvent) bool { return false },
+ UpdateFunc: func(e event.UpdateEvent) bool {
+ oldPG, okOld := e.ObjectOld.(*groveschedulerv1alpha1.PodGang)
+ newPG, okNew := e.ObjectNew.(*groveschedulerv1alpha1.PodGang)
+ if !okOld || !okNew {
+ return false
+ }
+
+ // Trigger when PodGang transitions to Initialized=True
+ oldInitialized := isPodGangInitialized(e.ObjectOld)
+ newInitialized := isPodGangInitialized(e.ObjectNew)
+ if !oldInitialized && newInitialized {
+ return true
+ }
+
+ // Also trigger when PodGang spec changes (e.g., scale out/in adds/removes pod references)
+ // This ensures scheduling gates are removed from newly added pods
+ // Check if metadata.generation changed (Kubernetes increments this on spec changes)
+ if newInitialized && oldPG.GetGeneration() != newPG.GetGeneration() {
+ return true
+ }
+
+ return false
+ },
GenericFunc: func(_ event.GenericEvent) bool { return false },
}
}
+// isPodGangInitialized checks if a PodGang has Initialized condition set to True.
+func isPodGangInitialized(obj client.Object) bool {
+ podGang, ok := obj.(*groveschedulerv1alpha1.PodGang)
+ if !ok {
+ return false
+ }
+
+ // Check if Initialized condition is True
+ return meta.IsStatusConditionTrue(podGang.Status.Conditions, string(groveschedulerv1alpha1.PodGangConditionTypeInitialized))
+}
+
// isManagedPod checks if a Pod is managed by Grove and owned by a PodClique
func isManagedPod(obj client.Object) bool {
pod, ok := obj.(*corev1.Pod)
diff --git a/operator/internal/controller/podcliqueset/components/podgang/podgang.go b/operator/internal/controller/podcliqueset/components/podgang/podgang.go
index b5379223c..aad897f1c 100644
--- a/operator/internal/controller/podcliqueset/components/podgang/podgang.go
+++ b/operator/internal/controller/podcliqueset/components/podgang/podgang.go
@@ -27,6 +27,7 @@ import (
"github.com/ai-dynamo/grove/operator/internal/controller/common/component"
componentutils "github.com/ai-dynamo/grove/operator/internal/controller/common/component/utils"
groveerr "github.com/ai-dynamo/grove/operator/internal/errors"
+ "github.com/ai-dynamo/grove/operator/internal/schedulerbackend"
k8sutils "github.com/ai-dynamo/grove/operator/internal/utils/kubernetes"
groveschedulerv1alpha1 "github.com/ai-dynamo/grove/scheduler/api/core/v1alpha1"
@@ -50,6 +51,7 @@ const (
errCodeSetControllerReference grovecorev1alpha1.ErrorCode = "ERR_SET_CONTROLLER_REFERENCE"
errCodeCreateOrPatchPodGang grovecorev1alpha1.ErrorCode = "ERR_CREATE_OR_PATCH_PODGANG"
errCodeGetClusterTopologyLevels grovecorev1alpha1.ErrorCode = "ERR_GET_CLUSTER_TOPOLOGY_LEVELS"
+ errCodeUpdatePodGang grovecorev1alpha1.ErrorCode = "ERR_UPDATE_PODGANG_WITH_POD_REFS"
)
type _resource struct {
@@ -89,6 +91,7 @@ func (r _resource) GetExistingResourceNames(ctx context.Context, logger logr.Log
}
// Sync creates, updates, or deletes PodGang resources to match the desired state.
+// PodGangs are created with empty podReferences before Pods are created; references are populated once all pods exist.
func (r _resource) Sync(ctx context.Context, logger logr.Logger, pcs *grovecorev1alpha1.PodCliqueSet) error {
logger.Info("Syncing PodGang resources")
sc, err := r.prepareSyncFlow(ctx, logger, pcs)
@@ -99,12 +102,6 @@ func (r _resource) Sync(ctx context.Context, logger logr.Logger, pcs *grovecorev
if result.hasErrors() {
return result.getAggregatedError()
}
- if result.hasPodGangsPendingCreation() {
- return groveerr.New(groveerr.ErrCodeRequeueAfter,
- component.OperationSync,
- fmt.Sprintf("PodGangs pending creation: %v", result.podsGangsPendingCreation),
- )
- }
return nil
}
@@ -128,6 +125,13 @@ func (r _resource) Delete(ctx context.Context, logger logr.Logger, pcsObjectMeta
// buildResource configures a PodGang with pod groups and priority.
func (r _resource) buildResource(pcs *grovecorev1alpha1.PodCliqueSet, pgi *podGangInfo, pg *groveschedulerv1alpha1.PodGang) error {
pg.Labels = getLabels(pcs.Name)
+ // Set scheduler name so the podgang controller can resolve the correct backend
+ if schedName := getSchedulerNameForPCS(pcs); schedName != "" {
+ if pg.Labels == nil {
+ pg.Labels = make(map[string]string)
+ }
+ pg.Labels[apicommon.LabelSchedulerName] = schedName
+ }
if r.tasConfig.Enabled {
if pg.Annotations == nil {
pg.Annotations = make(map[string]string)
@@ -142,13 +146,58 @@ func (r _resource) buildResource(pcs *grovecorev1alpha1.PodCliqueSet, pgi *podGa
fmt.Sprintf("failed to set the controller reference on PodGang %s to PodCliqueSet %v", pgi.fqn, client.ObjectKeyFromObject(pcs)),
)
}
- pg.Spec.PodGroups = createPodGroupsForPodGang(pg.Namespace, pgi)
pg.Spec.PriorityClassName = pcs.Spec.Template.PriorityClassName
pg.Spec.TopologyConstraint = pgi.topologyConstraint
pg.Spec.TopologyConstraintGroupConfigs = pgi.pcsgTopologyConstraints
+
+ // Only create PodGroups if they don't exist yet (initial creation)
+ // Once populated, we preserve existing podReferences to avoid clearing them on subsequent reconciles
+ if len(pg.Spec.PodGroups) == 0 {
+ // Create PodGroups with EMPTY podReferences initially
+ pg.Spec.PodGroups = createEmptyPodGroupsForPodGang(*pgi)
+ } else {
+ // PodGroups already exist - preserve them but update MinReplicas and TopologyConstraint if needed
+ expectedPodGroups := make(map[string]struct {
+ minAvailable int32
+ topologyConstraint *groveschedulerv1alpha1.TopologyConstraint
+ })
+ for _, pclq := range pgi.pclqs {
+ expectedPodGroups[pclq.fqn] = struct {
+ minAvailable int32
+ topologyConstraint *groveschedulerv1alpha1.TopologyConstraint
+ }{
+ minAvailable: pclq.minAvailable,
+ topologyConstraint: pclq.topologyConstraint,
+ }
+ }
+
+ // Update MinReplicas and TopologyConstraint for existing PodGroups
+ for i := range pg.Spec.PodGroups {
+ podGroup := &pg.Spec.PodGroups[i]
+ if expectedPG, ok := expectedPodGroups[podGroup.Name]; ok {
+ podGroup.MinReplicas = expectedPG.minAvailable
+ podGroup.TopologyConstraint = expectedPG.topologyConstraint
+ }
+ }
+ }
+
return nil
}
+// createEmptyPodGroupsForPodGang creates PodGroups with empty podReferences.
+// These will be populated later when pods are created.
+func createEmptyPodGroupsForPodGang(pgInfo podGangInfo) []groveschedulerv1alpha1.PodGroup {
+ podGroups := lo.Map(pgInfo.pclqs, func(pclq pclqInfo, _ int) groveschedulerv1alpha1.PodGroup {
+ return groveschedulerv1alpha1.PodGroup{
+ Name: pclq.fqn,
+ PodReferences: []groveschedulerv1alpha1.NamespacedName{},
+ MinReplicas: pclq.minAvailable,
+ TopologyConstraint: pclq.topologyConstraint,
+ }
+ })
+ return podGroups
+}
+
// getPodGangSelectorLabels returns labels for selecting all PodGangs of a PodCliqueSet.
func getPodGangSelectorLabels(pcsObjMeta metav1.ObjectMeta) map[string]string {
return lo.Assign(
@@ -176,3 +225,51 @@ func getLabels(pcsName string) map[string]string {
apicommon.LabelComponentKey: apicommon.LabelComponentNamePodGang,
})
}
+
+// getSchedulerNameForPCS returns the scheduler backend name for the PodCliqueSet:
+// the template's schedulerName if set (same across all cliques per validation), else the default backend.
+func getSchedulerNameForPCS(pcs *grovecorev1alpha1.PodCliqueSet) string {
+ for _, c := range pcs.Spec.Template.Cliques {
+ if c != nil && c.Spec.PodSpec.SchedulerName != "" {
+ return c.Spec.PodSpec.SchedulerName
+ }
+ }
+ if def := schedulerbackend.GetDefault(); def != nil {
+ return def.Name()
+ }
+ return ""
+}
+
+// setInitializedCondition sets or updates the PodGangInitialized condition on the PodGang status.
+func setInitializedCondition(pg *groveschedulerv1alpha1.PodGang, status metav1.ConditionStatus, reason, message string) {
+ condition := metav1.Condition{
+ Type: string(groveschedulerv1alpha1.PodGangConditionTypeInitialized),
+ Status: status,
+ ObservedGeneration: pg.Generation,
+ LastTransitionTime: metav1.Now(),
+ Reason: reason,
+ Message: message,
+ }
+
+ found := false
+ for i, cond := range pg.Status.Conditions {
+ if cond.Type == string(groveschedulerv1alpha1.PodGangConditionTypeInitialized) {
+ pg.Status.Conditions[i] = condition
+ found = true
+ break
+ }
+ }
+ if !found {
+ pg.Status.Conditions = append(pg.Status.Conditions, condition)
+ }
+}
+
+// hasInitializedCondition returns true if the PodGang has an Initialized condition.
+func hasInitializedCondition(pg *groveschedulerv1alpha1.PodGang) bool {
+ for _, cond := range pg.Status.Conditions {
+ if cond.Type == string(groveschedulerv1alpha1.PodGangConditionTypeInitialized) {
+ return true
+ }
+ }
+ return false
+}
diff --git a/operator/internal/controller/podcliqueset/components/podgang/podgang_test.go b/operator/internal/controller/podcliqueset/components/podgang/podgang_test.go
new file mode 100644
index 000000000..23988408a
--- /dev/null
+++ b/operator/internal/controller/podcliqueset/components/podgang/podgang_test.go
@@ -0,0 +1,54 @@
+// /*
+// Copyright 2026 The Grove Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// */
+
+package podgang
+
+import (
+ "testing"
+
+ groveschedulerv1alpha1 "github.com/ai-dynamo/grove/scheduler/api/core/v1alpha1"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+func TestSetInitializedCondition(t *testing.T) {
+ pg := &groveschedulerv1alpha1.PodGang{
+ ObjectMeta: metav1.ObjectMeta{Name: "pg-1", Namespace: "default", Generation: 1},
+ }
+ setInitializedCondition(pg, metav1.ConditionFalse, "PodsPending", "waiting")
+ require.Len(t, pg.Status.Conditions, 1)
+ assert.Equal(t, string(groveschedulerv1alpha1.PodGangConditionTypeInitialized), pg.Status.Conditions[0].Type)
+ assert.Equal(t, metav1.ConditionFalse, pg.Status.Conditions[0].Status)
+ assert.Equal(t, "PodsPending", pg.Status.Conditions[0].Reason)
+ assert.Equal(t, "waiting", pg.Status.Conditions[0].Message)
+
+ // Update existing condition to ready
+ setInitializedCondition(pg, metav1.ConditionTrue, "Ready", "all ready")
+ require.Len(t, pg.Status.Conditions, 1)
+ assert.Equal(t, metav1.ConditionTrue, pg.Status.Conditions[0].Status)
+ assert.Equal(t, "Ready", pg.Status.Conditions[0].Reason)
+}
+
+func TestHasInitializedCondition(t *testing.T) {
+ pg := &groveschedulerv1alpha1.PodGang{
+ ObjectMeta: metav1.ObjectMeta{Name: "pg-1", Namespace: "default"},
+ }
+ assert.False(t, hasInitializedCondition(pg))
+
+ setInitializedCondition(pg, metav1.ConditionFalse, "PodsPending", "waiting")
+ assert.True(t, hasInitializedCondition(pg))
+}
diff --git a/operator/internal/controller/podcliqueset/components/podgang/syncflow.go b/operator/internal/controller/podcliqueset/components/podgang/syncflow.go
index 85d25379d..b07decfd4 100644
--- a/operator/internal/controller/podcliqueset/components/podgang/syncflow.go
+++ b/operator/internal/controller/podcliqueset/components/podgang/syncflow.go
@@ -462,20 +462,15 @@ func (r _resource) deleteExcessPodGangs(sc *syncContext) error {
return nil
}
-// createOrUpdatePodGangs creates or updates all expected PodGangs when ready.
+// createOrUpdatePodGangs creates or updates all expected PodGangs.
+// PodGangs are created with empty podReferences, Initialized=False.
+// Once all pods are created, PodReferences are populated and the PodGang is marked as Initialized=True.
func (r _resource) createOrUpdatePodGangs(sc *syncContext) syncFlowResult {
result := syncFlowResult{}
- pendingPodGangNames := sc.getPodGangNamesPendingCreation()
+
+ // Step 1: Create or update all expected PodGangs with basic structure
for _, podGang := range sc.expectedPodGangs {
sc.logger.Info("[createOrUpdatePodGangs] processing PodGang", "fqn", podGang.fqn)
- isPodGangPendingCreation := slices.Contains(pendingPodGangNames, podGang.fqn)
- // check the health of each podclique
- numPendingPods := r.getPodsPendingCreationOrAssociation(sc, podGang)
- if isPodGangPendingCreation && numPendingPods > 0 {
- sc.logger.Info("skipping creation of PodGang as all desired replicas have not yet been created or assigned", "fqn", podGang.fqn, "numPendingPodsToCreateOrAssociate", numPendingPods)
- result.recordPodGangPendingCreation(podGang.fqn)
- continue
- }
if err := r.createOrUpdatePodGang(sc, podGang); err != nil {
sc.logger.Error(err, "failed to create PodGang", "PodGangName", podGang.fqn)
result.recordError(err)
@@ -483,9 +478,189 @@ func (r _resource) createOrUpdatePodGangs(sc *syncContext) syncFlowResult {
}
result.recordPodGangCreation(podGang.fqn)
}
+
+ // Step 2: For existing PodGangs, try to update PodReferences if all pods are created
+ // Skip newly created PodGangs as their pods won't be ready yet
+ for _, podGangName := range sc.existingPodGangNames {
+ if err := r.updatePodGangWithPodReferences(sc, podGangName); err != nil {
+ // Check if this is a "waiting for pods" error
+ var groveErr *groveerr.GroveError
+ if errors.As(err, &groveErr) && groveErr.Code == groveerr.ErrCodeRequeueAfter {
+ // Expected error: pods not ready yet, record but continue with other PodGangs
+ sc.logger.Info("PodGang waiting for pods to be created, will retry in next reconcile",
+ "podGang", podGangName)
+ result.recordError(err)
+ } else {
+ // Unexpected error: log and record, but continue with other PodGangs
+ sc.logger.Error(err, "Failed to update PodGang with pod references",
+ "podGang", podGangName)
+ result.recordError(err)
+ }
+ }
+ }
+
return result
}
+// updatePodGangWithPodReferences updates a PodGang with pod references and sets Initialized condition.
+func (r _resource) updatePodGangWithPodReferences(sc *syncContext, podGangName string) error {
+ // Find the podGangInfo from expectedPodGangs
+ podGangInfo, found := r.findPodGangInfo(sc, podGangName)
+ if !found {
+ return nil
+ }
+
+ // Verify all pods are created before proceeding
+ if err := r.verifyAllPodsCreated(sc, podGangName, podGangInfo); err != nil {
+ return err
+ }
+
+ // Update pod references using Patch (no need to fetch from API server!)
+ if err := r.patchPodGangWithPodReferences(sc, podGangName, podGangInfo); err != nil {
+ return err
+ }
+
+ // Update status to set Initialized=True (idempotent - no need to check current state)
+ if err := r.patchPodGangInitializedStatus(sc, podGangName, metav1.ConditionTrue, "Ready", "PodGang is fully initialized"); err != nil {
+ return err
+ }
+ return nil
+}
+
+// patchPodGangInitializedStatus patches the Initialized condition with the given status. NOTE(review): it also unconditionally resets Status.Phase to Pending on every call, including when marking Initialized=True — confirm this is intended.
+func (r _resource) patchPodGangInitializedStatus(sc *syncContext, podGangName string, status metav1.ConditionStatus, reason, message string) error {
+ // Create a PodGang object with only the status we want to patch
+ statusPatch := &groveschedulerv1alpha1.PodGang{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: podGangName,
+ Namespace: sc.pcs.Namespace,
+ },
+ }
+
+ setInitializedCondition(statusPatch, status, reason, message)
+ statusPatch.Status.Phase = groveschedulerv1alpha1.PodGangPhasePending
+
+ if err := r.client.Status().Patch(sc.ctx, statusPatch, client.Merge); err != nil {
+ return err
+ }
+
+ sc.logger.Info("Successfully patched PodGang Initialized condition",
+ "podGang", podGangName, "status", status)
+ return nil
+}
+
+// patchPodGangWithPodReferences uses strategic merge patch to update pod references
+func (r _resource) patchPodGangWithPodReferences(sc *syncContext, podGangName string, podGangInfo *podGangInfo) error {
+ // Build PodGroups with pod references from syncContext
+ podGroups := r.buildPodGroupsFromContext(sc, podGangInfo)
+
+ // Create patch object
+ patchPodGang := &groveschedulerv1alpha1.PodGang{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: podGangName,
+ Namespace: sc.pcs.Namespace,
+ },
+ Spec: groveschedulerv1alpha1.PodGangSpec{
+ PodGroups: podGroups,
+ },
+ }
+
+ // Apply patch
+ if err := r.client.Patch(sc.ctx, patchPodGang, client.Merge); err != nil {
+ return groveerr.WrapError(err,
+ errCodeCreateOrPatchPodGang,
+ component.OperationSync,
+ fmt.Sprintf("Failed to patch PodGang %s with pod references", podGangName),
+ )
+ }
+
+ sc.logger.Info("Successfully patched PodGang with pod references",
+ "podGang", podGangName,
+ "numPodGroups", len(podGroups))
+ return nil
+}
+
+// buildPodGroupsFromContext constructs PodGroups with pod references from syncContext data
+func (r _resource) buildPodGroupsFromContext(sc *syncContext, podGangInfo *podGangInfo) []groveschedulerv1alpha1.PodGroup {
+ podsByGroup := r.groupPodsByPodClique(sc, podGangInfo)
+
+ podGroups := make([]groveschedulerv1alpha1.PodGroup, 0, len(podGangInfo.pclqs))
+ for _, pclqInfo := range podGangInfo.pclqs {
+ pods := podsByGroup[pclqInfo.fqn]
+
+ // Build podReferences list
+ podReferences := make([]groveschedulerv1alpha1.NamespacedName, 0, len(pods))
+ for _, pod := range pods {
+ podReferences = append(podReferences, groveschedulerv1alpha1.NamespacedName{
+ Namespace: pod.Namespace,
+ Name: pod.Name,
+ })
+ }
+
+	// Sort for determinism; prevents spurious PodGang patches when only the ordering of references differs.
+	// TODO: revisit whether sorting is still needed here.
+ sort.Slice(podReferences, func(i, j int) bool {
+ return podReferences[i].Name < podReferences[j].Name
+ })
+
+ podGroups = append(podGroups, groveschedulerv1alpha1.PodGroup{
+ Name: pclqInfo.fqn,
+ PodReferences: podReferences,
+ MinReplicas: pclqInfo.minAvailable,
+ TopologyConstraint: pclqInfo.topologyConstraint, // Preserve PodClique-level topology constraint
+ })
+ }
+
+ return podGroups
+}
+
+// findPodGangInfo locates the podGangInfo from expectedPodGangs
+func (r _resource) findPodGangInfo(sc *syncContext, podGangName string) (*podGangInfo, bool) {
+ podGangInfo, found := lo.Find(sc.expectedPodGangs, func(pg *podGangInfo) bool {
+ return pg.fqn == podGangName
+ })
+ if !found {
+ sc.logger.Info("PodGang not found in expectedPodGangs, skipping update",
+ "podGang", podGangName)
+ return nil, false
+ }
+ return podGangInfo, true
+}
+
+// verifyAllPodsCreated checks if all required pods exist before updating PodGang
+func (r _resource) verifyAllPodsCreated(sc *syncContext, podGangName string, podGangInfo *podGangInfo) error {
+ pclqs := sc.getPodCliques(podGangInfo)
+ if len(pclqs) != len(podGangInfo.pclqs) {
+ // Not all constituent PCLQs exist yet
+ sc.logger.Info("Not all constituent PCLQs exist yet", "podGang", podGangName, "expected", len(podGangInfo.pclqs), "actual", len(pclqs))
+ return groveerr.New(groveerr.ErrCodeRequeueAfter,
+ component.OperationSync,
+ fmt.Sprintf("Waiting for all pods to be created for PodGang %s", podGangName),
+ )
+ }
+	// Count pods still pending creation or association; defer reference updates until none remain. NOTE(review): the log message below still says "skipping creation of PodGang" from the old flow — the PodGang already exists at this point.
+ numPendingPods := r.getPodsPendingCreationOrAssociation(sc, podGangInfo)
+ if numPendingPods > 0 {
+ sc.logger.Info("skipping creation of PodGang as all desired replicas have not yet been created or assigned", "podGang", podGangName, "numPendingPodsToCreateOrAssociate", numPendingPods)
+ return groveerr.New(groveerr.ErrCodeRequeueAfter,
+ component.OperationSync,
+ fmt.Sprintf("Waiting for all pods to be created or assigned for PodGang %s", podGangName),
+ )
+ }
+ return nil
+}
+
+// groupPodsByPodClique organizes pods by their PodClique names
+func (r _resource) groupPodsByPodClique(sc *syncContext, podGangInfo *podGangInfo) map[string][]corev1.Pod {
+ podsByGroup := make(map[string][]corev1.Pod)
+ for _, pclqInfo := range podGangInfo.pclqs {
+ if pods, ok := sc.existingPCLQPods[pclqInfo.fqn]; ok {
+ podsByGroup[pclqInfo.fqn] = pods
+ }
+ }
+ return podsByGroup
+}
+
// getPodsForPodCliquesPendingCreation counts expected pods from non-existent PodCliques.
func (r _resource) getPodsForPodCliquesPendingCreation(sc *syncContext, podGang *podGangInfo) int {
existingPCLQNames := lo.Map(sc.existingPCLQs, func(pclq grovecorev1alpha1.PodClique, _ int) string {
@@ -551,35 +726,20 @@ func (r _resource) createOrUpdatePodGang(sc *syncContext, pgInfo *podGangInfo) e
fmt.Sprintf("Failed to CreateOrPatch PodGang %v", pgObjectKey),
)
}
+
+ // Update status with Initialized=False condition and Phase if not already set
+ // This needs to be done separately since CreateOrPatch doesn't handle status subresource
+ if !hasInitializedCondition(pg) {
+ if err := r.patchPodGangInitializedStatus(sc, pg.Name, metav1.ConditionFalse, "PodsPending", "Not all constituent pods have been created yet"); err != nil {
+ return err
+ }
+ }
+
r.eventRecorder.Eventf(sc.pcs, corev1.EventTypeNormal, constants.ReasonPodGangCreateOrUpdateSuccessful, "Created/Updated PodGang %v", pgObjectKey)
sc.logger.Info("Triggered CreateOrPatch of PodGang", "objectKey", pgObjectKey)
return nil
}
-// createPodGroupsForPodGang constructs PodGroups from constituent PodCliques.
-func createPodGroupsForPodGang(namespace string, pgInfo *podGangInfo) []groveschedulerv1alpha1.PodGroup {
- podGroups := lo.Map(pgInfo.pclqs, func(pi pclqInfo, _ int) groveschedulerv1alpha1.PodGroup {
- namespacedNames := lo.Map(pi.associatedPodNames, func(associatedPodName string, _ int) groveschedulerv1alpha1.NamespacedName {
- return groveschedulerv1alpha1.NamespacedName{
- Namespace: namespace,
- Name: associatedPodName,
- }
- })
- // sorting the slice of NamespaceName. This prevents unnecessary updates to the PodGang resource if the only thing
- // that is difference is the order of NamespaceNames.
- sort.Slice(namespacedNames, func(i, j int) bool {
- return namespacedNames[i].Name < namespacedNames[j].Name
- })
- return groveschedulerv1alpha1.PodGroup{
- Name: pi.fqn,
- PodReferences: namespacedNames,
- MinReplicas: pi.minAvailable,
- TopologyConstraint: pi.topologyConstraint,
- }
- })
- return podGroups
-}
-
// Convenience types and methods on these types that are used during sync flow run.
// ------------------------------------------------------------------------------------------------
@@ -656,9 +816,6 @@ func (sc *syncContext) determinePCSGReplicas(pcsgFQN string, pcsgConfig grovecor
// syncFlowResult captures the result of a sync flow run.
type syncFlowResult struct {
- // podsGangsPendingCreation are the names of PodGangs that could not be created in this sync run.
- // It could be due to all PCLQs not present, or it could be due to presence of at least one PCLQ that is not ready.
- podsGangsPendingCreation []string
// createdPodGangNames are the names of the PodGangs that got created during the sync flow run.
createdPodGangNames []string
// errs are the list of errors during the sync flow run.
@@ -675,21 +832,11 @@ func (sfr *syncFlowResult) recordError(err error) {
sfr.errs = append(sfr.errs, err)
}
-// hasPodGangsPendingCreation returns true if any PodGangs are waiting to be created.
-func (sfr *syncFlowResult) hasPodGangsPendingCreation() bool {
- return len(sfr.podsGangsPendingCreation) > 0
-}
-
// recordPodGangCreation adds a PodGang to the created list.
func (sfr *syncFlowResult) recordPodGangCreation(podGangName string) {
sfr.createdPodGangNames = append(sfr.createdPodGangNames, podGangName)
}
-// recordPodGangPendingCreation adds a PodGang to the pending creation list.
-func (sfr *syncFlowResult) recordPodGangPendingCreation(podGangName string) {
- sfr.podsGangsPendingCreation = append(sfr.podsGangsPendingCreation, podGangName)
-}
-
// getAggregatedError combines all errors into a single error.
func (sfr *syncFlowResult) getAggregatedError() error {
return errors.Join(sfr.errs...)
diff --git a/operator/internal/controller/podcliqueset/components/podgang/syncflow_test.go b/operator/internal/controller/podcliqueset/components/podgang/syncflow_test.go
index 8275c2c61..d95c1dc44 100644
--- a/operator/internal/controller/podcliqueset/components/podgang/syncflow_test.go
+++ b/operator/internal/controller/podcliqueset/components/podgang/syncflow_test.go
@@ -18,19 +18,26 @@ package podgang
import (
"context"
+ "errors"
"slices"
"testing"
apicommon "github.com/ai-dynamo/grove/operator/api/common"
grovecorev1alpha1 "github.com/ai-dynamo/grove/operator/api/core/v1alpha1"
+ groveclientscheme "github.com/ai-dynamo/grove/operator/internal/client"
+ groveerr "github.com/ai-dynamo/grove/operator/internal/errors"
testutils "github.com/ai-dynamo/grove/operator/test/utils"
groveschedulerv1alpha1 "github.com/ai-dynamo/grove/scheduler/api/core/v1alpha1"
"github.com/samber/lo"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
+ v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/types"
+ "k8s.io/client-go/tools/record"
"k8s.io/utils/ptr"
+ "sigs.k8s.io/controller-runtime/pkg/client"
ctrllogger "sigs.k8s.io/controller-runtime/pkg/log"
)
@@ -200,6 +207,95 @@ func TestMinAvailableWithHPAScaling(t *testing.T) {
}
}
+// TestVerifyAllPodsCreated tests verifyAllPodsCreated with minimal sc + podGangInfo (no PCS/prepareSyncFlow).
+// It covers both the PCLQ existence check and getPodsPendingCreationOrAssociation logic (Replicas and podgang label).
+func TestVerifyAllPodsCreated(t *testing.T) {
+ makePod := func(name string, podGangLabel string) v1.Pod {
+ pod := v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: "default"}}
+ if podGangLabel != "" {
+ pod.Labels = map[string]string{apicommon.LabelPodGang: podGangLabel}
+ }
+ return pod
+ }
+ makePCLQ := func(name string, replicas, minAvailable int32) grovecorev1alpha1.PodClique {
+ return grovecorev1alpha1.PodClique{
+ ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: "default"},
+ Spec: grovecorev1alpha1.PodCliqueSpec{Replicas: replicas, MinAvailable: ptr.To(minAvailable)},
+ }
+ }
+
+ tests := []struct {
+ name string
+ existingPods map[string][]v1.Pod
+ existingPCLQs []grovecorev1alpha1.PodClique
+ podGang *podGangInfo
+ wantRequeue bool
+ }{
+ {
+ name: "requeue when not all constituent PCLQs exist yet",
+ existingPods: map[string][]v1.Pod{"pclq-a": {makePod("a1", "pg-1")}},
+ existingPCLQs: []grovecorev1alpha1.PodClique{makePCLQ("pclq-a", 1, 1)},
+ podGang: &podGangInfo{fqn: "pg-1", pclqs: []pclqInfo{{fqn: "pclq-a", replicas: 1, minAvailable: 1}, {fqn: "pclq-b", replicas: 1, minAvailable: 1}}},
+ wantRequeue: true,
+ },
+ {
+ name: "requeue when PCLQ has fewer pods than Replicas (even if >= MinAvailable)",
+ existingPods: map[string][]v1.Pod{
+ "pclq-a": {makePod("a1", "pg-1"), makePod("a2", "pg-1")}, // 2 pods, Replicas=5, MinAvailable=2
+ },
+ existingPCLQs: []grovecorev1alpha1.PodClique{makePCLQ("pclq-a", 5, 2)},
+ podGang: &podGangInfo{fqn: "pg-1", pclqs: []pclqInfo{{fqn: "pclq-a", replicas: 5, minAvailable: 2}}},
+ wantRequeue: true, // Still pending: 5-2=3 pods to create
+ },
+ {
+ name: "requeue when Pod missing podgang label",
+ existingPods: map[string][]v1.Pod{
+ "pclq-a": {makePod("a1", ""), makePod("a2", "pg-1")}, // a1 missing label
+ },
+ existingPCLQs: []grovecorev1alpha1.PodClique{makePCLQ("pclq-a", 2, 1)},
+ podGang: &podGangInfo{fqn: "pg-1", pclqs: []pclqInfo{{fqn: "pclq-a", replicas: 2, minAvailable: 1}}},
+ wantRequeue: true, // a1 needs association
+ },
+ {
+ name: "requeue when Pod has wrong podgang label",
+ existingPods: map[string][]v1.Pod{
+ "pclq-a": {makePod("a1", "pg-wrong"), makePod("a2", "pg-1")},
+ },
+ existingPCLQs: []grovecorev1alpha1.PodClique{makePCLQ("pclq-a", 2, 1)},
+ podGang: &podGangInfo{fqn: "pg-1", pclqs: []pclqInfo{{fqn: "pclq-a", replicas: 2, minAvailable: 1}}},
+ wantRequeue: true, // a1 has wrong label
+ },
+ {
+ name: "success when all Replicas created and all pods have correct podgang label",
+ existingPods: map[string][]v1.Pod{
+ "pclq-a": {makePod("a1", "pg-1"), makePod("a2", "pg-1"), makePod("a3", "pg-1"), makePod("a4", "pg-1"), makePod("a5", "pg-1")},
+ },
+ existingPCLQs: []grovecorev1alpha1.PodClique{makePCLQ("pclq-a", 5, 2)},
+ podGang: &podGangInfo{fqn: "pg-1", pclqs: []pclqInfo{{fqn: "pclq-a", replicas: 5, minAvailable: 2}}},
+ wantRequeue: false,
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ sc := &syncContext{
+ logger: ctrllogger.FromContext(context.Background()).WithName("test"),
+ existingPCLQPods: tt.existingPods,
+ existingPCLQs: tt.existingPCLQs,
+ }
+ r := &_resource{}
+ err := r.verifyAllPodsCreated(sc, tt.podGang.fqn, tt.podGang)
+ if tt.wantRequeue {
+ require.Error(t, err)
+ var groveErr *groveerr.GroveError
+ require.True(t, errors.As(err, &groveErr))
+ assert.Equal(t, groveerr.ErrCodeRequeueAfter, groveErr.Code)
+ } else {
+ require.NoError(t, err)
+ }
+ })
+ }
+}
+
// This test checks the accounting of the number of pending pods before creating a PodGang
func TestGetPodsPendingCreation(t *testing.T) {
tests := []struct {
@@ -311,6 +407,183 @@ func TestGetPodsPendingCreation(t *testing.T) {
}
}
+// TestUpdatePodGangWithPodReferences unit tests updatePodGangWithPodReferences.
+func TestUpdatePodGangWithPodReferences(t *testing.T) {
+ ctx := context.Background()
+ logger := ctrllogger.FromContext(ctx).WithName("test")
+
+ t.Run("returns nil when PodGang not in expectedPodGangs", func(t *testing.T) {
+ sc := &syncContext{logger: logger, expectedPodGangs: []*podGangInfo{{fqn: "pg-a"}}}
+ r := &_resource{}
+ err := r.updatePodGangWithPodReferences(sc, "pg-other")
+ require.NoError(t, err)
+ })
+
+ t.Run("returns requeue error when verifyAllPodsCreated fails", func(t *testing.T) {
+ sc := &syncContext{
+ logger: logger,
+ pcs: &grovecorev1alpha1.PodCliqueSet{ObjectMeta: metav1.ObjectMeta{Namespace: "default"}},
+ expectedPodGangs: []*podGangInfo{{fqn: "pg-a", pclqs: []pclqInfo{{fqn: "pclq-a", minAvailable: 1}}}},
+ existingPCLQPods: map[string][]v1.Pod{},
+ existingPCLQs: []grovecorev1alpha1.PodClique{},
+ }
+ r := &_resource{}
+ err := r.updatePodGangWithPodReferences(sc, "pg-a")
+ require.Error(t, err)
+ var groveErr *groveerr.GroveError
+ require.True(t, errors.As(err, &groveErr))
+ assert.Equal(t, groveerr.ErrCodeRequeueAfter, groveErr.Code)
+ })
+
+ t.Run("patches PodReferences and Initialized when all pods ready", func(t *testing.T) {
+ ns := "default"
+ pcs := &grovecorev1alpha1.PodCliqueSet{ObjectMeta: metav1.ObjectMeta{Name: "pcs", Namespace: ns}}
+ pclq := &grovecorev1alpha1.PodClique{
+ ObjectMeta: metav1.ObjectMeta{Name: "pclq-a", Namespace: ns},
+ Spec: grovecorev1alpha1.PodCliqueSpec{Replicas: 1, MinAvailable: ptr.To(int32(1))},
+ }
+ pgExisting := &groveschedulerv1alpha1.PodGang{
+ ObjectMeta: metav1.ObjectMeta{Name: "pg-a", Namespace: ns},
+ Spec: groveschedulerv1alpha1.PodGangSpec{},
+ }
+ // Pod must have podgang label to pass verifyAllPodsCreated
+ pod := v1.Pod{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "pod-1",
+ Namespace: ns,
+ Labels: map[string]string{apicommon.LabelPodGang: "pg-a"},
+ },
+ }
+ fakeClient := testutils.NewTestClientBuilder().
+ WithObjects(pcs, pclq, pgExisting).
+ WithStatusSubresource(&groveschedulerv1alpha1.PodGang{}).
+ Build()
+ sc := &syncContext{
+ ctx: ctx,
+ logger: logger,
+ pcs: pcs,
+ expectedPodGangs: []*podGangInfo{{fqn: "pg-a", pclqs: []pclqInfo{{fqn: "pclq-a", replicas: 1, minAvailable: 1}}}},
+ existingPodGangNames: []string{"pg-a"},
+ existingPCLQPods: map[string][]v1.Pod{"pclq-a": {pod}},
+ existingPCLQs: []grovecorev1alpha1.PodClique{*pclq},
+ }
+ r := &_resource{client: fakeClient}
+ err := r.updatePodGangWithPodReferences(sc, "pg-a")
+ require.NoError(t, err)
+ pgAfter := &groveschedulerv1alpha1.PodGang{}
+ require.NoError(t, fakeClient.Get(ctx, client.ObjectKey{Namespace: ns, Name: "pg-a"}, pgAfter))
+ require.Len(t, pgAfter.Spec.PodGroups, 1)
+ assert.Equal(t, "pclq-a", pgAfter.Spec.PodGroups[0].Name)
+ assert.Equal(t, []groveschedulerv1alpha1.NamespacedName{{Namespace: ns, Name: "pod-1"}}, pgAfter.Spec.PodGroups[0].PodReferences)
+ if len(pgAfter.Status.Conditions) > 0 {
+ assert.True(t, lo.ContainsBy(pgAfter.Status.Conditions, func(c metav1.Condition) bool {
+ return c.Type == string(groveschedulerv1alpha1.PodGangConditionTypeInitialized) && c.Status == metav1.ConditionTrue
+ }))
+ }
+ })
+}
+
+// TestCreateOrUpdatePodGangs tests the new flow: create PodGangs first, then update PodReferences when all pods are ready.
+func TestCreateOrUpdatePodGangs(t *testing.T) {
+ ctx := context.Background()
+ ns := "default"
+ pcsName := "test-pcs"
+ pcsLabels := apicommon.GetDefaultLabelsForPodCliqueSetManagedResources(pcsName)
+ pcs := &grovecorev1alpha1.PodCliqueSet{
+ ObjectMeta: metav1.ObjectMeta{Name: pcsName, Namespace: ns, UID: "pcs-uid"},
+ Spec: grovecorev1alpha1.PodCliqueSetSpec{
+ Replicas: 1,
+ Template: grovecorev1alpha1.PodCliqueSetTemplateSpec{
+ Cliques: []*grovecorev1alpha1.PodCliqueTemplateSpec{
+ {Name: "worker", Spec: grovecorev1alpha1.PodCliqueSpec{Replicas: 2, MinAvailable: ptr.To(int32(1))}},
+ },
+ },
+ },
+ }
+ pclqName := "test-pcs-0-worker"
+ pclq := &grovecorev1alpha1.PodClique{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: pclqName, Namespace: ns, UID: types.UID("pclq-uid"),
+ Labels: pcsLabels,
+ OwnerReferences: []metav1.OwnerReference{{Name: pcsName, UID: pcs.UID, Controller: ptr.To(true)}},
+ },
+ Spec: grovecorev1alpha1.PodCliqueSpec{Replicas: 2, MinAvailable: ptr.To(int32(1))},
+ }
+ pgLabels := lo.Assign(pcsLabels, map[string]string{apicommon.LabelComponentKey: apicommon.LabelComponentNamePodGang})
+ pgCreated := &groveschedulerv1alpha1.PodGang{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-pcs-0", Namespace: ns,
+ Labels: pgLabels,
+ OwnerReferences: []metav1.OwnerReference{{APIVersion: "grove.io/v1alpha1", Kind: "PodCliqueSet", Name: pcsName, UID: pcs.UID, Controller: ptr.To(true)}},
+ },
+ Spec: groveschedulerv1alpha1.PodGangSpec{},
+ }
+ pod1 := &v1.Pod{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "worker-0", Namespace: ns,
+ Labels: lo.Assign(pcsLabels, map[string]string{apicommon.LabelPodGang: "test-pcs-0"}),
+ OwnerReferences: []metav1.OwnerReference{{Name: pclqName, UID: pclq.UID, Controller: ptr.To(true)}},
+ },
+ }
+ pod2 := &v1.Pod{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "worker-1", Namespace: ns,
+ Labels: lo.Assign(pcsLabels, map[string]string{apicommon.LabelPodGang: "test-pcs-0"}),
+ OwnerReferences: []metav1.OwnerReference{{Name: pclqName, UID: pclq.UID, Controller: ptr.To(true)}},
+ },
+ }
+
+ t.Run("creates PodGang when not present (Step 1 loop)", func(t *testing.T) {
+ // No PodGang in cluster: Step 1 must create it via createOrUpdatePodGang.
+ fakeClient := testutils.NewTestClientBuilder().
+ WithObjects(pcs, pclq).
+ WithStatusSubresource(&groveschedulerv1alpha1.PodGang{}).
+ Build()
+ r := &_resource{client: fakeClient, scheme: groveclientscheme.Scheme, eventRecorder: &record.FakeRecorder{}}
+ sc, err := r.prepareSyncFlow(ctx, ctrllogger.FromContext(ctx).WithName("test"), pcs)
+ require.NoError(t, err)
+ require.Len(t, sc.expectedPodGangs, 1, "expected one PodGang to create")
+ require.Empty(t, sc.existingPodGangNames, "PodGang should not exist yet")
+
+ result := r.createOrUpdatePodGangs(sc)
+ require.False(t, result.hasErrors(), "createOrUpdatePodGangs should not fail: %v", result.errs)
+ require.Len(t, result.createdPodGangNames, 1, "Step 1 loop should have recorded one creation")
+ assert.Equal(t, "test-pcs-0", result.createdPodGangNames[0])
+
+ pgAfter := &groveschedulerv1alpha1.PodGang{}
+ require.NoError(t, fakeClient.Get(ctx, client.ObjectKey{Namespace: ns, Name: "test-pcs-0"}, pgAfter),
+ "PodGang should exist after Step 1 create")
+ assert.Equal(t, pcsName, pgAfter.OwnerReferences[0].Name)
+ })
+
+ t.Run("updates existing PodGang and fills PodReferences (Step 1 + Step 2)", func(t *testing.T) {
+ fakeClient := testutils.NewTestClientBuilder().
+ WithObjects(pcs, pclq, pgCreated, pod1, pod2).
+ WithStatusSubresource(&groveschedulerv1alpha1.PodGang{}).
+ Build()
+ r := &_resource{client: fakeClient, scheme: groveclientscheme.Scheme, eventRecorder: &record.FakeRecorder{}}
+ sc, err := r.prepareSyncFlow(ctx, ctrllogger.FromContext(ctx).WithName("test"), pcs)
+ require.NoError(t, err)
+ require.Contains(t, sc.existingPodGangNames, "test-pcs-0")
+
+ result := r.createOrUpdatePodGangs(sc)
+ require.False(t, result.hasErrors(), "createOrUpdatePodGangs should not fail: %v", result.errs)
+		// Step 1 still runs and records the name (createOrUpdatePodGang issues a patch when the PodGang already exists)
+ require.Len(t, result.createdPodGangNames, 1)
+
+ pgAfter := &groveschedulerv1alpha1.PodGang{}
+ require.NoError(t, fakeClient.Get(ctx, client.ObjectKey{Namespace: ns, Name: "test-pcs-0"}, pgAfter))
+ require.Len(t, pgAfter.Spec.PodGroups, 1)
+ assert.Equal(t, "test-pcs-0-worker", pgAfter.Spec.PodGroups[0].Name)
+ assert.Len(t, pgAfter.Spec.PodGroups[0].PodReferences, 2)
+ if len(pgAfter.Status.Conditions) > 0 {
+ assert.True(t, lo.ContainsBy(pgAfter.Status.Conditions, func(c metav1.Condition) bool {
+ return c.Type == string(groveschedulerv1alpha1.PodGangConditionTypeInitialized) && c.Status == metav1.ConditionTrue
+ }))
+ }
+ })
+}
+
// TestComputeExpectedPodGangs tests the computeExpectedPodGangs function
func TestComputeExpectedPodGangs(t *testing.T) {
tests := []struct {
diff --git a/operator/internal/controller/podgang/reconciler.go b/operator/internal/controller/podgang/reconciler.go
new file mode 100644
index 000000000..bc67aaa81
--- /dev/null
+++ b/operator/internal/controller/podgang/reconciler.go
@@ -0,0 +1,87 @@
+// /*
+// Copyright 2025 The Grove Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// */
+
+package podgang
+
+import (
+ "context"
+
+ apicommon "github.com/ai-dynamo/grove/operator/api/common"
+ "github.com/ai-dynamo/grove/operator/internal/schedulerbackend"
+
+ groveschedulerv1alpha1 "github.com/ai-dynamo/grove/scheduler/api/core/v1alpha1"
+ "k8s.io/apimachinery/pkg/runtime"
+ ctrl "sigs.k8s.io/controller-runtime"
+ "sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/log"
+)
+
+// Reconciler reconciles PodGang objects and converts them to scheduler-specific CRs
+type Reconciler struct {
+ client.Client
+ scheme *runtime.Scheme
+}
+
+// NewReconciler creates a new Reconciler. Backend is resolved per PodGang from the grove.io/scheduler-name label or default.
+func NewReconciler(mgr ctrl.Manager) *Reconciler {
+ return &Reconciler{
+ Client: mgr.GetClient(),
+ scheme: mgr.GetScheme(),
+ }
+}
+
+func resolveBackend(podGang *groveschedulerv1alpha1.PodGang) schedulerbackend.SchedBackend {
+ if name := podGang.Labels[apicommon.LabelSchedulerName]; name != "" {
+ if b := schedulerbackend.Get(name); b != nil {
+ return b
+ }
+ }
+ return schedulerbackend.GetDefault()
+}
+
+// Reconcile processes PodGang changes and synchronizes to backend-specific CRs
+func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+ podGang := &groveschedulerv1alpha1.PodGang{}
+ if err := r.Get(ctx, req.NamespacedName, podGang); err != nil {
+ if client.IgnoreNotFound(err) != nil {
+ return ctrl.Result{}, err
+ }
+ return ctrl.Result{}, nil
+ }
+
+ backend := resolveBackend(podGang)
+ if backend == nil {
+ log.FromContext(ctx).Error(nil, "No scheduler backend available for PodGang", "podgang", req.NamespacedName)
+ return ctrl.Result{}, nil
+ }
+
+ logger := log.FromContext(ctx).WithValues("scheduler", backend.Name(), "podGang", req.NamespacedName)
+ if !podGang.DeletionTimestamp.IsZero() {
+ logger.Info("PodGang is being deleted")
+ if err := backend.OnPodGangDelete(ctx, podGang); err != nil {
+ logger.Error(err, "Failed to delete scheduler backend resources on-delete of PodGang")
+ return ctrl.Result{}, err
+ }
+ return ctrl.Result{}, nil
+ }
+
+ if err := backend.SyncPodGang(ctx, podGang); err != nil {
+ logger.Error(err, "Failed to SyncPodGang on spec change")
+ return ctrl.Result{}, err
+ }
+ logger.Info("Successfully synced PodGang")
+ return ctrl.Result{}, nil
+}
diff --git a/operator/internal/controller/podgang/register.go b/operator/internal/controller/podgang/register.go
new file mode 100644
index 000000000..6243f3d6b
--- /dev/null
+++ b/operator/internal/controller/podgang/register.go
@@ -0,0 +1,54 @@
+// /*
+// Copyright 2025 The Grove Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// */
+
+package podgang
+
+import (
+ grovectrlutils "github.com/ai-dynamo/grove/operator/internal/controller/utils"
+
+ groveschedulerv1alpha1 "github.com/ai-dynamo/grove/scheduler/api/core/v1alpha1"
+ ctrl "sigs.k8s.io/controller-runtime"
+ "sigs.k8s.io/controller-runtime/pkg/builder"
+ "sigs.k8s.io/controller-runtime/pkg/event"
+ "sigs.k8s.io/controller-runtime/pkg/predicate"
+)
+
+// RegisterWithManager registers the backend controller with the manager
+func (r *Reconciler) RegisterWithManager(mgr ctrl.Manager) error {
+ return ctrl.NewControllerManagedBy(mgr).
+ For(&groveschedulerv1alpha1.PodGang{}, builder.WithPredicates(podGangSpecChangePredicate())).
+ Named("podgang").
+ Complete(r)
+}
+
+// podGangSpecChangePredicate filters PodGang events so that only spec changes are processed.
+// Status-only updates (such as the Initialized condition) are ignored.
+func podGangSpecChangePredicate() predicate.Predicate {
+ return predicate.Funcs{
+ CreateFunc: func(e event.CreateEvent) bool {
+ return grovectrlutils.IsManagedPodGang(e.Object)
+ },
+ DeleteFunc: func(e event.DeleteEvent) bool {
+ return grovectrlutils.IsManagedPodGang(e.Object)
+ },
+ UpdateFunc: func(e event.UpdateEvent) bool {
+ return grovectrlutils.IsManagedPodGang(e.ObjectOld) &&
+ grovectrlutils.IsManagedPodGang(e.ObjectNew) &&
+ (e.ObjectOld.GetGeneration() != e.ObjectNew.GetGeneration())
+ },
+ GenericFunc: func(_ event.GenericEvent) bool { return false },
+ }
+}
diff --git a/operator/internal/controller/podgang/register_test.go b/operator/internal/controller/podgang/register_test.go
new file mode 100644
index 000000000..9d3fa8607
--- /dev/null
+++ b/operator/internal/controller/podgang/register_test.go
@@ -0,0 +1,135 @@
+// /*
+// Copyright 2025 The Grove Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// */
+
+package podgang
+
+import (
+ "testing"
+
+ testutils "github.com/ai-dynamo/grove/operator/test/utils"
+
+ "github.com/stretchr/testify/assert"
+ "sigs.k8s.io/controller-runtime/pkg/event"
+)
+
+// predicateTestCase describes a scenario and expected predicate result per event type.
+type predicateTestCase struct {
+ name string
+ managedOld bool
+ managedNew bool
+ generationChanged bool
+ shouldAllowCreateEvent bool
+ shouldAllowDeleteEvent bool
+ shouldAllowGenericEvent bool
+ shouldAllowUpdateEvent bool
+}
+
+func TestPodGangSpecChangePredicate(t *testing.T) {
+ pred := podGangSpecChangePredicate()
+
+ tests := []predicateTestCase{
+ {
+ name: "managed PodGang create",
+ managedOld: true,
+ managedNew: true,
+ shouldAllowCreateEvent: true,
+ shouldAllowDeleteEvent: true,
+ shouldAllowGenericEvent: false,
+ shouldAllowUpdateEvent: false,
+ },
+ {
+ name: "unmanaged PodGang create",
+ managedOld: false,
+ managedNew: false,
+ shouldAllowCreateEvent: false,
+ shouldAllowDeleteEvent: false,
+ shouldAllowGenericEvent: false,
+ shouldAllowUpdateEvent: false,
+ },
+ {
+ name: "managed PodGang update with spec change (generation changed)",
+ managedOld: true,
+ managedNew: true,
+ generationChanged: true,
+ shouldAllowCreateEvent: true,
+ shouldAllowDeleteEvent: true,
+ shouldAllowGenericEvent: false,
+ shouldAllowUpdateEvent: true,
+ },
+ {
+ name: "managed PodGang update with status-only change (generation unchanged)",
+ managedOld: true,
+ managedNew: true,
+ generationChanged: false,
+ shouldAllowCreateEvent: true,
+ shouldAllowDeleteEvent: true,
+ shouldAllowGenericEvent: false,
+ shouldAllowUpdateEvent: false,
+ },
+ {
+ name: "update with old managed and new unmanaged",
+ managedOld: true,
+ managedNew: false,
+ generationChanged: true,
+ shouldAllowCreateEvent: false, // Create/Delete use newPG which is unmanaged
+ shouldAllowDeleteEvent: false,
+ shouldAllowGenericEvent: false,
+ shouldAllowUpdateEvent: false,
+ },
+ {
+ name: "update with old unmanaged and new managed",
+ managedOld: false,
+ managedNew: true,
+ generationChanged: true,
+ shouldAllowCreateEvent: true, // Create/Delete use newPG which is managed
+ shouldAllowDeleteEvent: true,
+ shouldAllowGenericEvent: false,
+ shouldAllowUpdateEvent: false, // old is unmanaged
+ },
+ {
+ name: "generic event always rejected",
+ managedOld: true,
+ managedNew: true,
+ shouldAllowCreateEvent: true,
+ shouldAllowDeleteEvent: true,
+ shouldAllowGenericEvent: false,
+ shouldAllowUpdateEvent: false,
+ },
+ }
+
+ for _, tc := range tests {
+ t.Run(tc.name, func(t *testing.T) {
+ oldPG := testutils.NewPodGangBuilder("test-pg", "default").
+ WithGeneration(1).
+ WithManaged(tc.managedOld).
+ WithPodGroup("pg0", 1).
+ Build()
+ newPG := testutils.NewPodGangBuilder("test-pg", "default").
+ WithGeneration(1).
+ WithManaged(tc.managedNew).
+ WithPodGroup("pg0", 1).
+ Build()
+ if tc.generationChanged {
+ newPG.SetGeneration(oldPG.GetGeneration() + 1)
+ }
+
+ assert.Equal(t, tc.shouldAllowCreateEvent, pred.Create(event.CreateEvent{Object: newPG}), "Create")
+ assert.Equal(t, tc.shouldAllowDeleteEvent, pred.Delete(event.DeleteEvent{Object: newPG}), "Delete")
+ assert.Equal(t, tc.shouldAllowGenericEvent, pred.Generic(event.GenericEvent{Object: newPG}), "Generic")
+ assert.Equal(t, tc.shouldAllowUpdateEvent, pred.Update(event.UpdateEvent{ObjectOld: oldPG, ObjectNew: newPG}), "Update")
+ })
+ }
+}
diff --git a/operator/internal/controller/register.go b/operator/internal/controller/register.go
index 5e613d80f..c8bcb817e 100644
--- a/operator/internal/controller/register.go
+++ b/operator/internal/controller/register.go
@@ -17,27 +17,39 @@
package controller
import (
+ "fmt"
+
configv1alpha1 "github.com/ai-dynamo/grove/operator/api/config/v1alpha1"
"github.com/ai-dynamo/grove/operator/internal/controller/podclique"
"github.com/ai-dynamo/grove/operator/internal/controller/podcliquescalinggroup"
"github.com/ai-dynamo/grove/operator/internal/controller/podcliqueset"
+ "github.com/ai-dynamo/grove/operator/internal/controller/podgang"
ctrl "sigs.k8s.io/controller-runtime"
)
// RegisterControllers registers all controllers with the manager.
-func RegisterControllers(mgr ctrl.Manager, controllerConfig configv1alpha1.ControllerConfiguration, topologyAwareSchedulingConfig configv1alpha1.TopologyAwareSchedulingConfiguration, networkConfig configv1alpha1.NetworkAcceleration) error {
- pcsReconciler := podcliqueset.NewReconciler(mgr, controllerConfig.PodCliqueSet, topologyAwareSchedulingConfig, networkConfig)
+func RegisterControllers(mgr ctrl.Manager, config *configv1alpha1.OperatorConfiguration) error {
+ if config == nil {
+ return fmt.Errorf("operator configuration must not be nil")
+ }
+ pcsReconciler := podcliqueset.NewReconciler(mgr, config.Controllers.PodCliqueSet, config.TopologyAwareScheduling, config.Network)
if err := pcsReconciler.RegisterWithManager(mgr); err != nil {
return err
}
- pcReconciler := podclique.NewReconciler(mgr, controllerConfig.PodClique)
+ pcReconciler := podclique.NewReconciler(mgr, config.Controllers.PodClique)
if err := pcReconciler.RegisterWithManager(mgr); err != nil {
return err
}
- pcsgReconciler := podcliquescalinggroup.NewReconciler(mgr, controllerConfig.PodCliqueScalingGroup)
+ pcsgReconciler := podcliquescalinggroup.NewReconciler(mgr, config.Controllers.PodCliqueScalingGroup)
if err := pcsgReconciler.RegisterWithManager(mgr); err != nil {
return err
}
+
+ podgangReconciler := podgang.NewReconciler(mgr)
+ if err := podgangReconciler.RegisterWithManager(mgr); err != nil {
+ return err
+ }
+
return nil
}
diff --git a/operator/internal/controller/register_test.go b/operator/internal/controller/register_test.go
index d647ba4c2..d42250471 100644
--- a/operator/internal/controller/register_test.go
+++ b/operator/internal/controller/register_test.go
@@ -49,40 +49,22 @@ func TestRegisterControllers(t *testing.T) {
mgr, err := ctrl.NewManager(cfg, ctrl.Options{})
require.NoError(t, err)
- controllerConfig := configv1alpha1.ControllerConfiguration{
- PodCliqueSet: configv1alpha1.PodCliqueSetControllerConfiguration{
- ConcurrentSyncs: ptr.To(1),
- },
- PodClique: configv1alpha1.PodCliqueControllerConfiguration{
- ConcurrentSyncs: ptr.To(1),
- },
- PodCliqueScalingGroup: configv1alpha1.PodCliqueScalingGroupControllerConfiguration{
- ConcurrentSyncs: ptr.To(1),
- },
- }
-
- err = RegisterControllers(mgr, controllerConfig, configv1alpha1.TopologyAwareSchedulingConfiguration{}, configv1alpha1.NetworkAcceleration{})
- require.NoError(t, err)
- })
-
- // Test registration with different concurrency settings
- t.Run("registration with higher concurrency", func(t *testing.T) {
- mgr, err := ctrl.NewManager(cfg, ctrl.Options{})
- require.NoError(t, err)
-
- controllerConfig := configv1alpha1.ControllerConfiguration{
- PodCliqueSet: configv1alpha1.PodCliqueSetControllerConfiguration{
- ConcurrentSyncs: ptr.To(5),
- },
- PodClique: configv1alpha1.PodCliqueControllerConfiguration{
- ConcurrentSyncs: ptr.To(10),
- },
- PodCliqueScalingGroup: configv1alpha1.PodCliqueScalingGroupControllerConfiguration{
- ConcurrentSyncs: ptr.To(3),
+ operatorConfig := configv1alpha1.OperatorConfiguration{
+ Scheduler: configv1alpha1.SchedulerConfiguration{Profiles: []configv1alpha1.SchedulerProfile{{Name: configv1alpha1.SchedulerNameKai}}, DefaultProfileName: string(configv1alpha1.SchedulerNameKai)},
+ Controllers: configv1alpha1.ControllerConfiguration{
+ PodCliqueSet: configv1alpha1.PodCliqueSetControllerConfiguration{
+ ConcurrentSyncs: ptr.To(1),
+ },
+ PodClique: configv1alpha1.PodCliqueControllerConfiguration{
+ ConcurrentSyncs: ptr.To(1),
+ },
+ PodCliqueScalingGroup: configv1alpha1.PodCliqueScalingGroupControllerConfiguration{
+ ConcurrentSyncs: ptr.To(1),
+ },
},
}
- err = RegisterControllers(mgr, controllerConfig, configv1alpha1.TopologyAwareSchedulingConfiguration{}, configv1alpha1.NetworkAcceleration{})
+ err = RegisterControllers(mgr, &operatorConfig)
require.NoError(t, err)
})
}
diff --git a/operator/internal/controller/utils/managedresource.go b/operator/internal/controller/utils/managedresource.go
index ed93503d0..de9752342 100644
--- a/operator/internal/controller/utils/managedresource.go
+++ b/operator/internal/controller/utils/managedresource.go
@@ -20,6 +20,7 @@ import (
apicommon "github.com/ai-dynamo/grove/operator/api/common"
grovecorev1alpha1 "github.com/ai-dynamo/grove/operator/api/core/v1alpha1"
+ groveschedulerv1alpha1 "github.com/ai-dynamo/grove/scheduler/api/core/v1alpha1"
"github.com/samber/lo"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -53,3 +54,12 @@ func IsManagedPodClique(obj client.Object, expectedOwnerKinds ...string) bool {
}, false)
return IsManagedByGrove(podClique.GetLabels()) && hasExpectedOwner
}
+
+// IsManagedPodGang checks if the PodGang is managed by Grove.
+func IsManagedPodGang(obj client.Object) bool {
+ podGang, ok := obj.(*groveschedulerv1alpha1.PodGang)
+ if !ok {
+ return false
+ }
+ return IsManagedByGrove(podGang.Labels)
+}
diff --git a/operator/internal/schedulerbackend/kaischeduler/backend.go b/operator/internal/schedulerbackend/kaischeduler/backend.go
new file mode 100644
index 000000000..adfc2feb3
--- /dev/null
+++ b/operator/internal/schedulerbackend/kaischeduler/backend.go
@@ -0,0 +1,83 @@
+// /*
+// Copyright 2025 The Grove Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// */
+
+package kaischeduler
+
+import (
+ "context"
+
+ configv1alpha1 "github.com/ai-dynamo/grove/operator/api/config/v1alpha1"
+ grovecorev1alpha1 "github.com/ai-dynamo/grove/operator/api/core/v1alpha1"
+
+ groveschedulerv1alpha1 "github.com/ai-dynamo/grove/scheduler/api/core/v1alpha1"
+ corev1 "k8s.io/api/core/v1"
+ "k8s.io/apimachinery/pkg/runtime"
+ "k8s.io/client-go/tools/record"
+ "sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+// Backend implements the scheduler backend interface (SchedBackend in schedulerbackend package) for KAI scheduler.
+// TODO: implement conversion of PodGang → KAI PodGroup in SyncPodGang/OnPodGangDelete.
+type Backend struct {
+ client client.Client
+ scheme *runtime.Scheme
+ name string
+ eventRecorder record.EventRecorder
+ profile configv1alpha1.SchedulerProfile
+}
+
+// New creates a new KAI backend instance. profile is the scheduler profile for kai-scheduler;
+// Backend uses profile.Name and may unmarshal profile.Config for kai-specific options.
+func New(cl client.Client, scheme *runtime.Scheme, eventRecorder record.EventRecorder, profile configv1alpha1.SchedulerProfile) *Backend {
+ return &Backend{
+ client: cl,
+ scheme: scheme,
+ name: "kai-scheduler",
+ eventRecorder: eventRecorder,
+ profile: profile,
+ }
+}
+
+// Name returns the pod-facing scheduler name (kai-scheduler), for lookup and logging.
+func (b *Backend) Name() string {
+ return b.name
+}
+
+// Init initializes the KAI backend
+func (b *Backend) Init() error {
+ return nil
+}
+
+// SyncPodGang converts PodGang to KAI PodGroup and synchronizes it
+func (b *Backend) SyncPodGang(_ context.Context, _ *groveschedulerv1alpha1.PodGang) error {
+ return nil
+}
+
+// OnPodGangDelete removes the PodGroup owned by this PodGang
+func (b *Backend) OnPodGangDelete(_ context.Context, _ *groveschedulerv1alpha1.PodGang) error {
+ return nil
+}
+
+// PreparePod adds KAI scheduler-specific configuration to the Pod.
+// Sets Pod.Spec.SchedulerName so the pod is scheduled by KAI.
+func (b *Backend) PreparePod(pod *corev1.Pod) {
+ pod.Spec.SchedulerName = b.Name()
+}
+
+// ValidatePodCliqueSet runs KAI-specific validations on the PodCliqueSet.
+func (b *Backend) ValidatePodCliqueSet(_ context.Context, _ *grovecorev1alpha1.PodCliqueSet) error {
+ return nil
+}
diff --git a/operator/internal/schedulerbackend/kaischeduler/backend_test.go b/operator/internal/schedulerbackend/kaischeduler/backend_test.go
new file mode 100644
index 000000000..3fbd11ce1
--- /dev/null
+++ b/operator/internal/schedulerbackend/kaischeduler/backend_test.go
@@ -0,0 +1,42 @@
+// /*
+// Copyright 2025 The Grove Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// */
+
+package kaischeduler
+
+import (
+ "testing"
+
+ configv1alpha1 "github.com/ai-dynamo/grove/operator/api/config/v1alpha1"
+ testutils "github.com/ai-dynamo/grove/operator/test/utils"
+
+ "github.com/stretchr/testify/assert"
+ "k8s.io/client-go/tools/record"
+)
+
+func TestBackend_PreparePod(t *testing.T) {
+ cl := testutils.CreateDefaultFakeClient(nil)
+ recorder := record.NewFakeRecorder(10)
+ profile := configv1alpha1.SchedulerProfile{Name: configv1alpha1.SchedulerNameKai}
+ b := New(cl, cl.Scheme(), recorder, profile)
+
+ pod := testutils.NewPodBuilder("test-pod", "default").
+ WithSchedulerName("default-scheduler").
+ Build()
+
+ b.PreparePod(pod)
+
+ assert.Equal(t, "kai-scheduler", pod.Spec.SchedulerName)
+}
diff --git a/operator/internal/schedulerbackend/kube/backend.go b/operator/internal/schedulerbackend/kube/backend.go
new file mode 100644
index 000000000..2a678097d
--- /dev/null
+++ b/operator/internal/schedulerbackend/kube/backend.go
@@ -0,0 +1,91 @@
+// /*
+// Copyright 2025 The Grove Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// */
+
+package kube
+
+import (
+ "context"
+
+ configv1alpha1 "github.com/ai-dynamo/grove/operator/api/config/v1alpha1"
+ grovecorev1alpha1 "github.com/ai-dynamo/grove/operator/api/core/v1alpha1"
+
+ groveschedulerv1alpha1 "github.com/ai-dynamo/grove/scheduler/api/core/v1alpha1"
+ corev1 "k8s.io/api/core/v1"
+ "k8s.io/apimachinery/pkg/runtime"
+ "k8s.io/client-go/tools/record"
+ "sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+// PodSchedulerName is the value set on Pod.Spec.SchedulerName for the Kubernetes default scheduler.
+const PodSchedulerName = "default-scheduler"
+
+// Backend implements the scheduler backend interface (SchedBackend in schedulerbackend package) for Kubernetes default scheduler.
+// This backend does minimal work - just sets the scheduler name on pods
+type Backend struct {
+ client client.Client
+ scheme *runtime.Scheme
+ name string
+ eventRecorder record.EventRecorder
+ profile configv1alpha1.SchedulerProfile
+}
+
+// New creates a new Kube backend instance. profile is the scheduler profile for default-scheduler;
+// Backend uses profile.Name and may unmarshal profile.Config into KubeSchedulerConfig.
+func New(cl client.Client, scheme *runtime.Scheme, eventRecorder record.EventRecorder, profile configv1alpha1.SchedulerProfile) *Backend {
+ return &Backend{
+ client: cl,
+ scheme: scheme,
+ name: PodSchedulerName,
+ eventRecorder: eventRecorder,
+ profile: profile,
+ }
+}
+
+// Name returns the pod-facing scheduler name (default-scheduler), for lookup and logging.
+func (b *Backend) Name() string {
+ return b.name
+}
+
+// Init initializes the Kube backend
+// For Kube backend, no special initialization is needed
+func (b *Backend) Init() error {
+ return nil
+}
+
+// SyncPodGang synchronizes PodGang resources
+// For default kube scheduler, no additional resources are needed
+func (b *Backend) SyncPodGang(_ context.Context, _ *groveschedulerv1alpha1.PodGang) error {
+ // No-op: default kube scheduler doesn't need any custom resources
+ return nil
+}
+
+// OnPodGangDelete handles PodGang deletion
+// For default kube scheduler, no cleanup is needed
+func (b *Backend) OnPodGangDelete(_ context.Context, _ *groveschedulerv1alpha1.PodGang) error {
+ // No-op: default kube scheduler doesn't have any resources to clean up
+ return nil
+}
+
+// PreparePod adds Kubernetes default scheduler-specific configuration to the Pod.
+// Pod.Spec.SchedulerName is set to "default-scheduler" (the value expected by kube-apiserver / kube-scheduler).
+func (b *Backend) PreparePod(pod *corev1.Pod) {
+ pod.Spec.SchedulerName = b.name
+}
+
+// ValidatePodCliqueSet runs default-scheduler-specific validations on the PodCliqueSet.
+func (b *Backend) ValidatePodCliqueSet(_ context.Context, _ *grovecorev1alpha1.PodCliqueSet) error {
+ return nil
+}
diff --git a/operator/internal/schedulerbackend/kube/backend_test.go b/operator/internal/schedulerbackend/kube/backend_test.go
new file mode 100644
index 000000000..dbed8885b
--- /dev/null
+++ b/operator/internal/schedulerbackend/kube/backend_test.go
@@ -0,0 +1,40 @@
+// /*
+// Copyright 2025 The Grove Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// */
+
+package kube
+
+import (
+ "testing"
+
+ configv1alpha1 "github.com/ai-dynamo/grove/operator/api/config/v1alpha1"
+ testutils "github.com/ai-dynamo/grove/operator/test/utils"
+
+ "github.com/stretchr/testify/assert"
+ "k8s.io/client-go/tools/record"
+)
+
+func TestBackend_PreparePod(t *testing.T) {
+ cl := testutils.CreateDefaultFakeClient(nil)
+ recorder := record.NewFakeRecorder(10)
+ profile := configv1alpha1.SchedulerProfile{Name: configv1alpha1.SchedulerNameKube}
+ b := New(cl, cl.Scheme(), recorder, profile)
+
+ pod := testutils.NewPodBuilder("test-pod", "default").Build()
+
+ b.PreparePod(pod)
+
+ assert.Equal(t, PodSchedulerName, pod.Spec.SchedulerName)
+}
diff --git a/operator/internal/schedulerbackend/manager.go b/operator/internal/schedulerbackend/manager.go
new file mode 100644
index 000000000..a42218067
--- /dev/null
+++ b/operator/internal/schedulerbackend/manager.go
@@ -0,0 +1,95 @@
+// /*
+// Copyright 2025 The Grove Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// */
+
+package schedulerbackend
+
+import (
+ "fmt"
+
+ configv1alpha1 "github.com/ai-dynamo/grove/operator/api/config/v1alpha1"
+ "github.com/ai-dynamo/grove/operator/internal/schedulerbackend/kaischeduler"
+ "github.com/ai-dynamo/grove/operator/internal/schedulerbackend/kube"
+
+ "k8s.io/apimachinery/pkg/runtime"
+ "k8s.io/client-go/tools/record"
+ "sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+// Compile-time checks that backend implementations satisfy SchedBackend.
+var (
+ _ SchedBackend = (*kaischeduler.Backend)(nil)
+ _ SchedBackend = (*kube.Backend)(nil)
+)
+
+// newBackendForProfile creates and initializes a SchedBackend for the given profile.
+// Add new scheduler backends by extending this switch (no global registry).
+func newBackendForProfile(cl client.Client, scheme *runtime.Scheme, rec record.EventRecorder, p configv1alpha1.SchedulerProfile) (SchedBackend, error) {
+ switch p.Name {
+ case configv1alpha1.SchedulerNameKube:
+ b := kube.New(cl, scheme, rec, p)
+ if err := b.Init(); err != nil {
+ return nil, err
+ }
+ return b, nil
+ case configv1alpha1.SchedulerNameKai:
+ b := kaischeduler.New(cl, scheme, rec, p)
+ if err := b.Init(); err != nil {
+ return nil, err
+ }
+ return b, nil
+ default:
+ return nil, fmt.Errorf("scheduler profile %q is not supported", p.Name)
+ }
+}
+
+var (
+ backends map[string]SchedBackend
+ defaultBackend SchedBackend
+)
+
+// Initialize creates and registers backend instances for each profile in config.Profiles.
+// Defaults are applied to config so that kube-scheduler is always present; only backends
+// named in config.Profiles are started. Called once during operator startup before controllers start.
+func Initialize(client client.Client, scheme *runtime.Scheme, eventRecorder record.EventRecorder, cfg configv1alpha1.SchedulerConfiguration) error {
+ backends = make(map[string]SchedBackend)
+
+ // Create and initialize a backend for each entry in cfg.Profiles (order follows config; a duplicate name overwrites the earlier entry).
+ for _, p := range cfg.Profiles {
+ backend, err := newBackendForProfile(client, scheme, eventRecorder, p)
+ if err != nil {
+ return fmt.Errorf("failed to initialize %s backend: %w", p.Name, err)
+ }
+ backends[backend.Name()] = backend
+ if cfg.DefaultProfileName != "" && string(p.Name) == cfg.DefaultProfileName {
+ defaultBackend = backend
+ }
+ }
+ return nil
+}
+
+// Get returns the backend for the given name. Empty string is valid and returns the default backend (e.g. when Pod.Spec.SchedulerName is unset).
+// default-scheduler is always available; other backends return nil if not enabled via a profile.
+func Get(name string) SchedBackend {
+ if name == "" {
+ return defaultBackend
+ }
+ return backends[name]
+}
+
+// GetDefault returns the backend designated as default in OperatorConfiguration (scheduler.defaultProfileName).
+func GetDefault() SchedBackend {
+ return defaultBackend
+}
diff --git a/operator/internal/schedulerbackend/manager_test.go b/operator/internal/schedulerbackend/manager_test.go
new file mode 100644
index 000000000..85b3b51de
--- /dev/null
+++ b/operator/internal/schedulerbackend/manager_test.go
@@ -0,0 +1,89 @@
+// /*
+// Copyright 2025 The Grove Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// */
+
+package schedulerbackend
+
+import (
+ "testing"
+
+ configv1alpha1 "github.com/ai-dynamo/grove/operator/api/config/v1alpha1"
+ testutils "github.com/ai-dynamo/grove/operator/test/utils"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ "k8s.io/client-go/tools/record"
+)
+
+// TestInitialize tests backend initialization with different schedulers.
+func TestInitialize(t *testing.T) {
+ tests := []struct {
+ name string
+ schedulerName configv1alpha1.SchedulerName
+ wantErr bool
+ errContains string
+ expectedName string
+ }{
+ {
+ name: "kai scheduler initialization",
+ schedulerName: configv1alpha1.SchedulerNameKai,
+ wantErr: false,
+ expectedName: "kai-scheduler",
+ },
+ {
+ name: "default scheduler initialization",
+ schedulerName: configv1alpha1.SchedulerNameKube,
+ wantErr: false,
+ expectedName: "default-scheduler", // kube backend's Name() is the pod-facing name
+ },
+ {
+ name: "unsupported scheduler",
+ schedulerName: "volcano",
+ wantErr: true,
+ errContains: "not supported",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ // Reset global state before each test
+ backends = nil
+ defaultBackend = nil
+
+ cl := testutils.CreateDefaultFakeClient(nil)
+ recorder := record.NewFakeRecorder(10)
+
+ cfg := configv1alpha1.SchedulerConfiguration{
+ Profiles: []configv1alpha1.SchedulerProfile{
+ {Name: tt.schedulerName},
+ },
+ DefaultProfileName: string(tt.schedulerName),
+ }
+ err := Initialize(cl, cl.Scheme(), recorder, cfg)
+
+ if tt.wantErr {
+ require.Error(t, err)
+ assert.Contains(t, err.Error(), tt.errContains)
+ assert.Nil(t, GetDefault())
+ } else {
+ require.NoError(t, err)
+ require.NotNil(t, GetDefault())
+ name := GetDefault().Name()
+ assert.Equal(t, tt.expectedName, name)
+ assert.Equal(t, GetDefault(), Get(name)) // backend is stored under its Name()
+ }
+ })
+ }
+}
diff --git a/operator/internal/schedulerbackend/types.go b/operator/internal/schedulerbackend/types.go
new file mode 100644
index 000000000..f23055766
--- /dev/null
+++ b/operator/internal/schedulerbackend/types.go
@@ -0,0 +1,56 @@
+// /*
+// Copyright 2025 The Grove Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// */
+
+package schedulerbackend
+
+import (
+ "context"
+
+ grovecorev1alpha1 "github.com/ai-dynamo/grove/operator/api/core/v1alpha1"
+
+ groveschedulerv1alpha1 "github.com/ai-dynamo/grove/scheduler/api/core/v1alpha1"
+ corev1 "k8s.io/api/core/v1"
+)
+
+// SchedBackend defines the interface that different scheduler backends must implement.
+// It is defined in this package (consumer side) so that kube and kaischeduler subpackages
+// need not import schedulerbackend, avoiding circular dependencies (see "accept interfaces,
+// return structs" and consumer-defined interfaces in Go / Kubernetes).
+//
+// Architecture: SchedBackend validates PodCliqueSet at admission, converts PodGang to scheduler-specific
+// CR (PodGroup/Workload/etc), and prepares Pods with scheduler-specific configurations.
+type SchedBackend interface {
+ // Name is a unique name of the scheduler backend.
+ Name() string
+
+ // Init provides a hook to initialize/setup one-time scheduler resources,
+ // called at the startup of grove operator.
+ Init() error
+
+ // SyncPodGang synchronizes (creates/updates) scheduler-specific resources for a PodGang
+ // reacting to a creation or update of a PodGang resource.
+ SyncPodGang(ctx context.Context, podGang *groveschedulerv1alpha1.PodGang) error
+
+ // OnPodGangDelete cleans up scheduler-specific resources for the given PodGang.
+ OnPodGangDelete(ctx context.Context, podGang *groveschedulerv1alpha1.PodGang) error
+
+ // PreparePod adds scheduler-backend-specific configuration to the given Pod object
+ // prior to its creation (schedulerName, annotations, etc.).
+ PreparePod(pod *corev1.Pod)
+
+ // ValidatePodCliqueSet runs scheduler-specific validations on the PodCliqueSet (e.g. TAS required but not supported).
+ ValidatePodCliqueSet(ctx context.Context, pcs *grovecorev1alpha1.PodCliqueSet) error
+}
diff --git a/operator/internal/webhook/admission/pcs/validation/handler.go b/operator/internal/webhook/admission/pcs/validation/handler.go
index b0c779e28..f5dc885c3 100644
--- a/operator/internal/webhook/admission/pcs/validation/handler.go
+++ b/operator/internal/webhook/admission/pcs/validation/handler.go
@@ -24,6 +24,7 @@ import (
"github.com/ai-dynamo/grove/operator/api/core/v1alpha1"
"github.com/ai-dynamo/grove/operator/internal/errors"
"github.com/ai-dynamo/grove/operator/internal/mnnvl"
+ "github.com/ai-dynamo/grove/operator/internal/schedulerbackend"
"github.com/go-logr/logr"
admissionv1 "k8s.io/api/admission/v1"
@@ -42,17 +43,21 @@ const (
// Handler is a handler for validating PodCliqueSet resources.
type Handler struct {
- logger logr.Logger
- tasConfig configv1alpha1.TopologyAwareSchedulingConfiguration
- networkConfig configv1alpha1.NetworkAcceleration
+ logger logr.Logger
+ tasConfig configv1alpha1.TopologyAwareSchedulingConfiguration
+ networkConfig configv1alpha1.NetworkAcceleration
+ schedulerConfig configv1alpha1.SchedulerConfiguration
}
// NewHandler creates a new handler for PodCliqueSet Webhook.
-func NewHandler(mgr manager.Manager, tasConfig configv1alpha1.TopologyAwareSchedulingConfiguration, networkConfig configv1alpha1.NetworkAcceleration) *Handler {
+// It reads TopologyAwareScheduling, Network, and Scheduler from the operator configuration.
+// operatorCfg must not be nil.
+func NewHandler(mgr manager.Manager, operatorCfg *configv1alpha1.OperatorConfiguration) *Handler {
return &Handler{
- logger: mgr.GetLogger().WithName("webhook").WithName(Name),
- tasConfig: tasConfig,
- networkConfig: networkConfig,
+ logger: mgr.GetLogger().WithName("webhook").WithName(Name),
+ tasConfig: operatorCfg.TopologyAwareScheduling,
+ networkConfig: operatorCfg.Network,
+ schedulerConfig: operatorCfg.Scheduler,
}
}
@@ -64,7 +69,7 @@ func (h *Handler) ValidateCreate(ctx context.Context, obj runtime.Object) (admis
return nil, errors.WrapError(err, ErrValidateCreatePodCliqueSet, string(admissionv1.Create), "failed to cast object to PodCliqueSet")
}
- v := newPCSValidator(pcs, admissionv1.Create, h.tasConfig)
+ v := newPCSValidator(pcs, admissionv1.Create, h.tasConfig, h.schedulerConfig)
var allErrs field.ErrorList
allErrs = append(allErrs, v.validateTopologyConstraintsOnCreate()...)
warnings, errs := v.validate()
@@ -73,6 +78,11 @@ func (h *Handler) ValidateCreate(ctx context.Context, obj runtime.Object) (admis
// Validate MNNVL annotation: reject if annotation="true" but feature is disabled
allErrs = append(allErrs, mnnvl.ValidateMetadataOnCreate(pcs, h.networkConfig.AutoMNNVLEnabled)...)
+ // Scheduler-backend-specific validation
+ if err := validatePodCliqueSetWithBackend(ctx, pcs); err != nil {
+ allErrs = append(allErrs, field.Invalid(field.NewPath("spec"), pcs.Spec, err.Error()))
+ }
+
return warnings, allErrs.ToAggregate()
}
@@ -88,12 +98,17 @@ func (h *Handler) ValidateUpdate(ctx context.Context, oldObj, newObj runtime.Obj
return nil, errors.WrapError(err, ErrValidateUpdatePodCliqueSet, string(admissionv1.Update), "failed to cast old object to PodCliqueSet")
}
- v := newPCSValidator(newPCS, admissionv1.Update, h.tasConfig)
+ v := newPCSValidator(newPCS, admissionv1.Update, h.tasConfig, h.schedulerConfig)
warnings, errs := v.validate()
// Validate MNNVL annotation immutability
errs = append(errs, mnnvl.ValidateMetadataOnUpdate(oldPCS, newPCS)...)
+ // Scheduler-backend-specific validation
+ if err := validatePodCliqueSetWithBackend(ctx, newPCS); err != nil {
+ errs = append(errs, field.Invalid(field.NewPath("spec"), newPCS.Spec, err.Error()))
+ }
+
if len(errs) > 0 {
return warnings, errs.ToAggregate()
}
@@ -105,6 +120,20 @@ func (h *Handler) ValidateDelete(_ context.Context, _ runtime.Object) (admission
return nil, nil
}
+// validatePodCliqueSetWithBackend resolves the scheduler backend for the PCS and runs backend-specific validation.
+// All cliques share the same (resolved) schedulerName after validateSchedulerNames, so we use the first clique; empty is resolved by Get("").
+func validatePodCliqueSetWithBackend(ctx context.Context, pcs *v1alpha1.PodCliqueSet) error {
+ schedulerName := ""
+ if len(pcs.Spec.Template.Cliques) > 0 && pcs.Spec.Template.Cliques[0] != nil {
+ schedulerName = pcs.Spec.Template.Cliques[0].Spec.PodSpec.SchedulerName
+ }
+ backend := schedulerbackend.Get(schedulerName)
+ if backend == nil {
+ return nil
+ }
+ return backend.ValidatePodCliqueSet(ctx, pcs)
+}
+
// castToPodCliqueSet attempts to cast a runtime.Object to a PodCliqueSet.
func castToPodCliqueSet(obj runtime.Object) (*v1alpha1.PodCliqueSet, error) {
pcs, ok := obj.(*v1alpha1.PodCliqueSet)
diff --git a/operator/internal/webhook/admission/pcs/validation/handler_mnnvl_test.go b/operator/internal/webhook/admission/pcs/validation/handler_mnnvl_test.go
index 318497528..d45f1fd33 100644
--- a/operator/internal/webhook/admission/pcs/validation/handler_mnnvl_test.go
+++ b/operator/internal/webhook/admission/pcs/validation/handler_mnnvl_test.go
@@ -84,7 +84,12 @@ func TestValidateCreate_MNNVL(t *testing.T) {
networkConfig := configv1alpha1.NetworkAcceleration{
AutoMNNVLEnabled: tt.autoMNNVLEnabled,
}
- handler := NewHandler(mgr, getDefaultTASConfig(), networkConfig)
+ cfg := configv1alpha1.OperatorConfiguration{
+ TopologyAwareScheduling: getDefaultTASConfig(),
+ Network: networkConfig,
+ Scheduler: configv1alpha1.SchedulerConfiguration{Profiles: []configv1alpha1.SchedulerProfile{{Name: configv1alpha1.SchedulerNameKube}}, DefaultProfileName: string(configv1alpha1.SchedulerNameKube)},
+ }
+ handler := NewHandler(mgr, &cfg)
ctx := context.Background()
warnings, err := handler.ValidateCreate(ctx, tt.pcs)
@@ -163,7 +168,12 @@ func TestValidateUpdate_MNNVL(t *testing.T) {
}
// MNNVL validation on update doesn't depend on feature flag
- handler := NewHandler(mgr, getDefaultTASConfig(), getDefaultNetworkConfig())
+ cfg := configv1alpha1.OperatorConfiguration{
+ TopologyAwareScheduling: getDefaultTASConfig(),
+ Network: getDefaultNetworkConfig(),
+ Scheduler: configv1alpha1.SchedulerConfiguration{Profiles: []configv1alpha1.SchedulerProfile{{Name: configv1alpha1.SchedulerNameKube}}, DefaultProfileName: string(configv1alpha1.SchedulerNameKube)},
+ }
+ handler := NewHandler(mgr, &cfg)
ctx := context.Background()
warnings, err := handler.ValidateUpdate(ctx, tt.oldPCS, tt.newPCS)
@@ -245,7 +255,12 @@ func TestMNNVL_WebhookPipeline_LegacyPCSUpdate(t *testing.T) {
require.NoError(t, err, "defaulting webhook should not error on update")
// Step 2: Simulate the validating webhook running with oldPCS vs (possibly mutated) newPCS.
- validationHandler := NewHandler(mgr, getDefaultTASConfig(), networkConfig)
+ validationCfg := configv1alpha1.OperatorConfiguration{
+ TopologyAwareScheduling: getDefaultTASConfig(),
+ Network: networkConfig,
+ Scheduler: configv1alpha1.SchedulerConfiguration{Profiles: []configv1alpha1.SchedulerProfile{{Name: configv1alpha1.SchedulerNameKube}}, DefaultProfileName: string(configv1alpha1.SchedulerNameKube)},
+ }
+ validationHandler := NewHandler(mgr, &validationCfg)
ctx := context.Background()
warnings, err := validationHandler.ValidateUpdate(ctx, oldPCS, newPCS)
diff --git a/operator/internal/webhook/admission/pcs/validation/handler_test.go b/operator/internal/webhook/admission/pcs/validation/handler_test.go
index ccf15f11b..2b5b134e7 100644
--- a/operator/internal/webhook/admission/pcs/validation/handler_test.go
+++ b/operator/internal/webhook/admission/pcs/validation/handler_test.go
@@ -47,7 +47,12 @@ func TestNewHandler(t *testing.T) {
Logger: logr.Discard(),
}
- handler := NewHandler(mgr, getDefaultTASConfig(), getDefaultNetworkConfig())
+ cfg := groveconfigv1alpha1.OperatorConfiguration{
+ TopologyAwareScheduling: getDefaultTASConfig(),
+ Network: getDefaultNetworkConfig(),
+ Scheduler: groveconfigv1alpha1.SchedulerConfiguration{Profiles: []groveconfigv1alpha1.SchedulerProfile{{Name: groveconfigv1alpha1.SchedulerNameKube}}, DefaultProfileName: string(groveconfigv1alpha1.SchedulerNameKube)},
+ }
+ handler := NewHandler(mgr, &cfg)
require.NotNil(t, handler)
assert.NotNil(t, handler.logger)
}
@@ -113,7 +118,12 @@ func TestValidateCreate(t *testing.T) {
Logger: logr.Discard(),
}
- handler := NewHandler(mgr, getDefaultTASConfig(), getDefaultNetworkConfig())
+ cfg := groveconfigv1alpha1.OperatorConfiguration{
+ TopologyAwareScheduling: getDefaultTASConfig(),
+ Network: getDefaultNetworkConfig(),
+ Scheduler: groveconfigv1alpha1.SchedulerConfiguration{Profiles: []groveconfigv1alpha1.SchedulerProfile{{Name: groveconfigv1alpha1.SchedulerNameKube}}, DefaultProfileName: string(groveconfigv1alpha1.SchedulerNameKube)},
+ }
+ handler := NewHandler(mgr, &cfg)
ctx := context.Background()
warnings, err := handler.ValidateCreate(ctx, tt.obj)
@@ -244,7 +254,12 @@ func TestValidateUpdate(t *testing.T) {
Logger: logr.Discard(),
}
- handler := NewHandler(mgr, getDefaultTASConfig(), getDefaultNetworkConfig())
+ cfg := groveconfigv1alpha1.OperatorConfiguration{
+ TopologyAwareScheduling: getDefaultTASConfig(),
+ Network: getDefaultNetworkConfig(),
+ Scheduler: groveconfigv1alpha1.SchedulerConfiguration{Profiles: []groveconfigv1alpha1.SchedulerProfile{{Name: groveconfigv1alpha1.SchedulerNameKube}}, DefaultProfileName: string(groveconfigv1alpha1.SchedulerNameKube)},
+ }
+ handler := NewHandler(mgr, &cfg)
ctx := context.Background()
warnings, err := handler.ValidateUpdate(ctx, tt.newObj, tt.oldObj)
@@ -271,7 +286,12 @@ func TestValidateDelete(t *testing.T) {
Logger: logr.Discard(),
}
- handler := NewHandler(mgr, getDefaultTASConfig(), getDefaultNetworkConfig())
+ cfg := groveconfigv1alpha1.OperatorConfiguration{
+ TopologyAwareScheduling: getDefaultTASConfig(),
+ Network: getDefaultNetworkConfig(),
+ Scheduler: groveconfigv1alpha1.SchedulerConfiguration{Profiles: []groveconfigv1alpha1.SchedulerProfile{{Name: groveconfigv1alpha1.SchedulerNameKube}}, DefaultProfileName: string(groveconfigv1alpha1.SchedulerNameKube)},
+ }
+ handler := NewHandler(mgr, &cfg)
// Deletion validation always succeeds
ctx := context.Background()
@@ -382,7 +402,12 @@ func TestLogValidatorFunctionInvocation(t *testing.T) {
Logger: logr.Discard(),
}
- handler := NewHandler(mgr, getDefaultTASConfig(), getDefaultNetworkConfig())
+ cfg := groveconfigv1alpha1.OperatorConfiguration{
+ TopologyAwareScheduling: getDefaultTASConfig(),
+ Network: getDefaultNetworkConfig(),
+ Scheduler: groveconfigv1alpha1.SchedulerConfiguration{Profiles: []groveconfigv1alpha1.SchedulerProfile{{Name: groveconfigv1alpha1.SchedulerNameKube}}, DefaultProfileName: string(groveconfigv1alpha1.SchedulerNameKube)},
+ }
+ handler := NewHandler(mgr, &cfg)
// This function doesn't return an error, but we can verify it doesn't panic
assert.NotPanics(t, func() {
diff --git a/operator/internal/webhook/admission/pcs/validation/podcliqueset.go b/operator/internal/webhook/admission/pcs/validation/podcliqueset.go
index 303dccac1..ac22e00f2 100644
--- a/operator/internal/webhook/admission/pcs/validation/podcliqueset.go
+++ b/operator/internal/webhook/admission/pcs/validation/podcliqueset.go
@@ -23,6 +23,7 @@ import (
groveconfigv1alpha1 "github.com/ai-dynamo/grove/operator/api/config/v1alpha1"
grovecorev1alpha1 "github.com/ai-dynamo/grove/operator/api/core/v1alpha1"
+ "github.com/ai-dynamo/grove/operator/internal/schedulerbackend"
"github.com/ai-dynamo/grove/operator/internal/utils"
"github.com/samber/lo"
@@ -46,10 +47,13 @@ type pcsValidator struct {
pcs *grovecorev1alpha1.PodCliqueSet
tasEnabled bool
clusterTopologyDomains []string
+ schedulerConfig groveconfigv1alpha1.SchedulerConfiguration
}
// newPCSValidator creates a new PodCliqueSet validator for the given operation.
-func newPCSValidator(pcs *grovecorev1alpha1.PodCliqueSet, operation admissionv1.Operation, tasConfig groveconfigv1alpha1.TopologyAwareSchedulingConfiguration) *pcsValidator {
+// schedulerConfig is the full scheduler configuration; the validator uses it for
+// scheduler-name matching and may use per-scheduler config for future validations.
+func newPCSValidator(pcs *grovecorev1alpha1.PodCliqueSet, operation admissionv1.Operation, tasConfig groveconfigv1alpha1.TopologyAwareSchedulingConfiguration, schedulerConfig groveconfigv1alpha1.SchedulerConfiguration) *pcsValidator {
topologyDomains := lo.Map(tasConfig.Levels, func(level grovecorev1alpha1.TopologyLevel, _ int) string {
return string(level.Domain)
})
@@ -58,6 +62,7 @@ func newPCSValidator(pcs *grovecorev1alpha1.PodCliqueSet, operation admissionv1.
pcs: pcs,
tasEnabled: tasConfig.Enabled,
clusterTopologyDomains: topologyDomains,
+ schedulerConfig: schedulerConfig,
}
}
@@ -138,21 +143,50 @@ func (v *pcsValidator) validatePodCliqueTemplates(fldPath *field.Path) ([]string
allErrs = append(allErrs, sliceMustHaveUniqueElements(cliqueNames, fldPath.Child("name"), "cliqueTemplateSpec names must be unique")...)
allErrs = append(allErrs, sliceMustHaveUniqueElements(cliqueRoles, fldPath.Child("roleName"), "cliqueTemplateSpec.Spec roleNames must be unique")...)
+ allErrs = append(allErrs, v.validateSchedulerNames(schedulerNames, fldPath)...)
+
+ if v.isStartupTypeExplicit() {
+ allErrs = append(allErrs, validateCliqueDependencies(cliqueTemplateSpecs, fldPath)...)
+ }
+
+ return warnings, allErrs
+}
+
+// validateSchedulerNames ensures all pod scheduler names resolve to the same scheduler and that the resolved scheduler is an enabled backend (the built-in "default-scheduler" name is always allowed).
+// Empty schedulerName is resolved to the default backend name from schedulerbackend.GetDefault(), falling back to "default-scheduler" when no default backend is registered.
+func (v *pcsValidator) validateSchedulerNames(schedulerNames []string, fldPath *field.Path) field.ErrorList {
+ allErrs := field.ErrorList{}
+ specPath := fldPath.Child("spec").Child("podSpec").Child("schedulerName")
+
+ defaultSchedulerName := "default-scheduler"
+ if def := schedulerbackend.GetDefault(); def != nil {
+ defaultSchedulerName = def.Name()
+ }
+
+ // Resolve empty to default backend name; then require all resolved names to be the same.
uniqueSchedulerNames := lo.Uniq(lo.Map(schedulerNames, func(item string, _ int) string {
if item == "" {
- return "default-scheduler"
+ return defaultSchedulerName
}
return item
}))
if len(uniqueSchedulerNames) > 1 {
- allErrs = append(allErrs, field.Invalid(fldPath.Child("spec").Child("podSpec").Child("schedulerName"), uniqueSchedulerNames[0], "the schedulerName for all pods have to be the same"))
+ allErrs = append(allErrs, field.Invalid(specPath, strings.Join(uniqueSchedulerNames, ", "), "the schedulerName for all pods have to be the same"))
}
- if v.isStartupTypeExplicit() {
- allErrs = append(allErrs, validateCliqueDependencies(cliqueTemplateSpecs, fldPath)...)
+ // Validate that the resolved scheduler is enabled.
+ pcsSchedulerName := ""
+ if len(uniqueSchedulerNames) > 0 && uniqueSchedulerNames[0] != "" {
+ pcsSchedulerName = uniqueSchedulerNames[0]
}
-
- return warnings, allErrs
+ if pcsSchedulerName != "default-scheduler" && schedulerbackend.Get(pcsSchedulerName) == nil {
+ allErrs = append(allErrs, field.Invalid(
+ specPath,
+ pcsSchedulerName,
+ "schedulerName must be an enabled scheduler backend; this scheduler is not enabled in OperatorConfiguration",
+ ))
+ }
+ return allErrs
}
// validatePodCliqueNameConstraints validates that PodClique names meet DNS subdomain requirements and pod naming constraints.
@@ -543,6 +577,7 @@ func (v *pcsValidator) validatePodCliqueUpdate(oldCliques []*grovecorev1alpha1.P
allErrs = append(allErrs, apivalidation.ValidateImmutableField(newClique.Spec.RoleName, oldIndexCliqueTuple.B.Spec.RoleName, cliqueFldPath.Child("roleName"))...)
allErrs = append(allErrs, apivalidation.ValidateImmutableField(newClique.Spec.MinAvailable, oldIndexCliqueTuple.B.Spec.MinAvailable, cliqueFldPath.Child("minAvailable"))...)
allErrs = append(allErrs, apivalidation.ValidateImmutableField(newClique.Spec.StartsAfter, oldIndexCliqueTuple.B.Spec.StartsAfter, cliqueFldPath.Child("startsAfter"))...)
+ allErrs = append(allErrs, apivalidation.ValidateImmutableField(newClique.Spec.PodSpec.SchedulerName, oldIndexCliqueTuple.B.Spec.PodSpec.SchedulerName, cliqueFldPath.Child("podSpec", "schedulerName"))...)
}
return allErrs
diff --git a/operator/internal/webhook/admission/pcs/validation/podcliqueset_test.go b/operator/internal/webhook/admission/pcs/validation/podcliqueset_test.go
index d1e62fe2d..022e5bee9 100644
--- a/operator/internal/webhook/admission/pcs/validation/podcliqueset_test.go
+++ b/operator/internal/webhook/admission/pcs/validation/podcliqueset_test.go
@@ -18,17 +18,22 @@ package validation
import (
"fmt"
+ "strings"
"testing"
"time"
groveconfigv1alpha1 "github.com/ai-dynamo/grove/operator/api/config/v1alpha1"
grovecorev1alpha1 "github.com/ai-dynamo/grove/operator/api/core/v1alpha1"
+ "github.com/ai-dynamo/grove/operator/internal/schedulerbackend"
testutils "github.com/ai-dynamo/grove/operator/test/utils"
+ "github.com/samber/lo"
"github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
admissionv1 "k8s.io/api/admission/v1"
"k8s.io/apimachinery/pkg/util/uuid"
"k8s.io/apimachinery/pkg/util/validation/field"
+ "k8s.io/client-go/tools/record"
"k8s.io/utils/ptr"
)
@@ -133,7 +138,7 @@ func TestResourceNamingValidation(t *testing.T) {
pcs := pcsBuilder.Build()
- validator := newPCSValidator(pcs, admissionv1.Create, defaultTASConfig())
+ validator := newPCSValidator(pcs, admissionv1.Create, defaultTASConfig(), groveconfigv1alpha1.SchedulerConfiguration{Profiles: []groveconfigv1alpha1.SchedulerProfile{{Name: groveconfigv1alpha1.SchedulerNameKube}}, DefaultProfileName: string(groveconfigv1alpha1.SchedulerNameKube)})
warnings, errs := validator.validate()
if tc.errorMatchers != nil {
@@ -147,6 +152,178 @@ func TestResourceNamingValidation(t *testing.T) {
}
}
+func TestValidateSchedulerNames(t *testing.T) {
+ specPath := field.NewPath("cliques").Child("spec").Child("podSpec").Child("schedulerName")
+ cl := testutils.CreateDefaultFakeClient(nil)
+ recorder := record.NewFakeRecorder(10)
+
+ tests := []struct {
+ name string
+ schedulerConfig groveconfigv1alpha1.SchedulerConfiguration
+ schedulerNames []string
+ expectErrors int
+ expectInvalidSame bool
+ expectInvalidEnabled bool
+ }{
+ {
+ name: "all same default-scheduler (kube default)",
+ schedulerConfig: groveconfigv1alpha1.SchedulerConfiguration{
+ Profiles: []groveconfigv1alpha1.SchedulerProfile{
+ {Name: groveconfigv1alpha1.SchedulerNameKube},
+ {Name: groveconfigv1alpha1.SchedulerNameKai},
+ },
+ DefaultProfileName: string(groveconfigv1alpha1.SchedulerNameKube),
+ },
+ schedulerNames: []string{"default-scheduler", "default-scheduler"},
+ expectErrors: 0,
+ },
+ {
+ name: "all empty with default default-scheduler",
+ schedulerConfig: groveconfigv1alpha1.SchedulerConfiguration{
+ Profiles: []groveconfigv1alpha1.SchedulerProfile{
+ {Name: groveconfigv1alpha1.SchedulerNameKube},
+ {Name: groveconfigv1alpha1.SchedulerNameKai},
+ },
+ DefaultProfileName: string(groveconfigv1alpha1.SchedulerNameKube),
+ },
+ schedulerNames: []string{"", ""},
+ expectErrors: 0,
+ },
+ {
+ name: "all empty with default kai-scheduler (kai default)",
+ schedulerConfig: groveconfigv1alpha1.SchedulerConfiguration{
+ Profiles: []groveconfigv1alpha1.SchedulerProfile{
+ {Name: groveconfigv1alpha1.SchedulerNameKube},
+ {Name: groveconfigv1alpha1.SchedulerNameKai},
+ },
+ DefaultProfileName: string(groveconfigv1alpha1.SchedulerNameKai),
+ },
+ schedulerNames: []string{"", ""},
+ expectErrors: 0,
+ },
+ {
+ name: "mixed empty and default-scheduler",
+ schedulerConfig: groveconfigv1alpha1.SchedulerConfiguration{
+ Profiles: []groveconfigv1alpha1.SchedulerProfile{
+ {Name: groveconfigv1alpha1.SchedulerNameKube},
+ {Name: groveconfigv1alpha1.SchedulerNameKai},
+ },
+ DefaultProfileName: string(groveconfigv1alpha1.SchedulerNameKube),
+ },
+ schedulerNames: []string{"", "default-scheduler"},
+ expectErrors: 0,
+ },
+ {
+ name: "mixed default-scheduler and kai-scheduler",
+ schedulerConfig: groveconfigv1alpha1.SchedulerConfiguration{
+ Profiles: []groveconfigv1alpha1.SchedulerProfile{
+ {Name: groveconfigv1alpha1.SchedulerNameKube},
+ {Name: groveconfigv1alpha1.SchedulerNameKai},
+ },
+ DefaultProfileName: string(groveconfigv1alpha1.SchedulerNameKube),
+ },
+ schedulerNames: []string{"default-scheduler", "kai-scheduler"},
+ expectErrors: 1,
+ expectInvalidSame: true,
+ expectInvalidEnabled: false,
+ },
+ {
+ name: "single kai-scheduler when enabled (kube+kai)",
+ schedulerConfig: groveconfigv1alpha1.SchedulerConfiguration{
+ Profiles: []groveconfigv1alpha1.SchedulerProfile{
+ {Name: groveconfigv1alpha1.SchedulerNameKube},
+ {Name: groveconfigv1alpha1.SchedulerNameKai},
+ },
+ DefaultProfileName: string(groveconfigv1alpha1.SchedulerNameKube),
+ },
+ schedulerNames: []string{"kai-scheduler"},
+ expectErrors: 0,
+ },
+ {
+ name: "single kube-scheduler when enabled (kube only)",
+ schedulerConfig: groveconfigv1alpha1.SchedulerConfiguration{
+ Profiles: []groveconfigv1alpha1.SchedulerProfile{{Name: groveconfigv1alpha1.SchedulerNameKube}},
+ DefaultProfileName: string(groveconfigv1alpha1.SchedulerNameKube),
+ },
+ schedulerNames: []string{"kai-scheduler"},
+ expectErrors: 1,
+ expectInvalidSame: false,
+ expectInvalidEnabled: true,
+ },
+ {
+ name: "unknown scheduler not enabled",
+ schedulerConfig: groveconfigv1alpha1.SchedulerConfiguration{
+ Profiles: []groveconfigv1alpha1.SchedulerProfile{
+ {Name: groveconfigv1alpha1.SchedulerNameKube},
+ {Name: groveconfigv1alpha1.SchedulerNameKai},
+ },
+ DefaultProfileName: string(groveconfigv1alpha1.SchedulerNameKube),
+ },
+ schedulerNames: []string{"volcano"},
+ expectErrors: 1,
+ expectInvalidSame: false,
+ expectInvalidEnabled: true,
+ },
+ {
+ name: "no cliques (empty list)",
+ schedulerConfig: groveconfigv1alpha1.SchedulerConfiguration{
+ Profiles: []groveconfigv1alpha1.SchedulerProfile{{Name: groveconfigv1alpha1.SchedulerNameKube}},
+ DefaultProfileName: string(groveconfigv1alpha1.SchedulerNameKube),
+ },
+ schedulerNames: []string{},
+ expectErrors: 0,
+ },
+ {
+ name: "mixed empty and kai when default is default-scheduler",
+ schedulerConfig: groveconfigv1alpha1.SchedulerConfiguration{
+ Profiles: []groveconfigv1alpha1.SchedulerProfile{
+ {Name: groveconfigv1alpha1.SchedulerNameKube},
+ {Name: groveconfigv1alpha1.SchedulerNameKai},
+ },
+ DefaultProfileName: string(groveconfigv1alpha1.SchedulerNameKube),
+ },
+ schedulerNames: []string{"", "kai-scheduler"},
+ expectErrors: 1,
+ expectInvalidSame: true,
+ expectInvalidEnabled: false,
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ err := schedulerbackend.Initialize(cl, cl.Scheme(), recorder, tt.schedulerConfig)
+ require.NoError(t, err)
+
+ pcsBuilder := testutils.NewPodCliqueSetBuilder("test", "default", uuid.NewUUID()).
+ WithReplicas(1).
+ WithTerminationDelay(4 * time.Hour).
+ WithCliqueStartupType(ptr.To(grovecorev1alpha1.CliqueStartupTypeAnyOrder))
+ for i := 0; i < len(tt.schedulerNames); i++ {
+ clique := createDummyPodCliqueTemplate(fmt.Sprintf("c%d", i))
+ clique.Spec.PodSpec.SchedulerName = tt.schedulerNames[i]
+ pcsBuilder = pcsBuilder.WithPodCliqueTemplateSpec(clique)
+ }
+ pcs := pcsBuilder.Build()
+ validator := newPCSValidator(pcs, admissionv1.Create, defaultTASConfig(), tt.schedulerConfig)
+ fldPath := field.NewPath("cliques")
+ errs := validator.validateSchedulerNames(tt.schedulerNames, fldPath)
+
+ assert.Len(t, errs, tt.expectErrors, "validation errors: %v", errs)
+ if tt.expectErrors > 0 {
+ msgs := lo.Map(errs, func(e *field.Error, _ int) string { return e.ErrorBody() })
+ if tt.expectInvalidSame {
+ assert.Contains(t, strings.Join(msgs, " "), "have to be the same")
+ }
+ if tt.expectInvalidEnabled {
+ assert.Contains(t, strings.Join(msgs, " "), "not enabled")
+ }
+ }
+ for _, e := range errs {
+ assert.Equal(t, specPath.String(), e.Field, "error field path")
+ }
+ })
+ }
+}
+
func TestPodCliqueScalingGroupConfigValidation(t *testing.T) {
testCases := []struct {
description string
@@ -263,7 +440,7 @@ func TestPodCliqueScalingGroupConfigValidation(t *testing.T) {
// Add scaling groups
pcs.Spec.Template.PodCliqueScalingGroupConfigs = tc.scalingGroups
- validator := newPCSValidator(pcs, admissionv1.Create, defaultTASConfig())
+ validator := newPCSValidator(pcs, admissionv1.Create, defaultTASConfig(), groveconfigv1alpha1.SchedulerConfiguration{Profiles: []groveconfigv1alpha1.SchedulerProfile{{Name: groveconfigv1alpha1.SchedulerNameKube}}, DefaultProfileName: string(groveconfigv1alpha1.SchedulerNameKube)})
warnings, errs := validator.validate()
if tc.errorMatchers != nil {
@@ -386,7 +563,7 @@ func TestPodCliqueUpdateValidation(t *testing.T) {
newPCS.Spec.Template.Cliques = tc.newCliques
// Create validator and validate update
- validator := newPCSValidator(newPCS, admissionv1.Update, defaultTASConfig())
+ validator := newPCSValidator(newPCS, admissionv1.Update, defaultTASConfig(), groveconfigv1alpha1.SchedulerConfiguration{Profiles: []groveconfigv1alpha1.SchedulerProfile{{Name: groveconfigv1alpha1.SchedulerNameKube}}, DefaultProfileName: string(groveconfigv1alpha1.SchedulerNameKube)})
fldPath := field.NewPath("spec").Child("template").Child("cliques")
validationErrors := validator.validatePodCliqueUpdate(oldPCS.Spec.Template.Cliques, fldPath)
@@ -497,6 +674,21 @@ func TestImmutableFieldsValidation(t *testing.T) {
expectError: true,
expectedErrMsg: "field is immutable",
},
+ {
+ name: "Invalid: schedulerName is immutable",
+ setupOldPCS: func() *grovecorev1alpha1.PodCliqueSet {
+ pcs := createTestPodCliqueSet("test")
+ pcs.Spec.Template.Cliques[0].Spec.PodSpec.SchedulerName = ""
+ return pcs
+ },
+ setupNewPCS: func() *grovecorev1alpha1.PodCliqueSet {
+ pcs := createTestPodCliqueSet("test")
+ pcs.Spec.Template.Cliques[0].Spec.PodSpec.SchedulerName = "default-scheduler"
+ return pcs
+ },
+ expectError: true,
+ expectedErrMsg: "field is immutable",
+ },
}
for _, tc := range testCases {
@@ -504,7 +696,7 @@ func TestImmutableFieldsValidation(t *testing.T) {
oldPCS := tc.setupOldPCS()
newPCS := tc.setupNewPCS()
- validator := newPCSValidator(newPCS, admissionv1.Update, defaultTASConfig())
+ validator := newPCSValidator(newPCS, admissionv1.Update, defaultTASConfig(), groveconfigv1alpha1.SchedulerConfiguration{Profiles: []groveconfigv1alpha1.SchedulerProfile{{Name: groveconfigv1alpha1.SchedulerNameKube}}, DefaultProfileName: string(groveconfigv1alpha1.SchedulerNameKube)})
err := validator.validateUpdate(oldPCS)
if tc.expectError {
@@ -674,7 +866,7 @@ func TestPodCliqueScalingGroupConfigsUpdateValidation(t *testing.T) {
newPCS.Spec.Template.PodCliqueScalingGroupConfigs = tc.newConfigs
// Create validator and validate update
- validator := newPCSValidator(newPCS, admissionv1.Update, defaultTASConfig())
+ validator := newPCSValidator(newPCS, admissionv1.Update, defaultTASConfig(), groveconfigv1alpha1.SchedulerConfiguration{Profiles: []groveconfigv1alpha1.SchedulerProfile{{Name: groveconfigv1alpha1.SchedulerNameKube}}, DefaultProfileName: string(groveconfigv1alpha1.SchedulerNameKube)})
fldPath := field.NewPath("spec", "template", "podCliqueScalingGroupConfigs")
validationErrors := validator.validatePodCliqueScalingGroupConfigsUpdate(tc.oldConfigs, fldPath)
diff --git a/operator/internal/webhook/admission/pcs/validation/register_test.go b/operator/internal/webhook/admission/pcs/validation/register_test.go
index 7926b8874..9a52dd063 100644
--- a/operator/internal/webhook/admission/pcs/validation/register_test.go
+++ b/operator/internal/webhook/admission/pcs/validation/register_test.go
@@ -42,7 +42,12 @@ func TestRegisterWithManager(t *testing.T) {
})
mgr.WebhookServer = server
- handler := NewHandler(mgr, configv1alpha1.TopologyAwareSchedulingConfiguration{}, configv1alpha1.NetworkAcceleration{})
+ cfg := configv1alpha1.OperatorConfiguration{
+ TopologyAwareScheduling: configv1alpha1.TopologyAwareSchedulingConfiguration{},
+ Network: configv1alpha1.NetworkAcceleration{},
+ Scheduler: configv1alpha1.SchedulerConfiguration{Profiles: []configv1alpha1.SchedulerProfile{{Name: configv1alpha1.SchedulerNameKube}}, DefaultProfileName: string(configv1alpha1.SchedulerNameKube)},
+ }
+ handler := NewHandler(mgr, &cfg)
err := handler.RegisterWithManager(mgr)
require.NoError(t, err)
}
diff --git a/operator/internal/webhook/register.go b/operator/internal/webhook/register.go
index 9cf378160..a01da791b 100644
--- a/operator/internal/webhook/register.go
+++ b/operator/internal/webhook/register.go
@@ -31,18 +31,21 @@ import (
)
// Register registers the webhooks with the controller manager.
-func Register(mgr manager.Manager, authorizerConfig configv1alpha1.AuthorizerConfig, tasConfig configv1alpha1.TopologyAwareSchedulingConfiguration, networkConfig configv1alpha1.NetworkAcceleration) error {
- defaultingWebhook := defaulting.NewHandler(mgr, networkConfig)
+func Register(mgr manager.Manager, operatorCfg *configv1alpha1.OperatorConfiguration) error {
+ if operatorCfg == nil {
+ return fmt.Errorf("operator configuration must not be nil")
+ }
+ defaultingWebhook := defaulting.NewHandler(mgr, operatorCfg.Network)
slog.Info("Registering webhook with manager", "handler", defaulting.Name)
if err := defaultingWebhook.RegisterWithManager(mgr); err != nil {
return fmt.Errorf("failed adding %s webhook handler: %v", defaulting.Name, err)
}
- pcsValidatingWebhook := pcsvalidation.NewHandler(mgr, tasConfig, networkConfig)
+ pcsValidatingWebhook := pcsvalidation.NewHandler(mgr, operatorCfg)
slog.Info("Registering webhook with manager", "handler", pcsvalidation.Name)
if err := pcsValidatingWebhook.RegisterWithManager(mgr); err != nil {
return fmt.Errorf("failed adding %s webhook handler: %v", pcsvalidation.Name, err)
}
- if authorizerConfig.Enabled {
+ if operatorCfg.Authorizer.Enabled {
serviceAccountName, ok := os.LookupEnv(constants.EnvVarServiceAccountName)
if !ok {
return fmt.Errorf("can not register authorizer webhook with no \"%s\" environment vairable", constants.EnvVarServiceAccountName)
@@ -52,7 +55,7 @@ func Register(mgr manager.Manager, authorizerConfig configv1alpha1.AuthorizerCon
return fmt.Errorf("error reading namespace file with error: %w", err)
}
reconcilerServiceAccountUserName := generateReconcilerServiceAccountUsername(string(namespace), serviceAccountName)
- authorizerWebhook := authorization.NewHandler(mgr, authorizerConfig, reconcilerServiceAccountUserName)
+ authorizerWebhook := authorization.NewHandler(mgr, operatorCfg.Authorizer, reconcilerServiceAccountUserName)
slog.Info("Registering webhook with manager", "handler", authorization.Name)
if err := authorizerWebhook.RegisterWithManager(mgr); err != nil {
return fmt.Errorf("failed adding %s webhook handler: %v", authorization.Name, err)
diff --git a/operator/internal/webhook/register_test.go b/operator/internal/webhook/register_test.go
index 9c560e507..c3b8f813b 100644
--- a/operator/internal/webhook/register_test.go
+++ b/operator/internal/webhook/register_test.go
@@ -91,7 +91,13 @@ func TestRegisterWebhooks_WithoutAuthorizer(t *testing.T) {
Enabled: false,
}
- err := Register(mgr, authorizerConfig, configv1alpha1.TopologyAwareSchedulingConfiguration{}, configv1alpha1.NetworkAcceleration{})
+ operatorCfg := configv1alpha1.OperatorConfiguration{
+ Authorizer: authorizerConfig,
+ TopologyAwareScheduling: configv1alpha1.TopologyAwareSchedulingConfiguration{},
+ Network: configv1alpha1.NetworkAcceleration{},
+ Scheduler: configv1alpha1.SchedulerConfiguration{Profiles: []configv1alpha1.SchedulerProfile{{Name: configv1alpha1.SchedulerNameKube}}, DefaultProfileName: string(configv1alpha1.SchedulerNameKube)},
+ }
+ err := Register(mgr, &operatorCfg)
require.NoError(t, err)
}
@@ -120,7 +126,13 @@ func TestRegisterWebhooks_WithAuthorizerMissingEnvVar(t *testing.T) {
Enabled: true,
}
- err = Register(mgr, authorizerConfig, configv1alpha1.TopologyAwareSchedulingConfiguration{}, configv1alpha1.NetworkAcceleration{})
+ operatorCfg := configv1alpha1.OperatorConfiguration{
+ Authorizer: authorizerConfig,
+ TopologyAwareScheduling: configv1alpha1.TopologyAwareSchedulingConfiguration{},
+ Network: configv1alpha1.NetworkAcceleration{},
+ Scheduler: configv1alpha1.SchedulerConfiguration{Profiles: []configv1alpha1.SchedulerProfile{{Name: configv1alpha1.SchedulerNameKube}}, DefaultProfileName: string(configv1alpha1.SchedulerNameKube)},
+ }
+ err = Register(mgr, &operatorCfg)
require.Error(t, err)
assert.Contains(t, err.Error(), constants.EnvVarServiceAccountName)
}
@@ -149,7 +161,13 @@ func TestRegisterWebhooks_WithAuthorizerMissingNamespaceFile(t *testing.T) {
Enabled: true,
}
- err := Register(mgr, authorizerConfig, configv1alpha1.TopologyAwareSchedulingConfiguration{}, configv1alpha1.NetworkAcceleration{})
+ operatorCfg := configv1alpha1.OperatorConfiguration{
+ Authorizer: authorizerConfig,
+ TopologyAwareScheduling: configv1alpha1.TopologyAwareSchedulingConfiguration{},
+ Network: configv1alpha1.NetworkAcceleration{},
+ Scheduler: configv1alpha1.SchedulerConfiguration{Profiles: []configv1alpha1.SchedulerProfile{{Name: configv1alpha1.SchedulerNameKube}}, DefaultProfileName: string(configv1alpha1.SchedulerNameKube)},
+ }
+ err := Register(mgr, &operatorCfg)
require.Error(t, err)
assert.Contains(t, err.Error(), "error reading namespace file")
}
@@ -194,7 +212,13 @@ func TestRegisterWebhooks_WithAuthorizerSuccess(t *testing.T) {
Enabled: true,
}
- err = Register(mgr, authorizerConfig, configv1alpha1.TopologyAwareSchedulingConfiguration{}, configv1alpha1.NetworkAcceleration{})
+ operatorCfg := configv1alpha1.OperatorConfiguration{
+ Authorizer: authorizerConfig,
+ TopologyAwareScheduling: configv1alpha1.TopologyAwareSchedulingConfiguration{},
+ Network: configv1alpha1.NetworkAcceleration{},
+ Scheduler: configv1alpha1.SchedulerConfiguration{Profiles: []configv1alpha1.SchedulerProfile{{Name: configv1alpha1.SchedulerNameKube}}, DefaultProfileName: string(configv1alpha1.SchedulerNameKube)},
+ }
+ err = Register(mgr, &operatorCfg)
// Will error because it tries to read the hardcoded namespace file path
require.Error(t, err)
}
diff --git a/operator/skaffold.yaml b/operator/skaffold.yaml
index 52f760410..c8c309a7f 100644
--- a/operator/skaffold.yaml
+++ b/operator/skaffold.yaml
@@ -65,6 +65,10 @@ profiles:
value:
replicaCount: 1
config:
+ scheduler:
+ defaultProfileName: kai-scheduler
+ profiles:
+ - name: kai-scheduler
leaderElection:
enabled: false
topologyAwareScheduling:
diff --git a/operator/test/utils/client.go b/operator/test/utils/client.go
index b7f4d07d4..37afbb61c 100644
--- a/operator/test/utils/client.go
+++ b/operator/test/utils/client.go
@@ -138,6 +138,14 @@ func (b *TestClientBuilder) WithObjects(objects ...client.Object) *TestClientBui
return b
}
+// WithStatusSubresource registers types that have status subresources so that Status().Patch() works with the fake client.
+func (b *TestClientBuilder) WithStatusSubresource(objs ...client.Object) *TestClientBuilder {
+ if len(objs) > 0 {
+ b.delegatingClientBuilder.WithStatusSubresource(objs...)
+ }
+ return b
+}
+
// RecordErrorForObjects records an error for a specific client.Client method and object keys.
func (b *TestClientBuilder) RecordErrorForObjects(method ClientMethod, err *apierrors.StatusError, objectKeys ...client.ObjectKey) *TestClientBuilder {
// this method records error, so if nil error is passed then there is no need to create any error record.
diff --git a/operator/test/utils/pod.go b/operator/test/utils/pod.go
index d94159fca..453607b43 100644
--- a/operator/test/utils/pod.go
+++ b/operator/test/utils/pod.go
@@ -81,6 +81,12 @@ func (b *PodBuilder) WithOwner(ownerName string) *PodBuilder {
return b
}
+// WithSchedulerName sets the scheduler name on the Pod spec.
+func (b *PodBuilder) WithSchedulerName(name string) *PodBuilder {
+ b.pod.Spec.SchedulerName = name
+ return b
+}
+
// WithLabels adds labels to the Pod.
func (b *PodBuilder) WithLabels(labels map[string]string) *PodBuilder {
if b.pod.Labels == nil {
diff --git a/operator/test/utils/podgang.go b/operator/test/utils/podgang.go
new file mode 100644
index 000000000..075b25942
--- /dev/null
+++ b/operator/test/utils/podgang.go
@@ -0,0 +1,85 @@
+// /*
+// Copyright 2026 The Grove Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// */
+
+package utils
+
+import (
+ apicommon "github.com/ai-dynamo/grove/operator/api/common"
+
+ groveschedulerv1alpha1 "github.com/ai-dynamo/grove/scheduler/api/core/v1alpha1"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// PodGangBuilder is a builder for PodGang objects (scheduler API).
+type PodGangBuilder struct {
+ pg *groveschedulerv1alpha1.PodGang
+}
+
+// NewPodGangBuilder creates a new PodGangBuilder.
+func NewPodGangBuilder(name, namespace string) *PodGangBuilder {
+ return &PodGangBuilder{
+ pg: createEmptyPodGang(name, namespace),
+ }
+}
+
+// WithGeneration sets the Generation on the PodGang.
+func (b *PodGangBuilder) WithGeneration(generation int64) *PodGangBuilder {
+ b.pg.SetGeneration(generation)
+ return b
+}
+
+// WithManaged sets or removes the managed-by label, marking the PodGang as operator-managed (true) or not (false).
+func (b *PodGangBuilder) WithManaged(managed bool) *PodGangBuilder {
+ if b.pg.Labels == nil {
+ b.pg.Labels = make(map[string]string)
+ }
+ if managed {
+ b.pg.Labels[apicommon.LabelManagedByKey] = apicommon.LabelManagedByValue
+ } else {
+ delete(b.pg.Labels, apicommon.LabelManagedByKey)
+ }
+ return b
+}
+
+// WithPodGroups sets the Spec.PodGroups slice.
+func (b *PodGangBuilder) WithPodGroups(groups []groveschedulerv1alpha1.PodGroup) *PodGangBuilder {
+ b.pg.Spec.PodGroups = groups
+ return b
+}
+
+// WithPodGroup adds a single PodGroup (convenience for tests that need one group).
+func (b *PodGangBuilder) WithPodGroup(name string, minReplicas int32) *PodGangBuilder {
+ b.pg.Spec.PodGroups = append(b.pg.Spec.PodGroups, groveschedulerv1alpha1.PodGroup{
+ Name: name,
+ MinReplicas: minReplicas,
+ })
+ return b
+}
+
+// Build returns the PodGang.
+func (b *PodGangBuilder) Build() *groveschedulerv1alpha1.PodGang {
+ return b.pg
+}
+
+func createEmptyPodGang(name, namespace string) *groveschedulerv1alpha1.PodGang {
+ return &groveschedulerv1alpha1.PodGang{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: name,
+ Namespace: namespace,
+ },
+ Spec: groveschedulerv1alpha1.PodGangSpec{},
+ }
+}
diff --git a/scheduler/api/core/v1alpha1/podgang.go b/scheduler/api/core/v1alpha1/podgang.go
index dae9804a8..914f17112 100644
--- a/scheduler/api/core/v1alpha1/podgang.go
+++ b/scheduler/api/core/v1alpha1/podgang.go
@@ -157,6 +157,9 @@ const (
PodGangConditionTypeScheduled PodGangConditionType = "Scheduled"
// PodGangConditionTypeReady indicates that all the constituent PodGroups are Ready.
PodGangConditionTypeReady PodGangConditionType = "Ready"
+ // PodGangConditionTypeInitialized indicates that all Pods have been created and PodGang has been populated with pod references.
+ // This condition is set to True after all pods are created, signaling that scheduling gates can be removed.
+ PodGangConditionTypeInitialized PodGangConditionType = "Initialized"
// PodGangConditionTypeUnhealthy indicates that the PodGang is unhealthy. It is now a candidate for gang termination.
// If this condition is true for at least PodGangSpec.TerminationDelay duration, then the PodGang will be terminated.
PodGangConditionTypeUnhealthy PodGangConditionType = "Unhealthy"