Skip to content

Commit 7d78fda

Browse files
fix: autodetect Volcano installation (#3956)
Signed-off-by: Julien Mancuso <[email protected]>
1 parent ce576c0 commit 7d78fda

File tree

3 files changed

+38
-10
lines changed

3 files changed

+38
-10
lines changed

deploy/cloud/operator/cmd/main.go

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,9 @@ func main() {
295295
mgrOpts.Cache.DefaultNamespaces = map[string]cache.Config{
296296
restrictedNamespace: {},
297297
}
298+
setupLog.Info("Restricted namespace configured, launching in restricted mode", "namespace", restrictedNamespace)
299+
} else {
300+
setupLog.Info("No restricted namespace configured, launching in cluster-wide mode")
298301
}
299302
mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), mgrOpts)
300303
if err != nil {
@@ -308,13 +311,22 @@ func main() {
308311
ctrlConfig.Grove.Enabled = groveEnabled
309312
setupLog.Info("Detecting LWS availability...")
310313
lwsEnabled := commonController.DetectLWSAvailability(mainCtx, mgr)
311-
ctrlConfig.LWS.Enabled = lwsEnabled
312-
314+
setupLog.Info("Detecting Volcano availability...")
315+
volcanoEnabled := commonController.DetectVolcanoAvailability(mainCtx, mgr)
316+
// LWS is only enabled for multinode deployments when both the LWS and Volcano API groups are detected
317+
ctrlConfig.LWS.Enabled = lwsEnabled && volcanoEnabled
313318
// Detect Kai-scheduler availability using discovery client
314319
setupLog.Info("Detecting Kai-scheduler availability...")
315320
kaiSchedulerEnabled := commonController.DetectKaiSchedulerAvailability(mainCtx, mgr)
316321
ctrlConfig.KaiScheduler.Enabled = kaiSchedulerEnabled
317322

323+
setupLog.Info("Detected orchestrators availability",
324+
"grove", groveEnabled,
325+
"lws", lwsEnabled,
326+
"volcano", volcanoEnabled,
327+
"kai-scheduler", kaiSchedulerEnabled,
328+
)
329+
318330
// Create etcd client
319331
cli, err := clientv3.New(clientv3.Config{
320332
Endpoints: []string{etcdAddr},

deploy/cloud/operator/internal/controller_common/predicate.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,12 @@ func DetectLWSAvailability(ctx context.Context, mgr ctrl.Manager) bool {
101101
return detectAPIGroupAvailability(ctx, mgr, "leaderworkerset.x-k8s.io")
102102
}
103103

104+
// DetectVolcanoAvailability checks if Volcano is available by checking if the scheduling.volcano.sh API group is registered
105+
// This approach uses the discovery client which is simpler and more reliable
106+
func DetectVolcanoAvailability(ctx context.Context, mgr ctrl.Manager) bool {
107+
return detectAPIGroupAvailability(ctx, mgr, "scheduling.volcano.sh")
108+
}
109+
104110
// DetectKaiSchedulerAvailability checks if Kai-scheduler is available by checking if the scheduling.run.ai API group is registered
105111
// This approach uses the discovery client which is simpler and more reliable
106112
func DetectKaiSchedulerAvailability(ctx context.Context, mgr ctrl.Manager) bool {

docs/kubernetes/installation_guide.md

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -131,14 +131,24 @@ Found existing namespace-restricted Dynamo operators in namespaces: ...
131131
```
132132

133133
> [!TIP]
134-
> For multinode deployments, you need to enable Grove and KAI Scheduler.
135-
> You might chose to install them manually or through the dynamo-platform helm install command.
136-
> When using the dynamo-platform helm install command, Grove and KAI Scheduler are NOT installed by default. You can enable their installation by setting the following flags in the helm install command:
137-
138-
```bash
139-
--set "grove.enabled=true"
140-
--set "kai-scheduler.enabled=true"
141-
```
134+
> For multinode deployments, you need to install multinode orchestration components:
135+
>
136+
> **Option 1 (Recommended): Grove + KAI Scheduler**
137+
> - Grove and KAI Scheduler can be installed manually or through the dynamo-platform helm install command.
138+
> - When using the dynamo-platform helm install command, Grove and KAI Scheduler are NOT installed by default. You can enable their installation by setting the following flags:
139+
>
140+
> ```bash
141+
> --set "grove.enabled=true"
142+
> --set "kai-scheduler.enabled=true"
143+
> ```
144+
>
145+
> **Option 2: LeaderWorkerSet (LWS) + Volcano**
146+
> - If using LWS for multinode deployments, you must also install Volcano (required dependency):
147+
> - [LWS Installation](https://github.com/kubernetes-sigs/lws#installation)
148+
> - [Volcano Installation](https://volcano.sh/en/docs/installation/) (required for gang scheduling with LWS)
149+
> - These must be installed manually before deploying multinode workloads with LWS.
150+
>
151+
> See the [Multinode Deployment Guide](./deployment/multinode-deployment.md) for details on orchestrator selection.
142152
143153
> [!TIP]
144154
> By default, Model Express Server is not used.

0 commit comments

Comments
 (0)