Skip to content

Commit b580718

Browse files
authored
Merge pull request #440 from nebius/dev
Soperator release 1.18.0
2 parents 9b33f35 + 1b65493 commit b580718

File tree

151 files changed

+4604
-1434
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

151 files changed

+4604
-1434
lines changed

.github/dependabot.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ updates:
1919
target-branch: "dev"
2020

2121
- package-ecosystem: gomod
22-
directory: /images/jail/gpubench
22+
directory: /images/worker/gpubench
2323
schedule:
2424
interval: daily
2525
target-branch: "dev"

.github/workflows/gpubench_only.yml

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ name: Build gpubench only
33
on:
44
push:
55
paths:
6-
- 'images/jail/gpubench/**'
6+
- 'images/worker/gpubench/**'
77

88
permissions:
99
contents: read
@@ -20,7 +20,7 @@ jobs:
2020

2121
steps:
2222
- name: Harden Runner
23-
uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2
23+
uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4
2424
with:
2525
egress-policy: audit
2626

@@ -43,15 +43,15 @@ jobs:
4343

4444
steps:
4545
- name: Harden Runner
46-
uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2
46+
uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4
4747
with:
4848
egress-policy: audit
4949

5050
- name: Checkout repository
5151
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
5252

5353
- name: Install GO
54-
uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0
54+
uses: actions/setup-go@f111f3307d8850f501ac008e886eec1fd1932a34 # v5.3.0
5555
with:
5656
go-version-file: 'go.mod'
5757

@@ -64,10 +64,10 @@ jobs:
6464
run: make test-version-sync
6565

6666
- name: Set up Docker Buildx
67-
uses: docker/setup-buildx-action@6524bf65af31da8d45b59e8c27de4bd072b392f5 # v3.8.0
67+
uses: docker/setup-buildx-action@f7ce87c1d6bead3e36075b2ce75da1f6cc28aaca # v3.9.0
6868

6969
- name: Log in to the Github Container registry
70-
uses: docker/login-action@7ca345011ac4304463197fac0e56eab1bc7e6af0
70+
uses: docker/login-action@327cd5a69de6c009b9ce71bce8395f28e651bf99
7171
with:
7272
registry: ghcr.io
7373
username: ${{ github.actor }}
@@ -81,12 +81,9 @@ jobs:
8181
OPERATOR_IMAGE_TAG=$(make get-operator-tag-version UNSTABLE=${UNSTABLE})
8282
8383
echo "Running gpubench tests"
84-
cd ./images/jail/gpubench/
84+
cd ./images/worker/gpubench/
8585
go test
8686
cd -
8787
88-
echo "Removing previous jail rootfs tar archive"
89-
rm -rf images/jail_rootfs.tar
90-
9188
echo "Building tarball for jail"
9289
make docker-build UNSTABLE="${UNSTABLE}" IMAGE_NAME=jail DOCKERFILE=jail/jail.dockerfile DOCKER_OUTPUT="--output type=tar,dest=jail_rootfs.tar"

.github/workflows/one_job.yml

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,8 @@ on:
1010
- 'PROJECT'
1111
- 'README.md'
1212
- 'SECURITY.md'
13-
- 'images/jail/gpubench/**'
14-
pull_request:
15-
branches:
16-
- main
13+
- 'images/worker/gpubench/**'
14+
1715

1816
permissions:
1917
contents: read
@@ -30,7 +28,7 @@ jobs:
3028

3129
steps:
3230
- name: Harden Runner
33-
uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2
31+
uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4
3432
with:
3533
egress-policy: audit
3634

@@ -54,15 +52,15 @@ jobs:
5452

5553
steps:
5654
- name: Harden Runner
57-
uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2
55+
uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4
5856
with:
5957
egress-policy: audit
6058

6159
- name: Checkout repository
6260
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
6361

6462
- name: Install GO
65-
uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0
63+
uses: actions/setup-go@f111f3307d8850f501ac008e886eec1fd1932a34 # v5.3.0
6664
with:
6765
go-version-file: 'go.mod'
6866

@@ -75,10 +73,10 @@ jobs:
7573
run: make test-version-sync
7674

7775
- name: Set up Docker Buildx
78-
uses: docker/setup-buildx-action@6524bf65af31da8d45b59e8c27de4bd072b392f5 # v3.8.0
76+
uses: docker/setup-buildx-action@f7ce87c1d6bead3e36075b2ce75da1f6cc28aaca # v3.9.0
7977

8078
- name: Log in to the Github Container registry
81-
uses: docker/login-action@7ca345011ac4304463197fac0e56eab1bc7e6af0
79+
uses: docker/login-action@327cd5a69de6c009b9ce71bce8395f28e651bf99
8280
with:
8381
registry: ghcr.io
8482
username: ${{ github.actor }}
@@ -120,6 +118,9 @@ jobs:
120118
make docker-build UNSTABLE="${UNSTABLE}" IMAGE_NAME=slurmrestd DOCKERFILE=restd/slurmrestd.dockerfile
121119
make docker-push UNSTABLE="${UNSTABLE}" IMAGE_NAME=slurmrestd
122120
121+
make docker-build UNSTABLE="${UNSTABLE}" IMAGE_NAME=rebooter DOCKERFILE=rebooter.dockerfile IMAGE_VERSION="$OPERATOR_IMAGE_TAG"
122+
make docker-push UNSTABLE="${UNSTABLE}" IMAGE_NAME=rebooter IMAGE_VERSION="$OPERATOR_IMAGE_TAG"
123+
123124
echo "Common images were built"
124125
125126
echo "Removing previous jail rootfs tar archive"

Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM golang:1.23@sha256:7ea4c9dcb2b97ff8ee80a67db3d44f98c8ffa0d191399197007d8459c1453041 AS operator_builder
1+
FROM golang:1.23@sha256:927112936d6b496ed95f55f362cc09da6e3e624ef868814c56d55bd7323e0959 AS operator_builder
22

33
ARG GO_LDFLAGS=""
44
ARG BUILD_TIME
@@ -16,7 +16,7 @@ RUN GOOS=$GOOS GOARCH=$GOARCH CGO_ENABLED=$CGO_ENABLED GO_LDFLAGS=$GO_LDFLAGS \
1616
go build -o slurm_operator ./cmd/
1717

1818
#######################################################################################################################
19-
FROM alpine:latest@sha256:b97e2a89d0b9e4011bb88c02ddf01c544b8c781acf1f4d559e7c8f12f1047ac3 AS slurm-operator
19+
FROM alpine:latest@sha256:56fa17d2a7e7f168a043a2712e63aed1f8543aeafdcee47c58dcffe38ed51099 AS slurm-operator
2020

2121
COPY --from=operator_builder /operator/slurm_operator /usr/bin/
2222

Makefile

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,15 @@ SHELL = /usr/bin/env bash -o pipefail
1818
.SHELLFLAGS = -ec
1919

2020
# Limit the scope of generation otherwise it will try to generate configs for non-controller code
21-
GENPATH = "./api/v1;./internal/controller/..."
21+
GENPATH = "./api/v1;"
2222

2323
CHART_PATH = helm
2424
CHART_OPERATOR_PATH = $(CHART_PATH)/soperator
2525
CHART_OPERATOR_CRDS_PATH = $(CHART_PATH)/soperator-crds
2626
CHART_CLUSTER_PATH = $(CHART_PATH)/slurm-cluster
2727
CHART_STORAGE_PATH = $(CHART_PATH)/slurm-cluster-storage
2828

29-
SLURM_VERSION = 24.05.2
29+
SLURM_VERSION = 24.05.5
3030
UBUNTU_VERSION = jammy
3131
VERSION = $(shell cat VERSION)
3232

@@ -78,8 +78,9 @@ help: ## Display this help.
7878

7979
.PHONY: manifests
8080
manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.
81-
$(CONTROLLER_GEN) rbac:roleName=manager-role crd webhook paths=$(GENPATH) output:crd:artifacts:config=config/crd/bases
82-
81+
$(CONTROLLER_GEN) crd webhook paths=$(GENPATH) output:crd:artifacts:config=config/crd/bases
82+
$(CONTROLLER_GEN) rbac:roleName=manager-role paths="./internal/controller/clustercontroller/..." output:artifacts:config=config/rbac/clustercontroller/
83+
$(CONTROLLER_GEN) rbac:roleName=node-configurator-role paths="./internal/rebooter/..." output:artifacts:config=config/rbac/node-configurator/
8384
.PHONY: generate
8485
generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
8586
$(CONTROLLER_GEN) object paths=$(GENPATH)
@@ -229,6 +230,8 @@ ifndef DOCKERFILE
229230
endif
230231
ifeq (${IMAGE_NAME},slurm-operator)
231232
docker build $(DOCKER_BUILD_ARGS) --tag $(IMAGE_REPO)/${IMAGE_NAME}:${IMAGE_VERSION} --target ${IMAGE_NAME} ${DOCKER_IGNORE_CACHE} ${DOCKER_LOAD} ${DOCKER_BUILD_PLATFORM} -f ${DOCKERFILE} ${DOCKER_OUTPUT} .
233+
else ifeq ($(IMAGE_NAME),rebooter)
234+
docker build $(DOCKER_BUILD_ARGS) --tag $(IMAGE_REPO)/${IMAGE_NAME}:${IMAGE_VERSION} --target ${IMAGE_NAME} ${DOCKER_IGNORE_CACHE} ${DOCKER_LOAD} ${DOCKER_BUILD_PLATFORM} -f ${DOCKERFILE} ${DOCKER_OUTPUT} .
232235
else
233236
cd images && docker build $(DOCKER_BUILD_ARGS) --tag $(IMAGE_REPO)/${IMAGE_NAME}:${IMAGE_VERSION} --target ${IMAGE_NAME} ${DOCKER_IGNORE_CACHE} ${DOCKER_LOAD} ${DOCKER_BUILD_PLATFORM} -f ${DOCKERFILE} ${DOCKER_OUTPUT} .
234237
endif

PROJECT

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,13 @@ resources:
2525
webhooks:
2626
validation: true
2727
webhookVersion: v1
28+
- api:
29+
crdVersion: v1
30+
namespaced: true
31+
controller: true
32+
domain: nebius.ai
33+
group: slurm
34+
kind: NodeConfigurator
35+
path: nebius.ai/slurm-operator/api/v1alpha1
36+
version: v1alpha1
2837
version: "3"

README.md

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -107,15 +107,14 @@ This helps cluster administrators and users monitor resource utilization, enforc
107107

108108

109109
## ❌ Limitations
110-
- **GPUs are required**. Although support for CPU-only clusters or partitions seems pretty straightforward, we haven't
111-
implemented it yet.
110+
- **GPU-only or CPU-only**. A Slurm cluster can currently be either GPU-only or CPU-only.
111+
Support for mixed configurations based on nodesets (e.g., separate GPU and CPU nodesets) has not been implemented yet.
112112
- **Single-partition clusters**. Slurm's ability to split clusters into several partitions isn't supported now.
113113
- **Software versions**. The list of software versions we currently support is quite short.
114-
- Linux: Ubuntu [20.04](https://releases.ubuntu.com/focal/) and
115-
[22.04](https://releases.ubuntu.com/jammy/).
116-
- Slurm: versions `23.11.6` and `24.05.3`.
117-
- CUDA: version [12.2.2](https://developer.nvidia.com/cuda-12-2-2-download-archive).
118-
- Kubernetes: >= [1.29](https://kubernetes.io/blog/2023/08/15/kubernetes-v1-28-release/).
114+
- Linux: Ubuntu [22.04](https://releases.ubuntu.com/jammy/).
115+
- Slurm: version `24.05.5`.
116+
- CUDA: version [12.4.1](https://developer.nvidia.com/cuda-12-4-1-download-archive).
117+
- Kubernetes: >= [1.29](https://kubernetes.io/blog/2023/12/13/kubernetes-v1-29-release/).
119118
- Versions of some preinstalled software packages can't be changed.
120119

121120

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1.17.0
1+
1.18.0

api/v1/slurmcluster_types.go

Lines changed: 98 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,11 @@ type SlurmClusterSpec struct {
2828
// - none: No maintenance is performed. The cluster operates normally.
2929
// - downscale: Scales down all components to 0.
3030
// - downscaleAndDeletePopulateJail: Scales down all components to 0 and deletes the kubernetes Kind Jobs populateJail.
31+
// - downscaleAndOverwritePopulateJail: Scales down all components to 0 and overwrites populateJail (same as overwrite=true).
3132
// - skipPopulateJail: Skips the execution of the populateJail job during maintenance.
3233
//
3334
// +kubebuilder:validation:Optional
34-
// +kubebuilder:validation:Enum=none;downscale;downscaleAndDeletePopulateJail;skipPopulateJail
35+
// +kubebuilder:validation:Enum=none;downscale;downscaleAndDeletePopulateJail;downscaleAndOverwritePopulateJail;skipPopulateJail
3536
// +kubebuilder:default="none"
3637
Maintenance *consts.MaintenanceMode `json:"maintenance,omitempty"`
3738

@@ -83,8 +84,15 @@ type SlurmClusterSpec struct {
8384
// SlurmConfig represents the Slurm configuration in slurm.conf. Not all options are supported.
8485
//
8586
// +kubebuilder:validation:Optional
86-
// +kubebuilder:default={defMemPerNode: 1228800, defCpuPerGPU: 16, completeWait: 5, debugFlags: "Cgroup,CPU_Bind,Gres,JobComp,Priority,Script,SelectType,Steps,TraceJobs", taskPluginParam: "", maxJobCount: 10000, minJobAge: 86400}
87+
// +kubebuilder:default={defMemPerNode: 1228800, defCpuPerGPU: 16, completeWait: 5, debugFlags: "Cgroup,CPU_Bind,Gres,JobComp,Priority,Script,SelectType,Steps,TraceJobs", epilog: "", prolog: "", taskPluginParam: "", maxJobCount: 10000, minJobAge: 86400}
8788
SlurmConfig SlurmConfig `json:"slurmConfig,omitempty"`
89+
90+
// MPIConfig represents the PMIx configuration in mpi.conf. Not all options are supported.
91+
//
92+
// +kubebuilder:validation:Optional
93+
// +kubebuilder:default={pmixEnv: "OMPI_MCA_btl_tcp_if_include=eth0"}
94+
MPIConfig MPIConfig `json:"mpiConfig,omitempty"`
95+
8896
// Generate and set default AppArmor profile for the Slurm worker and login nodes. The Security Profiles Operator must be installed.
8997
//
9098
// +kubebuilder:default=false
@@ -114,6 +122,16 @@ type SlurmConfig struct {
114122
// +kubebuilder:default="Cgroup,CPU_Bind,Gres,JobComp,Priority,Script,SelectType,Steps,TraceJobs"
115123
// +kubebuilder:validation:Pattern="^((Accrue|Agent|AuditRPCs|Backfill|BackfillMap|BurstBuffer|Cgroup|ConMgr|CPU_Bind|CpuFrequency|Data|DBD_Agent|Dependency|Elasticsearch|Energy|Federation|FrontEnd|Gres|Hetjob|Gang|GLOB_SILENCE|JobAccountGather|JobComp|JobContainer|License|Network|NetworkRaw|NodeFeatures|NO_CONF_HASH|Power|Priority|Profile|Protocol|Reservation|Route|Script|SelectType|Steps|Switch|TLS|TraceJobs|Triggers)(,)?)+$"
116124
DebugFlags *string `json:"debugFlags,omitempty"`
125+
// Defines specific file to run the epilog when job ends. Default value is no epilog
126+
//
127+
// +kubebuilder:validation:Optional
128+
// +kubebuilder:default=""
129+
Epilog *string `json:"epilog,omitempty"`
130+
// Defines specific file to run the prolog when job starts. Default value is no prolog
131+
//
132+
// +kubebuilder:validation:Optional
133+
// +kubebuilder:default=""
134+
Prolog *string `json:"prolog,omitempty"`
117135
// Additional parameters for the task plugin
118136
//
119137
// +kubebuilder:validation:Optional
@@ -132,6 +150,16 @@ type SlurmConfig struct {
132150
MinJobAge *int32 `json:"minJobAge,omitempty"`
133151
}
134152

153+
type MPIConfig struct {
154+
// Semicolon separated list of environment variables to be set in job environments to be used by PMIx.
155+
// Defaults to "OMPI_MCA_btl_tcp_if_include=eth0" to prevent the "lo" and "docker" interfaces from being selected by OpenMPI.
156+
//
157+
// +kubebuilder:validation:Optional
158+
// +kubebuilder:default="OMPI_MCA_btl_tcp_if_include=eth0"
159+
// +kubebuilder:validation:Optional
160+
PMIxEnv string `json:"pmixEnv,omitempty"`
161+
}
162+
135163
type PartitionConfiguration struct {
136164
// ConfigType
137165
// +kubebuilder:validation:Enum=default;custom
@@ -308,7 +336,8 @@ type NCCLArguments struct {
308336
// +kubebuilder:default="0"
309337
ThresholdMoreThan string `json:"thresholdMoreThan,omitempty"`
310338

311-
// UseInfiniband defines using NCCL_P2P_DISABLE=1 NCCL_SHM_DISABLE=1 NCCL_ALGO=Ring env variables for test
339+
// UseInfiniband defines using NCCL_P2P_DISABLE=1 NCCL_SHM_DISABLE=1 NCCL_ALGO=Ring env variables for test.
340+
// According to NVIDIA these env vars should be used only for debugging.
312341
// https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html
313342
//
314343
// +kubebuilder:validation:Optional
@@ -570,6 +599,8 @@ type SlurmdbdConfig struct {
570599

571600
type AccountingSlurmConf struct {
572601
// +kubebuilder:validation:Optional
602+
// +kubebuilder:validation:Pattern="^((Billing|CPU|Mem|VMem|Node|Energy|Pages|FS/Disk|FS/Lustre|Gres/gpu|Gres/gpu:tesla|Gres/gpu:volta)(,)?)+$"
603+
// +kubebuilder:default="Billing,CPU,Mem,Node,VMem"
573604
AccountingStorageTRES *string `json:"accountingStorageTRES,omitempty"`
574605
// +kubebuilder:validation:Optional
575606
AccountingStoreFlags *string `json:"accountingStoreFlags,omitempty"`
@@ -581,6 +612,7 @@ type AccountingSlurmConf struct {
581612
AcctGatherProfileType *string `json:"acctGatherProfileType,omitempty"`
582613
// +kubebuilder:validation:Optional
583614
// +kubebuilder:validation:Enum="jobacct_gather/linux";"jobacct_gather/cgroup";"jobacct_gather/none"
615+
// +kubebuilder:default="jobacct_gather/cgroup"
584616
JobAcctGatherType *string `json:"jobAcctGatherType,omitempty"`
585617
// +kubebuilder:validation:Optional
586618
// +kubebuilder:default=30
@@ -681,6 +713,68 @@ type SlurmNodeWorker struct {
681713
//
682714
// +kubebuilder:validation:Optional
683715
SlurmNodeExtra string `json:"slurmNodeExtra,omitempty"`
716+
717+
// PriorityClass defines the priority class for the Slurm worker node
718+
//
719+
// +kubebuilder:validation:Optional
720+
PriorityClass string `json:"priorityClass,omitempty"`
721+
// It's an alpha feature and will be moved to a separate CRD in the future
722+
// Rebooter defines the configuration for the Slurm worker node rebooter
723+
//
724+
// +kubebuilder:validation:Optional
725+
Rebooter Rebooter `json:"rebooter"`
726+
}
727+
728+
// Rebooter defines the configuration for the Slurm worker node rebooter
729+
type Rebooter struct {
730+
// enabled defines whether the rebooter is enabled
731+
//
732+
// +kubebuilder:validation:Optional
733+
// +kubebuilder:default=false
734+
Enabled bool `json:"enabled"`
735+
736+
// Image defines the rebooter container image
737+
//
738+
// +kubebuilder:validation:Optional
739+
Image string `json:"image"`
740+
741+
// imagePullPolicy defines the image pull policy
742+
//
743+
// +kubebuilder:validation:Enum=Always;Never;IfNotPresent
744+
// +kubebuilder:validation:Optional
745+
// +kubebuilder:default="IfNotPresent"
746+
ImagePullPolicy corev1.PullPolicy `json:"imagePullPolicy,omitempty"`
747+
748+
// Resources defines the [corev1.ResourceList] for the container
749+
//
750+
// +kubebuilder:validation:Optional
751+
Resources corev1.ResourceList `json:"resources,omitempty"`
752+
753+
// evictionMethod defines the method of eviction for the Slurm worker node
754+
// Must be one of [drain, evict]. Now only evict is supported
755+
//
756+
// +kubebuilder:validation:Optional
757+
// +kubebuilder:validation:Enum="evict"
758+
// +kubebuilder:default="evict"
759+
EvictionMethod string `json:"evictionMethod,omitempty"`
760+
761+
// logLevel defines the log level for the rebooter
762+
//
763+
// +kubebuilder:validation:Optional
764+
// +kubebuilder:default="info"
765+
// +kubebuilder:validation:Enum="debug";"info";"warn";"error"
766+
LogLevel string `json:"logLevel,omitempty"`
767+
768+
// Namespace defines the namespace where the rebooter will be deployed
769+
// By default, the same namespace as the soperator
770+
//
771+
// +kubebuilder:validation:Optional
772+
Namespace string `json:"namespace,omitempty"`
773+
774+
// serviceAccountName defines the service account name for the rebooter
775+
//
776+
// +kubebuilder:validation:Optional
777+
ServiceAccountName string `json:"serviceAccountName,omitempty"`
684778
}
685779

686780
// SlurmNodeWorkerVolumes defines the volumes for the Slurm worker node
@@ -1019,6 +1113,7 @@ const (
10191113
ConditionClusterWorkersAvailable = "WorkersAvailable"
10201114
ConditionClusterLoginAvailable = "LoginAvailable"
10211115
ConditionClusterAccountingAvailable = "AccountingAvailable"
1116+
ConditionClusterPopulateJailMode = "PopulateJailMode"
10221117

10231118
PhaseClusterReconciling = "Reconciling"
10241119
PhaseClusterNotAvailable = "Not available"

0 commit comments

Comments
 (0)