Skip to content

Commit 8467124

Browse files
authored
Merge pull request #135 from nebius/dev
Release v1.14.11
2 parents acef4c4 + 756b068 commit 8467124

File tree

25 files changed

+232
-46
lines changed

25 files changed

+232
-46
lines changed

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1.14.10
1+
1.14.11

api/v1/slurmcluster_types.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,25 @@ type SlurmClusterSpec struct {
6868
//
6969
// +kubebuilder:validation:Optional
7070
Telemetry *Telemetry `json:"telemetry,omitempty"`
71+
72+
// PartitionConfiguration defines the partition configuration of Slurm worker nodes.
73+
// https://slurm.schedmd.com/slurm.conf.html#SECTION_PARTITION-CONFIGURATION
74+
// +kubebuilder:validation:Optional
75+
PartitionConfiguration PartitionConfiguration `json:"partitionConfiguration,omitempty"`
76+
}
77+
78+
type PartitionConfiguration struct {
79+
// ConfigType selects the kind of partition configuration; allowed values are "default" and "custom".
80+
// +kubebuilder:validation:Enum=default;custom
81+
// +kubebuilder:validation:Optional
82+
// +kubebuilder:default="default"
83+
ConfigType string `json:"configType,omitempty"`
84+
// RawConfig defines the partition configuration as a list of strings, each starting with PartitionName.
85+
// Example for custom ConfigType:
86+
// - PartitionName=low_priority Nodes=worker-[0-15] Default=YES MaxTime=INFINITE State=UP PriorityTier=1
87+
// - PartitionName=high_priority Nodes=worker-[10-20] Default=NO MaxTime=INFINITE State=UP PriorityTier=2
88+
// +kubebuilder:validation:Optional
89+
RawConfig []string `json:"rawConfig,omitempty"`
7190
}
7291

7392
type NCCLSettings struct {

api/v1/zz_generated.deepcopy.go

Lines changed: 21 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/slurm.nebius.ai_slurmclusters.yaml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1083,6 +1083,28 @@ spec:
10831083
- custom
10841084
type: string
10851085
type: object
1086+
partitionConfiguration:
1087+
description: |-
1088+
PartitionConfiguration defines the partition configuration of Slurm worker nodes.
1089+
https://slurm.schedmd.com/slurm.conf.html#SECTION_PARTITION-CONFIGURATION
1090+
properties:
1091+
configType:
1092+
default: default
1093+
description: ConfigType
1094+
enum:
1095+
- default
1096+
- custom
1097+
type: string
1098+
rawConfig:
1099+
description: |-
1100+
RawConfig defines the partition configuration as a list of strings, each starting with PartitionName.
1101+
Example for custom ConfigType:
1102+
- PartitionName=low_priority Nodes=worker-[0-15] Default=YES MaxTime=INFINITE State=UP PriorityTier=1
1103+
- PartitionName=high_priority Nodes=worker-[10-20] Default=NO MaxTime=INFINITE State=UP PriorityTier=2
1104+
items:
1105+
type: string
1106+
type: array
1107+
type: object
10861108
pause:
10871109
description: |-
10881110
Pause defines whether to gracefully stop the cluster.

config/manager/kustomization.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@ resources:
33
images:
44
- name: controller
55
newName: cr.eu-north1.nebius.cloud/soperator/slurm-operator
6-
newTag: 1.14.10
6+
newTag: 1.14.11

config/manager/manager.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ spec:
8282
value: "false"
8383
- name: SLURM_OPERATOR_WATCH_NAMESPACES
8484
value: "*"
85-
image: controller:1.14.10
85+
image: controller:1.14.11
8686
imagePullPolicy: Always
8787
name: manager
8888
securityContext:

helm/slurm-cluster-storage/Chart.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@ apiVersion: v2
22
name: helm-slurm-cluster-storage
33
description: A Helm chart for Kubernetes
44
type: application
5-
version: "1.14.10"
6-
appVersion: "1.14.10"
5+
version: "1.14.11"
6+
appVersion: "1.14.11"

helm/slurm-cluster/Chart.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@ apiVersion: v2
22
name: helm-slurm-cluster
33
description: A Helm chart for Kubernetes
44
type: application
5-
version: "1.14.10"
6-
appVersion: "1.14.10"
5+
version: "1.14.11"
6+
appVersion: "1.14.11"

helm/slurm-cluster/templates/slurm-cluster-cr.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ spec:
1515
crVersion: {{ .Chart.Version }}
1616
pause: {{ .Values.pause }}
1717
clusterType: {{ .Values.clusterType }}
18+
partitionConfiguration:
19+
configType: {{ (default "default" .Values.partitionConfiguration.configType) }}
20+
rawConfig:
21+
{{- default list .Values.partitionConfiguration.rawConfig | toYaml | nindent 6 }}
1822
k8sNodeFilters:
1923
{{- range .Values.k8sNodeFilters }}
2024
- name: {{ .name }}

helm/slurm-cluster/values.yaml

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,16 @@ annotations: {}
55
pause: false
66
# Slurm cluster type. Currently either gpu or cpu
77
clusterType: gpu
8+
# partitionConfiguration defines the partition configuration of Slurm worker nodes
9+
# https://slurm.schedmd.com/slurm.conf.html#SECTION_PARTITION-CONFIGURATION
10+
partitionConfiguration:
11+
# Either "default" or "custom"
12+
configType: "default"
13+
# Partition configuration as a list of strings, each starting with PartitionName
14+
# Example for custom ConfigType:
15+
rawConfig: []
16+
# - PartitionName=low_priority Nodes=worker-[0-15] Default=YES MaxTime=INFINITE State=UP PriorityTier=1
17+
# - PartitionName=high_priority Nodes=worker-[10-20] Default=NO MaxTime=INFINITE State=UP PriorityTier=2
818
# K8s node filters used in Slurm node specifications. Define which nodes should be used to schedule pods to
919
k8sNodeFilters:
1020
- name: gpu
@@ -333,12 +343,12 @@ telemetry: {}
333343
# otelCollectorPort: 8429
334344

335345
images:
336-
slurmctld: "cr.eu-north1.nebius.cloud/soperator/controller_slurmctld:1.14.10-jammy-slurm24.05.2"
337-
slurmd: "cr.eu-north1.nebius.cloud/soperator/worker_slurmd:1.14.10-jammy-slurm24.05.2"
338-
sshd: "cr.eu-north1.nebius.cloud/soperator/login_sshd:1.14.10-jammy-slurm24.05.2"
339-
munge: "cr.eu-north1.nebius.cloud/soperator/munge:1.14.10-jammy-slurm24.05.2"
340-
populateJail: "cr.eu-north1.nebius.cloud/soperator/populate_jail:1.14.10-jammy-slurm24.05.2"
341-
ncclBenchmark: "cr.eu-north1.nebius.cloud/soperator/nccl_benchmark:1.14.10-jammy-slurm24.05.2"
342-
slurmdbd: "cr.eu-north1.nebius.cloud/soperator/controller_slurmdbd:1.14.10-jammy-slurm24.05.2"
343-
exporter: "cr.eu-north1.nebius.cloud/soperator/exporter:1.14.10-jammy-slurm24.05.2"
346+
slurmctld: "cr.eu-north1.nebius.cloud/soperator/controller_slurmctld:1.14.11-jammy-slurm24.05.2"
347+
slurmd: "cr.eu-north1.nebius.cloud/soperator/worker_slurmd:1.14.11-jammy-slurm24.05.2"
348+
sshd: "cr.eu-north1.nebius.cloud/soperator/login_sshd:1.14.11-jammy-slurm24.05.2"
349+
munge: "cr.eu-north1.nebius.cloud/soperator/munge:1.14.11-jammy-slurm24.05.2"
350+
populateJail: "cr.eu-north1.nebius.cloud/soperator/populate_jail:1.14.11-jammy-slurm24.05.2"
351+
ncclBenchmark: "cr.eu-north1.nebius.cloud/soperator/nccl_benchmark:1.14.11-jammy-slurm24.05.2"
352+
slurmdbd: "cr.eu-north1.nebius.cloud/soperator/controller_slurmdbd:1.14.11-jammy-slurm24.05.2"
353+
exporter: "cr.eu-north1.nebius.cloud/soperator/exporter:1.14.11-jammy-slurm24.05.2"
344354
mariaDB: "docker-registry1.mariadb.com/library/mariadb:11.4.3"

0 commit comments

Comments
 (0)