Skip to content

Commit b882c51

Browse files
authored
MSP-2609: fix secret templating and AppArmor populatejail (#176)
* MSP-2609: fix secret templating * fix bug with populatejail * fix helm chart * fix bug NCCLBenchmark AppArmor test
1 parent f0b8b84 commit b882c51

File tree

17 files changed

+117
-68
lines changed

17 files changed

+117
-68
lines changed

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1.15.1
1+
1.15.2

api/v1/slurmcluster_types.go

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,12 @@ type PopulateJail struct {
134134
// +kubebuilder:validation:Optional
135135
// +kubebuilder:default=false
136136
Overwrite bool `json:"overwrite"`
137+
138+
// AppArmorProfile defines the AppArmor profile for the Slurm node
139+
//
140+
// +kubebuilder:validation:Optional
141+
// +kubebuilder:default="unconfined"
142+
AppArmorProfile string `json:"appArmorProfile,omitempty"`
137143
}
138144

139145
// PeriodicChecks define the k8s CronJobs performing cluster checks
@@ -204,6 +210,12 @@ type NCCLBenchmark struct {
204210
//
205211
// +kubebuilder:validation:Required
206212
K8sNodeFilterName string `json:"k8sNodeFilterName"`
213+
214+
// AppArmorProfile defines the AppArmor profile for the Slurm node
215+
//
216+
// +kubebuilder:validation:Optional
217+
// +kubebuilder:default="unconfined"
218+
AppArmorProfile string `json:"appArmorProfile,omitempty"`
207219
}
208220

209221
// NCCLArguments define nccl settings for periodic nccl benchmark
@@ -749,7 +761,7 @@ type NodeContainer struct {
749761
// +kubebuilder:validation:Optional
750762
SecurityLimitsConfig string `json:"securityLimitsConfig,omitempty"`
751763

752-
// AppArmorProfile defines the AppArmor profile for the Slurm worker node
764+
// AppArmorProfile defines the AppArmor profile for the Slurm containers
753765
//
754766
// +kubebuilder:validation:Optional
755767
// +kubebuilder:default="unconfined"

config/crd/bases/slurm.nebius.ai_slurmclusters.yaml

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1123,6 +1123,11 @@ spec:
11231123
in seconds
11241124
format: int64
11251125
type: integer
1126+
appArmorProfile:
1127+
default: unconfined
1128+
description: AppArmorProfile defines the AppArmor profile
1129+
for the Slurm node
1130+
type: string
11261131
enabled:
11271132
default: true
11281133
description: Enabled defines whether the CronJob should be
@@ -1219,6 +1224,11 @@ spec:
12191224
description: PopulateJail defines the k8s Job that performs initial
12201225
jail file system population
12211226
properties:
1227+
appArmorProfile:
1228+
default: unconfined
1229+
description: AppArmorProfile defines the AppArmor profile for
1230+
the Slurm node
1231+
type: string
12221232
image:
12231233
description: Image defines the populate jail container image
12241234
type: string
@@ -1524,7 +1534,7 @@ spec:
15241534
appArmorProfile:
15251535
default: unconfined
15261536
description: AppArmorProfile defines the AppArmor profile
1527-
for the Slurm worker node
1537+
for the Slurm containers
15281538
type: string
15291539
enabled:
15301540
type: boolean
@@ -8289,7 +8299,7 @@ spec:
82898299
appArmorProfile:
82908300
default: unconfined
82918301
description: AppArmorProfile defines the AppArmor profile
8292-
for the Slurm worker node
8302+
for the Slurm containers
82938303
type: string
82948304
image:
82958305
description: Image defines the container image
@@ -8376,7 +8386,7 @@ spec:
83768386
appArmorProfile:
83778387
default: unconfined
83788388
description: AppArmorProfile defines the AppArmor profile
8379-
for the Slurm worker node
8389+
for the Slurm containers
83808390
type: string
83818391
image:
83828392
description: Image defines the container image
@@ -8532,7 +8542,7 @@ spec:
85328542
appArmorProfile:
85338543
default: unconfined
85348544
description: AppArmorProfile defines the AppArmor profile
8535-
for the Slurm worker node
8545+
for the Slurm containers
85368546
type: string
85378547
image:
85388548
description: Image defines the container image
@@ -8579,7 +8589,7 @@ spec:
85798589
appArmorProfile:
85808590
default: unconfined
85818591
description: AppArmorProfile defines the AppArmor profile
8582-
for the Slurm worker node
8592+
for the Slurm containers
85838593
type: string
85848594
image:
85858595
description: Image defines the container image
@@ -9072,7 +9082,7 @@ spec:
90729082
appArmorProfile:
90739083
default: unconfined
90749084
description: AppArmorProfile defines the AppArmor profile
9075-
for the Slurm worker node
9085+
for the Slurm containers
90769086
type: string
90779087
image:
90789088
description: Image defines the container image
@@ -9123,7 +9133,7 @@ spec:
91239133
appArmorProfile:
91249134
default: unconfined
91259135
description: AppArmorProfile defines the AppArmor profile
9126-
for the Slurm worker node
9136+
for the Slurm containers
91279137
type: string
91289138
image:
91299139
description: Image defines the container image
@@ -9612,7 +9622,7 @@ spec:
96129622
appArmorProfile:
96139623
default: unconfined
96149624
description: AppArmorProfile defines the AppArmor profile
9615-
for the Slurm worker node
9625+
for the Slurm containers
96169626
type: string
96179627
image:
96189628
description: Image defines the container image
@@ -9664,7 +9674,7 @@ spec:
96649674
appArmorProfile:
96659675
default: unconfined
96669676
description: AppArmorProfile defines the AppArmor profile
9667-
for the Slurm worker node
9677+
for the Slurm containers
96689678
type: string
96699679
image:
96709680
description: Image defines the container image
@@ -9993,7 +10003,7 @@ spec:
999310003
appArmorProfile:
999410004
default: unconfined
999510005
description: AppArmorProfile defines the AppArmor profile
9996-
for the Slurm worker node
10006+
for the Slurm containers
999710007
type: string
999810008
image:
999910009
description: Image defines the container image
@@ -10057,7 +10067,7 @@ spec:
1005710067
appArmorProfile:
1005810068
default: unconfined
1005910069
description: AppArmorProfile defines the AppArmor profile
10060-
for the Slurm worker node
10070+
for the Slurm containers
1006110071
type: string
1006210072
image:
1006310073
description: Image defines the container image
@@ -10103,7 +10113,7 @@ spec:
1010310113
appArmorProfile:
1010410114
default: unconfined
1010510115
description: AppArmorProfile defines the AppArmor profile
10106-
for the Slurm worker node
10116+
for the Slurm containers
1010710117
type: string
1010810118
image:
1010910119
description: Image defines the container image

config/manager/kustomization.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@ resources:
33
images:
44
- name: controller
55
newName: cr.eu-north1.nebius.cloud/soperator/slurm-operator
6-
newTag: 1.15.1
6+
newTag: 1.15.2

config/manager/manager.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ spec:
8282
value: "false"
8383
- name: SLURM_OPERATOR_WATCH_NAMESPACES
8484
value: "*"
85-
image: controller:1.15.1
85+
image: controller:1.15.2
8686
imagePullPolicy: Always
8787
name: manager
8888
securityContext:

helm/slurm-cluster-storage/Chart.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@ apiVersion: v2
22
name: helm-slurm-cluster-storage
33
description: A Helm chart for Kubernetes
44
type: application
5-
version: "1.15.1"
6-
appVersion: "1.15.1"
5+
version: "1.15.2"
6+
appVersion: "1.15.2"

helm/slurm-cluster/Chart.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@ apiVersion: v2
22
name: helm-slurm-cluster
33
description: A Helm chart for Kubernetes
44
type: application
5-
version: "1.15.1"
6-
appVersion: "1.15.1"
5+
version: "1.15.2"
6+
appVersion: "1.15.2"

helm/slurm-cluster/templates/slurm-cluster-cr.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,7 @@ spec:
4040
- name: {{ .name | quote }}
4141
{{- omit . "name" | toYaml | nindent 6 }}
4242
{{- end }}
43-
secrets:
44-
sshdKeysName: {{ include "slurm-cluster.secret.sshdKeysName" . }}
43+
secrets: {{ toYaml .Values.secrets | nindent 4 }}
4544
populateJail:
4645
image: {{ required "populateJail image" .Values.images.populateJail | quote }}
4746
imagePullPolicy: {{ default "IfNotPresent" .Values.populateJail.imagePullPolicy | quote }}

helm/slurm-cluster/values.yaml

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,9 @@ volumeSources:
6161
# readOnly: false
6262

6363
# Secret references needed for Slurm cluster operation
64-
secrets:
65-
# Secret reference required for login sshd. If secret name empty - operator generate own secret with keys
66-
sshdKeysName: ""
64+
secrets: {}
65+
# Secret reference required for login sshd. If the secret name is empty, the operator generates its own secret with keys
66+
# sshdKeysName: ""
6767
# Job performing initial jail file system population
6868
populateJail:
6969
imagePullPolicy: "IfNotPresent"
@@ -380,13 +380,13 @@ telemetry: {}
380380
# otelCollectorPort: 8429
381381

382382
images:
383-
slurmctld: "cr.eu-north1.nebius.cloud/soperator/controller_slurmctld:1.15.1-jammy-slurm24.05.2"
384-
slurmrestd: "cr.eu-north1.nebius.cloud/soperator/slurmrestd:1.15.1-jammy-slurm24.05.2"
385-
slurmd: "cr.eu-north1.nebius.cloud/soperator/worker_slurmd:1.15.1-jammy-slurm24.05.2"
386-
sshd: "cr.eu-north1.nebius.cloud/soperator/login_sshd:1.15.1-jammy-slurm24.05.2"
387-
munge: "cr.eu-north1.nebius.cloud/soperator/munge:1.15.1-jammy-slurm24.05.2"
388-
populateJail: "cr.eu-north1.nebius.cloud/soperator/populate_jail:1.15.1-jammy-slurm24.05.2"
389-
ncclBenchmark: "cr.eu-north1.nebius.cloud/soperator/nccl_benchmark:1.15.1-jammy-slurm24.05.2"
390-
slurmdbd: "cr.eu-north1.nebius.cloud/soperator/controller_slurmdbd:1.15.1-jammy-slurm24.05.2"
391-
exporter: "cr.eu-north1.nebius.cloud/soperator/exporter:1.15.1-jammy-slurm24.05.2"
383+
slurmctld: "cr.eu-north1.nebius.cloud/soperator/controller_slurmctld:1.15.2-jammy-slurm24.05.2"
384+
slurmrestd: "cr.eu-north1.nebius.cloud/soperator/slurmrestd:1.15.2-jammy-slurm24.05.2"
385+
slurmd: "cr.eu-north1.nebius.cloud/soperator/worker_slurmd:1.15.2-jammy-slurm24.05.2"
386+
sshd: "cr.eu-north1.nebius.cloud/soperator/login_sshd:1.15.2-jammy-slurm24.05.2"
387+
munge: "cr.eu-north1.nebius.cloud/soperator/munge:1.15.2-jammy-slurm24.05.2"
388+
populateJail: "cr.eu-north1.nebius.cloud/soperator/populate_jail:1.15.2-jammy-slurm24.05.2"
389+
ncclBenchmark: "cr.eu-north1.nebius.cloud/soperator/nccl_benchmark:1.15.2-jammy-slurm24.05.2"
390+
slurmdbd: "cr.eu-north1.nebius.cloud/soperator/controller_slurmdbd:1.15.2-jammy-slurm24.05.2"
391+
exporter: "cr.eu-north1.nebius.cloud/soperator/exporter:1.15.2-jammy-slurm24.05.2"
392392
mariaDB: "docker-registry1.mariadb.com/library/mariadb:11.4.3"

helm/soperator-crds/Chart.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@ apiVersion: v2
22
name: helm-soperator-crds
33
description: A Helm chart for Kubernetes
44
type: application
5-
version: 1.15.1
6-
appVersion: "1.15.1"
5+
version: 1.15.2
6+
appVersion: "1.15.2"

0 commit comments

Comments
 (0)