Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.20.0
1.20.1
1 change: 1 addition & 0 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ import (
var scheme = runtime.NewScheme()

func init() {

utilruntime.Must(clientgoscheme.AddToScheme(scheme))

// Check if the OpenTelemetryCollector and PodMonitor CRDs are installed before adding them to the scheme
Expand Down
2 changes: 1 addition & 1 deletion config/manager/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ resources:
images:
- name: controller
newName: cr.eu-north1.nebius.cloud/soperator/slurm-operator
newTag: 1.20.0
newTag: 1.20.1
2 changes: 1 addition & 1 deletion config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ spec:
value: "false"
- name: SLURM_OPERATOR_WATCH_NAMESPACES
value: "*"
image: controller:1.20.0
image: controller:1.20.1
imagePullPolicy: Always
name: manager
securityContext:
Expand Down
2 changes: 1 addition & 1 deletion config/soperatorchecks/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ resources:
images:
- name: controller
newName: cr.eu-north1.nebius.cloud/soperator/soperatorchecks
newTag: 1.20.0
newTag: 1.20.1
patches:
# Protect the /metrics endpoint by putting it behind auth.
# If you want your controller-manager to expose the /metrics
Expand Down
4 changes: 4 additions & 0 deletions fluxcd/base/soperator-fluxcd/resources.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,7 @@ spec:
name: soperator-fluxcd
valuesKey: values.yaml
optional: true
- kind: ConfigMap
name: backup-schedule
valuesKey: values.yaml
optional: true
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ spec:
name: nebius-cloud
postBuild:
substitute:
soperator_version: 1.20.0
soperator_version: 1.20.1
path: "./fluxcd/enviroment/nebius-cloud/dev"
prune: true
timeout: 1m
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ spec:
name: nebius-cloud
postBuild:
substitute:
soperator_version: 1.20.0
soperator_version: 1.20.1
path: "./fluxcd/enviroment/nebius-cloud/prod"
prune: false
timeout: 1m
4 changes: 2 additions & 2 deletions helm/nodeconfigurator/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 1.20.0
version: 1.20.1
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.20.0"
appVersion: "1.20.1"
2 changes: 1 addition & 1 deletion helm/nodeconfigurator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ rebooter:
env: []
image:
repository: "cr.eu-north1.nebius.cloud/soperator/rebooter"
tag: "1.20.0"
tag: "1.20.1"
pullPolicy: IfNotPresent
nodeSelector: {}
resources: {}
Expand Down
4 changes: 2 additions & 2 deletions helm/slurm-cluster-storage/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ apiVersion: v2
name: helm-slurm-cluster-storage
description: A Helm chart for Kubernetes
type: application
version: "1.20.0"
appVersion: "1.20.0"
version: "1.20.1"
appVersion: "1.20.1"
4 changes: 2 additions & 2 deletions helm/slurm-cluster/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ apiVersion: v2
name: helm-slurm-cluster
description: A Helm chart for Kubernetes
type: application
version: "1.20.0"
appVersion: "1.20.0"
version: "1.20.1"
appVersion: "1.20.1"
kubeVersion: ">=1.29.0-0"
20 changes: 10 additions & 10 deletions helm/slurm-cluster/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -485,14 +485,14 @@ telemetry: {}
# otelCollectorPort: 8429

images:
slurmctld: "cr.eu-north1.nebius.cloud/soperator/controller_slurmctld:1.20.0-jammy-slurm24.05.7"
slurmrestd: "cr.eu-north1.nebius.cloud/soperator/slurmrestd:1.20.0-jammy-slurm24.05.7"
slurmd: "cr.eu-north1.nebius.cloud/soperator/worker_slurmd:1.20.0-jammy-slurm24.05.7"
sshd: "cr.eu-north1.nebius.cloud/soperator/login_sshd:1.20.0-jammy-slurm24.05.7"
munge: "cr.eu-north1.nebius.cloud/soperator/munge:1.20.0-jammy-slurm24.05.7"
populateJail: "cr.eu-north1.nebius.cloud/soperator/populate_jail:1.20.0-jammy-slurm24.05.7"
ncclBenchmark: "cr.eu-north1.nebius.cloud/soperator/nccl_benchmark:1.20.0-jammy-slurm24.05.7"
slurmdbd: "cr.eu-north1.nebius.cloud/soperator/controller_slurmdbd:1.20.0-jammy-slurm24.05.7"
exporter: "cr.eu-north1.nebius.cloud/soperator/exporter:1.20.0-jammy-slurm24.05.7"
sConfigController: cr.eu-north1.nebius.cloud/soperator/sconfigcontroller:1.20.0
slurmctld: "cr.eu-north1.nebius.cloud/soperator/controller_slurmctld:1.20.1-jammy-slurm24.05.7"
slurmrestd: "cr.eu-north1.nebius.cloud/soperator/slurmrestd:1.20.1-jammy-slurm24.05.7"
slurmd: "cr.eu-north1.nebius.cloud/soperator/worker_slurmd:1.20.1-jammy-slurm24.05.7"
sshd: "cr.eu-north1.nebius.cloud/soperator/login_sshd:1.20.1-jammy-slurm24.05.7"
munge: "cr.eu-north1.nebius.cloud/soperator/munge:1.20.1-jammy-slurm24.05.7"
populateJail: "cr.eu-north1.nebius.cloud/soperator/populate_jail:1.20.1-jammy-slurm24.05.7"
ncclBenchmark: "cr.eu-north1.nebius.cloud/soperator/nccl_benchmark:1.20.1-jammy-slurm24.05.7"
slurmdbd: "cr.eu-north1.nebius.cloud/soperator/controller_slurmdbd:1.20.1-jammy-slurm24.05.7"
exporter: "cr.eu-north1.nebius.cloud/soperator/exporter:1.20.1-jammy-slurm24.05.7"
sConfigController: cr.eu-north1.nebius.cloud/soperator/sconfigcontroller:1.20.1
mariaDB: docker-registry1.mariadb.com/library/mariadb:11.4.3
4 changes: 2 additions & 2 deletions helm/soperator-activechecks/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ apiVersion: v2
name: helm-soperator-activechecks
description: A Helm chart for Kubernetes
type: application
version: "1.20.0"
appVersion: "1.20.0"
version: "1.20.1"
appVersion: "1.20.1"
4 changes: 2 additions & 2 deletions helm/soperator-activechecks/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,6 @@ activeCheck:
persistentVolumeClaim:
claimName: "jail-pvc"
images:
slurmJob: "cr.eu-north1.nebius.cloud/soperator/slurm_check_job:1.20.0-jammy-slurm24.05.7"
slurmJob: "cr.eu-north1.nebius.cloud/soperator/slurm_check_job:1.20.1-jammy-slurm24.05.7"
k8sJob: "ubuntu:jammy"
munge: "cr.eu-north1.nebius.cloud/soperator/munge:1.20.0-jammy-slurm24.05.7"
munge: "cr.eu-north1.nebius.cloud/soperator/munge:1.20.1-jammy-slurm24.05.7"
4 changes: 2 additions & 2 deletions helm/soperator-crds/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ apiVersion: v2
name: helm-soperator-crds
description: A Helm chart for Kubernetes
type: application
version: 1.20.0
appVersion: "1.20.0"
version: 1.20.1
appVersion: "1.20.1"
kubeVersion: ">=1.29.0-0"
4 changes: 2 additions & 2 deletions helm/soperator-dcgm-exporter/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ apiVersion: v2
name: helm-soperator-dcgm-exporter
description: A Helm chart for Nvidia DCGM Exporter
type: application
version: 1.20.0
appVersion: "1.20.0"
version: 1.20.1
appVersion: "1.20.1"
4 changes: 2 additions & 2 deletions helm/soperator-fluxcd/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 1.20.0
version: 1.20.1
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.20.0"
appVersion: "1.20.1"
2 changes: 1 addition & 1 deletion helm/soperator-fluxcd/templates/backup_schedule.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ spec:
interval: {{ .Values.backup.schedule.interval }}
timeout: {{ .Values.backup.schedule.timeout }}
releaseName: {{ .Values.backup.schedule.releaseName }}
targetNamespace: {{ .Values.backup.schedule.namespace }}
targetNamespace: {{ .Values.slurmCluster.namespace }}
upgrade:
crds: Skip
values:
Expand Down
6 changes: 3 additions & 3 deletions helm/soperator-fluxcd/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ observability:
enabled: true
interval: 5m
timeout: 5m
version: 1.20.0
version: 1.20.1
namespace: soperator
releaseName: soperator-dcgm-exporter
values:
Expand All @@ -192,7 +192,7 @@ slurmCluster:
enabled: true
interval: 5m
timeout: 5m
version: 1.20.0
version: 1.20.1
namespace: soperator
releaseName: soperator
values: null
Expand All @@ -208,7 +208,7 @@ soperator:
enabled: true
interval: 5m
timeout: 5m
version: 1.20.0
version: 1.20.1
namespace: "soperator-system"
releaseName: soperator-controller
values:
Expand Down
4 changes: 2 additions & 2 deletions helm/soperator/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ apiVersion: v2
name: helm-soperator
description: A Helm chart for Kubernetes
type: application
version: 1.20.0
appVersion: "1.20.0"
version: 1.20.1
appVersion: "1.20.1"
kubeVersion: ">=1.29.0-0"
dependencies:
- name: kruise
Expand Down
2 changes: 1 addition & 1 deletion helm/soperator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ controllerManager:
slurmOperatorWatchNamespaces: '*'
image:
repository: cr.eu-north1.nebius.cloud/soperator/slurm-operator
tag: 1.20.0
tag: 1.20.1
imagePullPolicy: Always
resources:
limits:
Expand Down
4 changes: 2 additions & 2 deletions helm/soperatorchecks/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 1.20.0
version: 1.20.1
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.20.0"
appVersion: "1.20.1"
2 changes: 1 addition & 1 deletion helm/soperatorchecks/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ checks:
slurmOperatorWatchNamespaces: '*'
image:
repository: cr.eu-north1.nebius.cloud/soperator/soperatorchecks
tag: 1.20.0
tag: 1.20.1
imagePullPolicy: Always
resources:
limits:
Expand Down
2 changes: 1 addition & 1 deletion internal/consts/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
package consts

const (
VersionCR = "1.20.0"
VersionCR = "1.20.1"
)
6 changes: 4 additions & 2 deletions internal/render/common/volume.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"path"

corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/utils/ptr"

Expand Down Expand Up @@ -419,12 +420,13 @@ func RenderVolumeMountSshdKeys() corev1.VolumeMount {
// region InMemory

// RenderVolumeInMemory renders a [corev1.Volume] whose content is stored in shared memory (tmpfs).
func RenderVolumeInMemory() corev1.Volume {
func RenderVolumeInMemory(sizeLimit *resource.Quantity) corev1.Volume {
return corev1.Volume{
Name: consts.VolumeNameInMemorySubmount,
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
Medium: corev1.StorageMediumMemory,
SizeLimit: sizeLimit,
},
},
}
Expand Down
2 changes: 1 addition & 1 deletion internal/render/login/volume.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ func renderVolumesAndClaimTemplateSpecs(
common.RenderVolumeSecurityLimits(clusterName, consts.ComponentTypeLogin),
common.RenderVolumeSshdKeys(secrets.SshdKeysName),
common.RenderVolumeSshdRootKeys(clusterName),
common.RenderVolumeInMemory(),
common.RenderVolumeInMemory(login.ContainerSshd.Resources.Memory()),
common.RenderVolumeTmpDisk(),
renderVolumeSshdConfigs(login.SSHDConfigMapName),
}
Expand Down
2 changes: 1 addition & 1 deletion internal/render/worker/volume.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ func renderVolumesAndClaimTemplateSpecs(
common.RenderVolumeSecurityLimits(clusterName, consts.ComponentTypeWorker),
common.RenderVolumeSshdKeys(secrets.SshdKeysName),
common.RenderVolumeSshdRootKeys(clusterName),
common.RenderVolumeInMemory(),
common.RenderVolumeInMemory(worker.ContainerSlurmd.Resources.Memory()),
common.RenderVolumeTmpDisk(),
renderVolumeSshdConfigs(worker.SSHDConfigMapName),
renderVolumeNvidia(),
Expand Down
Loading