From ed4a76126f574ee72fca090d8813d9d94ca0d610 Mon Sep 17 00:00:00 2001 From: Pavel Sofronii Date: Tue, 24 Mar 2026 17:52:35 +0100 Subject: [PATCH] SCHED-1210 use cuda_force_upgrade for upgrading CUDA version and upgrade ansible requirements.txt --- ansible/requirements.txt | 42 +++++++++---------- .../templates/jail-mount-daemonset.yaml | 2 +- helm/soperator-activechecks/values.yaml | 6 +-- images/accounting/slurmdbd.dockerfile | 4 +- images/controller/slurmctld.dockerfile | 4 +- images/jail/jail.dockerfile | 4 +- images/k8s_check_job/k8s_check_job.dockerfile | 4 +- images/login/sshd.dockerfile | 4 +- images/munge/munge.dockerfile | 4 +- images/restd/slurmrestd.dockerfile | 4 +- images/sansible/sansible.dockerfile | 4 +- .../slurm_check_job.dockerfile | 4 +- images/worker/slurmd.dockerfile | 4 +- 13 files changed, 45 insertions(+), 45 deletions(-) diff --git a/ansible/requirements.txt b/ansible/requirements.txt index f6a39896c..95eb0d50f 100644 --- a/ansible/requirements.txt +++ b/ansible/requirements.txt @@ -1,36 +1,36 @@ -ansible==12.2.0 -ansible-compat==25.8.2 -ansible-core==2.19.4 -ansible-lint==25.9.2 +ansible==13.4.0 +ansible-compat==25.12.1 +ansible-core==2.20.4 +ansible-lint==26.3.0 argcomplete==3.6.3 -attrs==25.4.0 -black==25.9.0 +attrs==26.1.0 +black==26.3.1 bracex==2.6 cffi==2.0.0 -charset-normalizer==3.4.4 -click==8.3.0 +charset-normalizer==3.4.6 +click==8.3.1 cryptography==46.0.5 distro==1.9.0 -filelock==3.20.3 -importlib_metadata==8.7.0 +filelock==3.25.2 +importlib_metadata==9.0.0 Jinja2==3.1.6 -jsonschema==4.25.1 +jsonschema==4.26.0 jsonschema-specifications==2025.9.1 MarkupSafe==3.0.3 mypy_extensions==1.1.0 -packaging==25.0 -pathspec==0.12.1 -platformdirs==4.5.0 -pycparser==2.23 -python-debian==1.0.1 -pytokens==0.2.0 +packaging==26.0 +pathspec==1.0.4 +platformdirs==4.9.4 +pycparser==3.0 +python-debian==1.1.0 +pytokens==0.4.1 PyYAML==6.0.3 referencing==0.37.0 resolvelib==1.2.1 -rpds-py==0.28.0 -ruamel.yaml==0.18.16 -ruamel.yaml.clib==0.2.14 +rpds-py==0.30.0 +ruamel.yaml==0.19.1 +ruamel.yaml.clib==0.2.15 subprocess-tee==0.4.2 wcmatch==10.1 -yamllint==1.37.1 +yamllint==1.38.0 zipp==3.23.0 diff --git a/helm/slurm-cluster-storage/templates/jail-mount-daemonset.yaml b/helm/slurm-cluster-storage/templates/jail-mount-daemonset.yaml index 3f502ec96..6cc28ca2c 100644 --- a/helm/slurm-cluster-storage/templates/jail-mount-daemonset.yaml +++ b/helm/slurm-cluster-storage/templates/jail-mount-daemonset.yaml @@ -22,7 +22,7 @@ spec: {{- if eq (include "slurm-cluster-storage.volume.jail.type" .) "filestore" }} image: cr.eu-north1.nebius.cloud/soperator/busybox {{- else }} - image: cr.eu-north1.nebius.cloud/ml-containers/neubuntu:noble-20260220113744 + image: cr.eu-north1.nebius.cloud/ml-containers/neubuntu:noble-20260324150527 {{- end }} command: - /bin/sh diff --git a/helm/soperator-activechecks/values.yaml b/helm/soperator-activechecks/values.yaml index e79ca5761..24922dc3d 100644 --- a/helm/soperator-activechecks/values.yaml +++ b/helm/soperator-activechecks/values.yaml @@ -35,9 +35,9 @@ images: sansible: "cr.eu-north1.nebius.cloud/soperator/sansible:3.0.2-slurm25.11.3" activeCheckImageRepository: "cr.eu-north1.nebius.cloud#ml-containers/training_diag" activeCheckImageTags: - # Image training_diag from PR https://github.com/nebius/ml-containers/pull/77 - "12.9.0": "12.9.0-ubuntu24.04-20260305112552" - "13.0.2": "13.0.2-ubuntu24.04-20260305112552" + # Image training_diag from PR https://github.com/nebius/ml-containers/pull/79 + "12.9.0": "12.9.0-ubuntu24.04-20260324155131" + "13.0.2": "13.0.2-ubuntu24.04-20260324155131" # Optional override in Pyxis ("reg#repo:tag") format. activeCheckImage: "" checks: diff --git a/images/accounting/slurmdbd.dockerfile b/images/accounting/slurmdbd.dockerfile index 4f7fead8f..9e88e8940 100644 --- a/images/accounting/slurmdbd.dockerfile +++ b/images/accounting/slurmdbd.dockerfile @@ -2,8 +2,8 @@ ARG SLURM_VERSION -# https://github.com/nebius/ml-containers/pull/73 -FROM cr.eu-north1.nebius.cloud/ml-containers/slurm:${SLURM_VERSION}-20260225115852 AS controller_slurmdbd +# https://github.com/nebius/ml-containers/pull/79 +FROM cr.eu-north1.nebius.cloud/ml-containers/slurm:${SLURM_VERSION}-20260324153054 AS controller_slurmdbd # Expose the port used for accessing slurmdbd EXPOSE 6819 diff --git a/images/controller/slurmctld.dockerfile b/images/controller/slurmctld.dockerfile index 242822652..c1402c277 100644 --- a/images/controller/slurmctld.dockerfile +++ b/images/controller/slurmctld.dockerfile @@ -30,8 +30,8 @@ RUN --mount=type=cache,target=/root/.cache/go-build \ GOOS=$GOOS CGO_ENABLED=$CGO_ENABLED GO_LDFLAGS=$GO_LDFLAGS \ go build -v -o power-manager ./cmd/powermanager -# https://github.com/nebius/ml-containers/pull/73 -FROM cr.eu-north1.nebius.cloud/ml-containers/slurm:${SLURM_VERSION}-20260225115852 AS controller_slurmctld +# https://github.com/nebius/ml-containers/pull/79 +FROM cr.eu-north1.nebius.cloud/ml-containers/slurm:${SLURM_VERSION}-20260324153054 AS controller_slurmctld COPY ansible/sssd.yml /opt/ansible/sssd.yml COPY ansible/roles/sssd /opt/ansible/roles/sssd diff --git a/images/jail/jail.dockerfile b/images/jail/jail.dockerfile index b804b9352..ecec80a78 100644 --- a/images/jail/jail.dockerfile +++ b/images/jail/jail.dockerfile @@ -2,8 +2,8 @@ ARG CUDA_VERSION ARG SLURM_VERSION -# https://github.com/nebius/ml-containers/pull/77 -FROM cr.eu-north1.nebius.cloud/ml-containers/slurm_training_diag:slurm${SLURM_VERSION}-cuda${CUDA_VERSION}-ubuntu24.04-20260305113511 AS jail +# https://github.com/nebius/ml-containers/pull/79 +FROM cr.eu-north1.nebius.cloud/ml-containers/slurm_training_diag:slurm${SLURM_VERSION}-cuda${CUDA_VERSION}-ubuntu24.04-20260324162019 AS jail # Create directory for pivoting host's root RUN mkdir -m 555 /mnt/host diff --git a/images/k8s_check_job/k8s_check_job.dockerfile b/images/k8s_check_job/k8s_check_job.dockerfile index b8e941920..4ddf08bb8 100644 --- a/images/k8s_check_job/k8s_check_job.dockerfile +++ b/images/k8s_check_job/k8s_check_job.dockerfile @@ -1,7 +1,7 @@ # syntax=docker.io/docker/dockerfile-upstream:1.20.0 -# https://github.com/nebius/ml-containers/pull/67 -FROM cr.eu-north1.nebius.cloud/ml-containers/neubuntu:noble-20260220113744 AS k8s_check_job +# https://github.com/nebius/ml-containers/pull/79 +FROM cr.eu-north1.nebius.cloud/ml-containers/neubuntu:noble-20260324150527 AS k8s_check_job # Install common packages RUN apt update && \ diff --git a/images/login/sshd.dockerfile b/images/login/sshd.dockerfile index ff623493f..8378fca58 100644 --- a/images/login/sshd.dockerfile +++ b/images/login/sshd.dockerfile @@ -2,8 +2,8 @@ ARG SLURM_VERSION -# https://github.com/nebius/ml-containers/pull/73 -FROM cr.eu-north1.nebius.cloud/ml-containers/slurm:${SLURM_VERSION}-20260225115852 AS login_sshd +# https://github.com/nebius/ml-containers/pull/79 +FROM cr.eu-north1.nebius.cloud/ml-containers/slurm:${SLURM_VERSION}-20260324153054 AS login_sshd # Install OpenSSH server # Create root .ssh directory diff --git a/images/munge/munge.dockerfile b/images/munge/munge.dockerfile index 1e4a3b1f6..de6b6b77f 100644 --- a/images/munge/munge.dockerfile +++ b/images/munge/munge.dockerfile @@ -1,7 +1,7 @@ # syntax=docker.io/docker/dockerfile-upstream:1.20.0 -# https://github.com/nebius/ml-containers/pull/67 -FROM cr.eu-north1.nebius.cloud/ml-containers/neubuntu:noble-20260220113744 AS munge +# https://github.com/nebius/ml-containers/pull/79 +FROM cr.eu-north1.nebius.cloud/ml-containers/neubuntu:noble-20260324150527 AS munge RUN apt-get update && \ apt -y install \ diff --git a/images/restd/slurmrestd.dockerfile b/images/restd/slurmrestd.dockerfile index f8e1b06d4..5deb0508a 100644 --- a/images/restd/slurmrestd.dockerfile +++ b/images/restd/slurmrestd.dockerfile @@ -2,8 +2,8 @@ ARG SLURM_VERSION -# https://github.com/nebius/ml-containers/pull/73 -FROM cr.eu-north1.nebius.cloud/ml-containers/slurm:${SLURM_VERSION}-20260225115852 AS slurmrestd +# https://github.com/nebius/ml-containers/pull/79 +FROM cr.eu-north1.nebius.cloud/ml-containers/slurm:${SLURM_VERSION}-20260324153054 AS slurmrestd # Expose the port used for accessing slurmrestd EXPOSE 6820 diff --git a/images/sansible/sansible.dockerfile b/images/sansible/sansible.dockerfile index 523300fee..49d708308 100644 --- a/images/sansible/sansible.dockerfile +++ b/images/sansible/sansible.dockerfile @@ -1,7 +1,7 @@ # syntax=docker.io/docker/dockerfile-upstream:1.20.0 -# https://github.com/nebius/ml-containers/pull/77 -FROM cr.eu-north1.nebius.cloud/ml-containers/ansible_roles:noble-20260305113500 AS sansible +# https://github.com/nebius/ml-containers/pull/79 +FROM cr.eu-north1.nebius.cloud/ml-containers/ansible_roles:noble-20260324152306 AS sansible # Install common packages RUN apt update && \ diff --git a/images/slurm_check_job/slurm_check_job.dockerfile b/images/slurm_check_job/slurm_check_job.dockerfile index 873704671..19091321e 100644 --- a/images/slurm_check_job/slurm_check_job.dockerfile +++ b/images/slurm_check_job/slurm_check_job.dockerfile @@ -2,8 +2,8 @@ ARG SLURM_VERSION -# https://github.com/nebius/ml-containers/pull/73 -FROM cr.eu-north1.nebius.cloud/ml-containers/slurm:${SLURM_VERSION}-20260225115852 AS slurm_check_job +# https://github.com/nebius/ml-containers/pull/79 +FROM cr.eu-north1.nebius.cloud/ml-containers/slurm:${SLURM_VERSION}-20260324153054 AS slurm_check_job # Install slurm сhroot plugin COPY images/common/chroot-plugin/chroot.c /usr/src/chroot-plugin/ diff --git a/images/worker/slurmd.dockerfile b/images/worker/slurmd.dockerfile index 73023bca4..cfd9fd02a 100644 --- a/images/worker/slurmd.dockerfile +++ b/images/worker/slurmd.dockerfile @@ -2,8 +2,8 @@ ARG SLURM_VERSION -# https://github.com/nebius/ml-containers/pull/73 -FROM cr.eu-north1.nebius.cloud/ml-containers/slurm:${SLURM_VERSION}-20260225115852 AS worker_slurmd +# https://github.com/nebius/ml-containers/pull/79 +FROM cr.eu-north1.nebius.cloud/ml-containers/slurm:${SLURM_VERSION}-20260324153054 AS worker_slurmd # Install useful packages RUN apt-get update && \