Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions accelerator/roles/intel/tasks/install_ubuntu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
- name: Install drivers on Gaudi nodes
when: node_has_accelerator
block:
# Verify the kernel boot parameters (see verify_kernel_boot_cmdline.yml) before installing drivers.
- name: Make sure required kernel command line parameters are set

Check failure on line 22 in accelerator/roles/intel/tasks/install_ubuntu.yml

View workflow job for this annotation

GitHub Actions / Ansible Lint

name[casing]

All names should start with an uppercase letter.
ansible.builtin.include_tasks: verify_kernel_boot_cmdline.yml

- name: Gather package facts
ansible.builtin.package_facts:
manager: auto
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,10 @@

- name: Create gaudi-network.sh
when: not check_scale_up_script.stat.exists
ansible.builtin.blockinfile:
path: "{{ intel_scale_up_ports_script_path }}"
create: true
ansible.builtin.copy:
dest: "{{ intel_scale_up_ports_script_path }}"
mode: "{{ file_permissions }}"
block: |
content: |
#!/bin/bash
EXT_PORTS="24"
RETRIES=10
Expand Down
56 changes: 56 additions & 0 deletions accelerator/roles/intel/tasks/verify_kernel_boot_cmdline.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Copyright 2025 Intel Corporation.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---
# Capture the GRUB_CMDLINE_LINUX line from /etc/default/grub so the tasks
# below can test which kernel parameters are already present.
# Read-only query, hence it never reports a change.
- name: Read current GRUB_CMDLINE_LINUX
  ansible.builtin.command:
    cmd: grep '^GRUB_CMDLINE_LINUX=' /etc/default/grub
  changed_when: false
  register: grub_cmdline

# Append "iommu=pt" inside the quoted GRUB_CMDLINE_LINUX value.
# Skipped when the line captured in grub_cmdline already contains that substring.
- name: Ensure iommu=pt is present
  when: '"iommu=pt" not in grub_cmdline.stdout'
  ansible.builtin.lineinfile:
    path: /etc/default/grub
    backrefs: true  # lets \1 in `line` reuse the existing value matched by `regexp`
    regexp: '^GRUB_CMDLINE_LINUX="(.*)"'
    line: 'GRUB_CMDLINE_LINUX="\1 iommu=pt"'

# Append "intel_iommu=on" inside the quoted GRUB_CMDLINE_LINUX value.
# Skipped when the line captured in grub_cmdline already contains that substring.
- name: Ensure intel_iommu=on is present
  when: '"intel_iommu=on" not in grub_cmdline.stdout'
  ansible.builtin.lineinfile:
    path: /etc/default/grub
    backrefs: true  # lets \1 in `line` reuse the existing value matched by `regexp`
    regexp: '^GRUB_CMDLINE_LINUX="(.*)"'
    line: 'GRUB_CMDLINE_LINUX="\1 intel_iommu=on"'

# Regenerate the GRUB configuration so the edited GRUB_CMDLINE_LINUX is used
# on the next boot. Runs only when at least one required parameter was
# missing from the line captured in grub_cmdline.
- name: Update GRUB configuration
  when: '"iommu=pt" not in grub_cmdline.stdout or "intel_iommu=on" not in grub_cmdline.stdout'
  ansible.builtin.command:
    cmd: update-grub
  # This task only runs after /etc/default/grub was edited, so it always
  # modifies the boot configuration; report it as a change.
  changed_when: true

# Show the reboot warning and wait warning_wait_time seconds before moving on.
# Only shown when a kernel parameter had to be added.
- name: Prompt for reboot
  when: '"iommu=pt" not in grub_cmdline.stdout or "intel_iommu=on" not in grub_cmdline.stdout'
  ansible.builtin.pause:
    prompt: "{{ reboot_warning_msg }}"
    seconds: "{{ warning_wait_time }}"

# Reboot the node so the new kernel command line takes effect.
# Guarded by the same condition as the GRUB update above.
- name: Initiate reboot
  when: '"iommu=pt" not in grub_cmdline.stdout or "intel_iommu=on" not in grub_cmdline.stdout'
  block:
    - name: Rebooting node (This task will take some time)
      ansible.builtin.reboot:
        reboot_timeout: 600
  # If the reboot task fails, stop with the role's reboot failure message.
  rescue:
    - name: Failed to reboot node
      ansible.builtin.fail:
        msg: "{{ reboot_fail_msg }}"
4 changes: 4 additions & 0 deletions accelerator/roles/intel/vars/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ intel_gaudi_kernel_module_to_load:
- habanalabs
- habanalabs_cn
- habanalabs_en
- habanalabs_compat

# TODO: move to a central config file
intel_habana_packages:
Expand Down Expand Up @@ -70,3 +71,6 @@ intel_scale_up_ports_service_name: "gaudi-network.service"
file_permissions: "0755"
svc_file_permissions: "0644"
gaudi3_pci_vendor_device_class: "1da3:1060:1200"
# Seconds the reboot warning is displayed (used as the pause duration) before the play continues.
warning_wait_time: 30
# Shown by a timed pause: Enter does not advance a timed pause, so the text
# describes the Ctrl+C key sequences that do.
reboot_warning_msg: >-
  Changes have been made to the GRUB configuration. The node will be rebooted
  when the wait time expires. Press 'Ctrl+C' then 'C' to continue immediately,
  or 'Ctrl+C' then 'A' to abort.
reboot_fail_msg: "Failed to reboot the node. Please check the system manually."
2 changes: 1 addition & 1 deletion accelerator/tests/test_vars/test_Gaudi_vars.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ oim_dir: "../"
Gaudi_validation_script_path: test_Gaudi_validation.yml
inventory: ../inventory.ini

Gaudi_Default_version: "1.19.2"
Gaudi_Default_version: "1.21.1"

version_pass: 'Gaudi driver version installed on the nodes matched successfully with the default version'
version_fail: 'Gaudi driver version installed on the nodes does not matched with the default version'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ Use the local repository feature to create a customized set of local repositorie
{"name": "telemetry"},
{"name": "ucx", "version": "1.15.0"},
{"name": "openmpi", "version": "4.1.6"},
{"name": "intelgaudi", "version": "1.19.2-32"},
{"name": "intelgaudi", "version": "1.21.1-16"},
{"name": "csi_driver_powerscale", "version":"v2.13.0"}
],
"bcm_roce": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ Input parameters for Local Repositories
{"name": "telemetry"},
{"name": "ucx", "version": "1.15.0"},
{"name": "openmpi", "version": "4.1.6"},
{"name": "intelgaudi", "version": "1.19.2-32"},
{"name": "intelgaudi", "version": "1.21.1-16"},
{"name": "csi_driver_powerscale", "version":"v2.13.0"},
{"name": "intel_benchmarks", "version": "2024.1"},
{"name": "amd_benchmarks"}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ Configure specific local repositories

::

{"name": "intelgaudi", "version": "1.19.2-32"},
{"name": "intelgaudi", "version": "1.21.1-16"},

* Add the following line below the ``softwares`` section:

Expand Down
2 changes: 1 addition & 1 deletion docs/source/OmniaInstallGuide/samplefiles.rst
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ software_config.json for Ubuntu
{"name": "telemetry"},
{"name": "ucx", "version": "1.15.0"},
{"name": "openmpi", "version": "4.1.6"},
{"name": "intelgaudi", "version": "1.19.2-32"},
{"name": "intelgaudi", "version": "1.21.1-16"},
{"name": "csi_driver_powerscale", "version":"v2.13.0"}
],

Expand Down
2 changes: 1 addition & 1 deletion docs/source/Overview/newfeatures.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ New Features
+--------------------------+-----------------------------------+-------------------------------+
| Kubespray | 2.27 | 2.25 |
+--------------------------+-----------------------------------+-------------------------------+
| Intel Gaudi driver | 1.19.2 | 1.19.1 |
| Intel Gaudi driver | 1.21.1 | 1.19.2 |
+--------------------------+-----------------------------------+-------------------------------+
| CSI PowerScale driver | 2.13.0 | 2.11.0 |
+--------------------------+-----------------------------------+-------------------------------+
Expand Down
6 changes: 3 additions & 3 deletions docs/source/Tables/omnia_installed_software.csv
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@ beegfs,BeeGFS (formerly known as FhGFS) END USER LICENSE,Public repository for t
beegfs on ubuntu 24.04,BeeGFS (formerly known as FhGFS) END USER LICENSE,Public repository for the BeeGFS Parallel File System,7.4.5,,,,
csi powerscale driver ,Apache-2.0 license,CSI Driver for Dell PowerScale,2.13.0,,,,
CUDA toolkit,NVIDIA Software License,"The NVIDIA® CUDA® Toolkit provides a development environment for creating high-performance, GPU-accelerated applications.",12.8,,,,
Intel Gaudi3 driver,MIT license,Intel Gaudi3 drivers,1.19.2,,,,
Intel Gaudi3 driver,MIT license,Intel Gaudi3 drivers,1.21.1,,,,
FreeIPA (RHEL/Rocky),GNU-General Public License v3.0," FreeIPA, an integrated security information management solution",4.9.11,,,,
NVIDIA device plugin,Apache License 2.0,NVIDIA device plugin for Kubernetes,0.14.4,,,,
rocm device plugin,Apache License 2.0,Kubernetes (k8s) device plugin to enable registration of AMD GPU to a container cluster,0.19.0,,,,
xilinx-device-plugin,Apache License 2.0,The AMD-Xilinx device plugin for Kubernetes is a Daemonset deployed on the Kubernetes(k8s) cluster,1.2.0,,,,
habanalabs-k8s-device-plugin,Apache License 2.0,HABANA device plugin for Kubernetes,1.19.2,,,,
habanalabs-k8s-device-plugin,Apache License 2.0,HABANA device plugin for Kubernetes,1.21.1,,,,
Jupyterhub,BSD-3-Clause license,Multi-user server for Jupyter notebooks,3.2.0,,,,
Kserve,Apache License 2.0,Standardized Serverless ML Inference Platform on Kubernetes,0.13.0,,,,
kubeflow,Apache-2.0,A repository for Kustomize manifests,1.9.1,,,,
Expand Down Expand Up @@ -141,7 +141,7 @@ Tensorflow Nvidia,Apache-2.0 license,An Open Source Machine Learning Framework f
Kustomize (RHEL/Rocky 8.8),Apache License 2.0,Customization of kubernetes YAML configurations,5.0.3,,,,
Kustomize (ubuntu 22.04/24.04),Apache License 2.0,Customization of kubernetes YAML configurations,5.4.3,,,,
nfs-subdir-external-provisioner,Apache License 2.0,Dynamic sub-dir volume provisioner on a remote NFS server.,4.0.18,,,,
habana-container-runtime,Apache License 2.0,Habana container runtime,1.19.2,,,,
habana-container-runtime,Apache License 2.0,Habana container runtime,1.21.1,,,,
nvidia-container-toolkit,Apache License 2.0,NVIDIA container runtime library,1.17.5,,,,
helm-charts,Apache-2.0 license,The source for Dell Helm charts.,csi-isilon-2.13.0,,,,
csi-powerscale,Apache-2.0 license,CSI Driver for Dell PowerScale,2.13.0,,,,
Expand Down
2 changes: 1 addition & 1 deletion docs/source/Tables/software_matrix_rhel_rocky.csv
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ FreeIPA,GNU-General Public License v3.0," FreeIPA, an integrated security infor
NVIDIA device plugin,Apache License 2.0,NVIDIA device plugin for Kubernetes,0.14.4
rocm device plugin,Apache License 2.0,Kubernetes (k8s) device plugin to enable registration of AMD GPU to a container cluster,0.19.0
xilinx-device-plugin,Apache License 2.0,The AMD-Xilinx device plugin for Kubernetes is a Daemonset deployed on the Kubernetes(k8s) cluster,1.2.0
habanalabs-k8s-device-plugin,Apache License 2.0,HABANA device plugin for Kubernetes,1.19.2
habanalabs-k8s-device-plugin,Apache License 2.0,HABANA device plugin for Kubernetes,1.21.1
Jupyterhub,BSD-3-Clause license,Multi-user server for Jupyter notebooks,3.2.0
Kserve,Apache License 2.0,Standardized Serverless ML Inference Platform on Kubernetes,0.13.0
kubeflow,Apache-2.0,A repository for Kustomize manifests,1.9.1
Expand Down
8 changes: 4 additions & 4 deletions docs/source/Tables/software_matrix_ubuntu_22.csv
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,11 @@ rccl,MIT License,The ROCm Communication Collectives Library (RCCL) is a stand-al
beegfs,BeeGFS (formerly known as FhGFS) END USER LICENSE,Public repository for the BeeGFS Parallel File System,7.4.5
beeGFS-Client,GPLv2,"BeeGFS is a parallel file system, developed and optimized for high-performance computing.",7.4.5
CUDA toolkit,NVIDIA Software License,"The NVIDIA® CUDA® Toolkit provides a development environment for creating high-performance, GPU-accelerated applications.",12.8
Intel Gaudi3 driver,MIT license,Intel Gaudi3 drivers,1.19.2
Intel Gaudi3 driver,MIT license,Intel Gaudi3 drivers,1.21.1
NVIDIA device plugin,Apache License 2.0,NVIDIA device plugin for Kubernetes,0.14.4
rocm device plugin,Apache License 2.0,Kubernetes (k8s) device plugin to enable registration of AMD GPU to a container cluster,0.19.0
xilinx-device-plugin,Apache License 2.0,The AMD-Xilinx device plugin for Kubernetes is a Daemonset deployed on the Kubernetes(k8s) cluster,1.2.0
habanalabs-k8s-device-plugin,Apache License 2.0,HABANA device plugin for Kubernetes,1.19.2
habanalabs-k8s-device-plugin,Apache License 2.0,HABANA device plugin for Kubernetes,1.21.1
Jupyterhub,BSD-3-Clause license,Multi-user server for Jupyter notebooks,3.2.0
Kserve,Apache License 2.0,Standardized Serverless ML Inference Platform on Kubernetes,0.13.0
kubeflow,Apache-2.0,A repository for Kustomize manifests,1.9.1
Expand Down Expand Up @@ -109,8 +109,8 @@ Tensorflow AMD,Apache-2.0 license,An Open Source Machine Learning Framework for
Tensorflow NVIDIA,Apache-2.0 license,An Open Source Machine Learning Framework for Everyone,23.12-tf2-py3
Kustomize,Apache License 2.0,Customization of kubernetes YAML configurations,5.4.3
nfs-subdir-external-provisioner,Apache License 2.0,Dynamic sub-dir volume provisioner on a remote NFS server.,4.0.18
habana-container-runtime,Apache License 2.0,Habana container runtime,1.19.2
hccl,Habana Outbound Software License Agreement,The Habana Collective Communications Library (HCCL) is Intel® Gaudi®’s emulation layer of the NVIDIA Collective Communication Library (NCCL) and is included in the Intel Gaudi software suite.,1.19.2
habana-container-runtime,Apache License 2.0,Habana container runtime,1.21.1
hccl,Habana Outbound Software License Agreement,The Habana Collective Communications Library (HCCL) is Intel® Gaudi®’s emulation layer of the NVIDIA Collective Communication Library (NCCL) and is included in the Intel Gaudi software suite.,1.21.1
nvidia-container-toolkit,Apache License 2.0,NVIDIA container runtime library,1.17.5
helm-charts,Apache-2.0 license,The source for Dell Helm charts.,csi-isilon-2.13.0
csi-powerscale,Apache-2.0 license,CSI Driver for Dell PowerScale,2.13.0
Expand Down
8 changes: 4 additions & 4 deletions docs/source/Tables/software_matrix_ubuntu_24.csv
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,11 @@ rccl,MIT License,The ROCm Communication Collectives Library (RCCL) is a stand-al
beegfs,BeeGFS (formerly known as FhGFS) END USER LICENSE,Public repository for the BeeGFS Parallel File System,7.4.5
Beegfs-Client,GPLv2,"BeeGFS is a parallel file system, developed and optimized for high-performance computing.",7.4.5
CUDA toolkit,NVIDIA Software License,"The NVIDIA® CUDA® Toolkit provides a development environment for creating high-performance, GPU-accelerated applications.",12.8
Intel Gaudi3 driver,MIT license,Intel Gaudi3 drivers,1.19.2
Intel Gaudi3 driver,MIT license,Intel Gaudi3 drivers,1.21.1
NVIDIA device plugin,Apache License 2.0,NVIDIA device plugin for Kubernetes,0.14.4
rocm device plugin,Apache License 2.0,Kubernetes (k8s) device plugin to enable registration of AMD GPU to a container cluster,0.19.0
xilinx-device-plugin,Apache License 2.0,The AMD-Xilinx device plugin for Kubernetes is a Daemonset deployed on the Kubernetes(k8s) cluster,1.2.0
habanalabs-k8s-device-plugin,Apache License 2.0,HABANA device plugin for Kubernetes,1.19.2
habanalabs-k8s-device-plugin,Apache License 2.0,HABANA device plugin for Kubernetes,1.21.1
Jupyterhub,BSD-3-Clause license,Multi-user server for Jupyter notebooks,3.2.0
Kserve,Apache License 2.0,Standardized Serverless ML Inference Platform on Kubernetes,0.13.0
kubeflow,Apache-2.0,A repository for Kustomize manifests,1.9.1
Expand Down Expand Up @@ -109,8 +109,8 @@ Tensorflow AMD,Apache-2.0 license,An Open Source Machine Learning Framework for
Tensorflow NVIDIA,Apache-2.0 license,An Open Source Machine Learning Framework for Everyone,23.12-tf2-py3
Kustomize,Apache License 2.0,Customization of kubernetes YAML configurations,5.4.3
nfs-subdir-external-provisioner,Apache License 2.0,Dynamic sub-dir volume provisioner on a remote NFS server.,4.0.18
habana-container-runtime,Apache License 2.0,Habana container runtime,1.19.2
hccl,Habana Outbound Software License Agreement,The Habana Collective Communications Library (HCCL) is Intel® Gaudi®’s emulation layer of the NVIDIA Collective Communication Library (NCCL) and is included in the Intel Gaudi software suite.,1.19.2
habana-container-runtime,Apache License 2.0,Habana container runtime,1.21.1
hccl,Habana Outbound Software License Agreement,The Habana Collective Communications Library (HCCL) is Intel® Gaudi®’s emulation layer of the NVIDIA Collective Communication Library (NCCL) and is included in the Intel Gaudi software suite.,1.21.1
nvidia-container-toolkit,Apache License 2.0,NVIDIA container runtime library,1.17.5
helm-charts,Apache-2.0 license,The source for Dell Helm charts.,csi-isilon-2.13.0
csi-powerscale driver,Apache-2.0 license,CSI Driver for Dell PowerScale,2.13.0
Expand Down
12 changes: 6 additions & 6 deletions examples/ai_examples/intel/deepSpeed/ds_configuration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ spec:
template:
spec:
containers:
- image: vault.habana.ai/gaudi-docker/1.19.2/ubuntu24.04/habanalabs/pytorch-installer-2.5.1:latest
- image: vault.habana.ai/gaudi-docker/1.21.1/ubuntu24.04/habanalabs/pytorch-installer-2.6.0:latest
name: gaudi-llm-ds-ft-launcher
env:
- name: HF_HOME
Expand Down Expand Up @@ -66,11 +66,11 @@ spec:

git clone https://github.com/huggingface/optimum-habana /optimum-habana;
cd /optimum-habana;
git checkout v1.15.0;
git checkout v1.17.0;
sed -i '194s|deepspeed|deepspeed --force_multi|' optimum/habana/distributed/distributed_runner.py;
retry_until_success pip install .;
retry_until_success pip install -r examples/language-modeling/requirements.txt;
retry_until_success pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.19.0;
retry_until_success pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.21.0;

mpirun --npernode 1 \
--tag-output \
Expand Down Expand Up @@ -108,7 +108,7 @@ spec:

git clone https://github.com/huggingface/optimum-habana /optimum-habana
cd /optimum-habana
git checkout v1.15.0
git checkout v1.17.0
hf_home_var="os.environ[\"HF_HOME\"] = \"${HF_HOME}\""
token_var="os.environ[\"HUGGING_FACE_HUB_TOKEN\"] = \"${HUGGING_FACE_HUB_TOKEN}\""
https_var="os.environ[\"https_proxy\"] = \"${https_proxy}\""
Expand All @@ -121,7 +121,7 @@ spec:
sed -i "60i\\${no_proxy_var}" examples/language-modeling/run_lora_clm.py
retry_until_success pip install .
retry_until_success pip install -r examples/language-modeling/requirements.txt
retry_until_success pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.19.0
retry_until_success pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.21.0
';

eval $(ssh-agent);
Expand Down Expand Up @@ -172,7 +172,7 @@ spec:
spec:
hostIPC: true
containers:
- image: vault.habana.ai/gaudi-docker/1.19.2/ubuntu24.04/habanalabs/pytorch-installer-2.5.1:latest
- image: vault.habana.ai/gaudi-docker/1.21.1/ubuntu24.04/habanalabs/pytorch-installer-2.6.0:latest
name: gaudi-llm-ds-ft-worker
command: ["/bin/bash", "-c"]
args:
Expand Down
4 changes: 2 additions & 2 deletions examples/ai_examples/intel/vllm/vllm_configuration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ spec:
app: vllm-llama-app
spec:
containers:
- image: vault.habana.ai/gaudi-docker/1.19.2/ubuntu24.04/habanalabs/pytorch-installer-2.5.1:latest
- image: vault.habana.ai/gaudi-docker/1.21.1/ubuntu24.04/habanalabs/pytorch-installer-2.6.0:latest
name: vllm-llama-openai
imagePullPolicy: Always
workingDir: /root
Expand Down Expand Up @@ -63,7 +63,7 @@ spec:
- "/bin/sh"
- "-c"
- |
git clone -b v0.6.4.post2+Gaudi-1.19.2 https://github.com/HabanaAI/vllm-fork.git
git clone -b v0.7.2+Gaudi-1.21.0 https://github.com/HabanaAI/vllm-fork.git
cd vllm-fork
pip install -v -r requirements-hpu.txt
export VLLM_TARGET_DEVICE=hpu
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"softwares": [
{"name": "amdgpu", "version": "6.3.1"},
{"name": "bcm_roce", "version": "232.1.133.2"},
{"name": "intelgaudi", "version": "1.19.2-32"},
{"name": "intelgaudi", "version": "1.21.1-16"},
{"name": "cuda", "version": "12.8.0"},
{"name": "ofed", "version": "24.01-0.3.3.1"},
{"name": "openldap"},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"softwares": [
{"name": "amdgpu", "version": "6.3.1"},
{"name": "bcm_roce", "version": "232.1.133.2"},
{"name": "intelgaudi", "version": "1.19.2-32"},
{"name": "intelgaudi", "version": "1.21.1-16"},
{"name": "cuda", "version": "12.8.0"},
{"name": "ofed", "version": "24.07-0.6.1.0"},
{"name": "openldap"},
Expand Down
2 changes: 1 addition & 1 deletion examples/ubuntu_software_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
{"name": "jupyter"},
{"name": "pytorch"},
{"name": "tensorflow"},
{"name": "intelgaudi", "version": "1.19.2-32"}
{"name": "intelgaudi", "version": "1.21.1-16"}
],

"bcm_roce": [
Expand Down
2 changes: 1 addition & 1 deletion input/config/ubuntu/22.04/k8s.json
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@
},
{
"package": "vault.habana.ai/docker-k8s-device-plugin/docker-k8s-device-plugin",
"tag": "1.19.2-32",
"tag": "1.21.1-16",
"type": "image"
},
{
Expand Down
2 changes: 1 addition & 1 deletion input/config/ubuntu/22.04/pytorch.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@

"cluster": [
{
"package": "vault.habana.ai/gaudi-docker/1.19.2/ubuntu22.04/habanalabs/pytorch-installer-2.5.1",
"package": "vault.habana.ai/gaudi-docker/1.21.1/ubuntu22.04/habanalabs/pytorch-installer-2.6.0",
"tag": "latest",
"type": "image"
}
Expand Down
2 changes: 1 addition & 1 deletion input/config/ubuntu/24.04/k8s.json
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@
},
{
"package": "vault.habana.ai/docker-k8s-device-plugin/docker-k8s-device-plugin",
"tag": "1.19.2-32",
"tag": "1.21.1-16",
"type": "image"
},
{
Expand Down
Loading
Loading