diff --git a/ansible/roles/cuda/defaults/main.yml b/ansible/roles/cuda/defaults/main.yml index 33a25d9b4..f41c17f70 100644 --- a/ansible/roles/cuda/defaults/main.yml +++ b/ansible/roles/cuda/defaults/main.yml @@ -1,6 +1,6 @@ cuda_distro: "rhel{{ ansible_distribution_major_version }}" cuda_repo: "https://developer.download.nvidia.com/compute/cuda/repos/{{ cuda_distro }}/x86_64/cuda-{{ cuda_distro }}.repo" -cuda_driver_stream: default +cuda_driver_stream: open-dkms cuda_package_version: 'latest' cuda_packages: - "cuda{{ ('-' + cuda_package_version) if cuda_package_version != 'latest' else '' }}" diff --git a/ansible/roles/cuda/tasks/main.yml b/ansible/roles/cuda/tasks/main.yml index 22f8e9e8e..3dbc45268 100644 --- a/ansible/roles/cuda/tasks/main.yml +++ b/ansible/roles/cuda/tasks/main.yml @@ -25,7 +25,7 @@ register: _cuda_driver_module_enabled - name: Enable nvidia driver module - ansible.builtin.command: "dnf module enable -y nvidia-driver:open-dkms" + ansible.builtin.command: "dnf module enable -y nvidia-driver:{{ cuda_driver_stream }}" register: _cuda_driver_module_enable when: "'No matching Modules to list' in _cuda_driver_module_enabled.stderr" changed_when: "'Nothing to do' not in _cuda_driver_module_enable.stdout" diff --git a/packer/openhpc_extravars.yml b/packer/openhpc_extravars.yml index 66f668649..7a78e3e51 100644 --- a/packer/openhpc_extravars.yml +++ b/packer/openhpc_extravars.yml @@ -1 +1,2 @@ workaround_ansible_issue_61497: yes # extravars files can't be empty +cuda_driver_stream: 560-open # pinned because the latest cuda (12.6.2) is out of date for the latest nvidia-driver (565.57.01); remove when fixed