diff --git a/ansible/roles/cuda/README.md b/ansible/roles/cuda/README.md
index be6439cd5..1e74d07f3 100644
--- a/ansible/roles/cuda/README.md
+++ b/ansible/roles/cuda/README.md
@@ -10,6 +10,6 @@ Requires OFED to be installed to provide required kernel-* packages.
 
 - `cuda_repo_url`: Optional. URL of `.repo` file. Default is upstream for appropriate OS/architecture.
 - `cuda_nvidia_driver_stream`: Optional. Version of `nvidia-driver` stream to enable. This controls whether the open or proprietary drivers are installed and the major version. Changing this once the drivers are installed does not change the version.
-- `cuda_packages`: Optional. Default: `['cuda', 'nvidia-gds']`.
+- `cuda_packages`: Optional. Default: `['cuda', 'nvidia-gds', 'cmake', 'cuda-toolkit-12-8']`.
 - `cuda_package_version`: Optional. Default `latest` which will install the latest packages if not installed but won't upgrade already-installed packages. Use `'none'` to skip installing CUDA.
 - `cuda_persistenced_state`: Optional. State of systemd `nvidia-persistenced` service. Values as [ansible.builtin.systemd:state](https://docs.ansible.com/ansible/latest/collections/ansible/builtin/systemd_module.html#parameter-state). Default `started`.
diff --git a/ansible/roles/cuda/defaults/main.yml b/ansible/roles/cuda/defaults/main.yml
index 31cfe23d5..fd4bf37c8 100644
--- a/ansible/roles/cuda/defaults/main.yml
+++ b/ansible/roles/cuda/defaults/main.yml
@@ -1,13 +1,14 @@
 cuda_repo_url: "https://developer.download.nvidia.com/compute/cuda/repos/rhel{{ ansible_distribution_major_version }}/{{ ansible_architecture }}/cuda-rhel{{ ansible_distribution_major_version }}.repo"
 cuda_nvidia_driver_stream: '570-open'
 cuda_package_version: '12.8.1-1'
+cuda_version_short: '12.8'  # keep in step with cuda_package_version; used for the cuda-toolkit package name and the cuda-samples release tag
 cuda_packages:
   - "cuda{{ ('-' + cuda_package_version) if cuda_package_version != 'latest' else '' }}"
   - nvidia-gds
-# _cuda_version_tuple: # discovered from installed package e.g. ('12', '1', '0')
-cuda_version_short: "{{ _cuda_version_tuple[0] }}.{{ _cuda_version_tuple[1] }}"
+  - cmake
+  - "cuda-toolkit-{{ cuda_version_short | replace('.', '-') }}"
 cuda_samples_release_url: "https://github.com/NVIDIA/cuda-samples/archive/refs/tags/v{{ cuda_version_short }}.tar.gz"
-cuda_samples_path: "/home/{{ ansible_user }}/cuda_samples"
+cuda_samples_path: "/var/lib/{{ ansible_user }}/cuda_samples"
 cuda_samples_programs:
   - deviceQuery
   - bandwidthTest
diff --git a/ansible/roles/cuda/tasks/samples.yml b/ansible/roles/cuda/tasks/samples.yml
index 679ce5644..38ce3339d 100644
--- a/ansible/roles/cuda/tasks/samples.yml
+++ b/ansible/roles/cuda/tasks/samples.yml
@@ -1,12 +1,3 @@
-- name: Read CUDA version file
-  slurp:
-    src: /usr/local/cuda/version.json
-  register: _cuda_samples_version
-
-- name: Set fact for discovered CUDA version
-  set_fact:
-    _cuda_version_tuple: "{{ (_cuda_samples_version.content | b64decode | from_json).cuda.version | split('.') }}" # e.g. '12.1.0'
-
 - name: Ensure cuda_samples_path exists
   file:
     state: directory