Skip to content

Commit f118e20

Browse files
authored
Merge branch 'main' into filesystems-docs
2 parents f568327 + ba96992 commit f118e20

File tree

4 files changed

+10
-24
lines changed

4 files changed

+10
-24
lines changed

ansible/roles/cuda/defaults/main.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
cuda_repo_url: "https://developer.download.nvidia.com/compute/cuda/repos/rhel{{ ansible_distribution_major_version }}/{{ ansible_architecture }}/cuda-rhel{{ ansible_distribution_major_version }}.repo"
2-
cuda_nvidia_driver_stream: '575-open'
3-
cuda_nvidia_driver_pkg: "nvidia-open-3:575.57.08-1.el{{ ansible_distribution_major_version }}"
4-
cuda_package_version: '12.9.1-1'
2+
cuda_nvidia_driver_stream: '580-open'
3+
cuda_nvidia_driver_pkg: "nvidia-open-3:580.65.06-1.el{{ ansible_distribution_major_version }}"
4+
cuda_package_version: '13.0.0-1'
55
cuda_version_short: "{{ (cuda_package_version | split('.'))[0:2] | join('.') }}" # major.minor
66
cuda_packages:
77
- "cuda-toolkit-{{ cuda_package_version }}"

ansible/roles/hpctests/library/plot_nxnlatbw.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,11 @@ def run_module():
129129
if len(vals) != 4:
130130
print('warning: skipping line %i (%i values)' % (ln, len(vals)))
131131
continue
132-
rankA, rankB, lat, bw = int(vals[0]), int(vals[1]), float(vals[2]), float(vals[3])
132+
try:
133+
rankA, rankB, lat, bw = int(vals[0]), int(vals[1]), float(vals[2]), float(vals[3])
134+
except ValueError:
135+
print('warning: skipping line %i (%s) - parse failure' % (ln, line))
136+
continue
133137
latencies[rankA, rankB] = lat
134138
bandwidths[rankA, rankB] = bw
135139

environments/.caas/ansible.cfg

Lines changed: 0 additions & 19 deletions
This file was deleted.

environments/common/inventory/group_vars/all/openhpc.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,8 @@ openhpc_config_extra: {}
5454
# additional default slurm.conf parameters for the appliance:
5555
openhpc_config_default:
5656
TaskPlugin: task/cgroup,task/affinity
57-
ReturnToService: 2 # workaround for templating bug TODO: Remove once on stackhpc.openhpc v1.2.0
57+
ProctrackType: proctrack/cgroup
58+
JobAcctGatherType: jobacct_gather/cgroup
5859

5960
# additional default slurm.conf parameters when "rebuild" enabled:
6061
openhpc_config_rebuild:

0 commit comments

Comments
 (0)