Skip to content

Commit 2738190

Browse files
Merge pull request #116 from oci-hpc/2.10.2_peermem_fix
2.10.2 peermem fix
2 parents cffd0cd + 9be56ef commit 2738190

File tree

1 file changed

+10
-6
lines changed

1 file changed

+10
-6
lines changed

playbooks/roles/nvidia_peermem/tasks/common.yml

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,27 @@
33
shell:
44
cmd: "curl -sH \"Authorization: Bearer Oracle\" -L http://169.254.169.254/opc/v2/instance/ | jq .shape | grep GPU"
55
warn: false
6-
register: shape
6+
register: shape_gpu
77
failed_when: false
88

9-
109
- name: Check if nvidia drivers are installed
1110
shell: cat /sys/module/nvidia/version | wc -l
1211
register: nvidia
13-
when: shape.stdout != ""
14-
12+
when: shape_gpu.stdout != ""
1513

1614
- name: Check if nvidia_peermem module is loaded
1715
shell: lsmod | grep nvidia_peermem | wc -l
1816
register: result
19-
when: shape.stdout != "" and nvidia.stdout == '1'
17+
when: shape_gpu.stdout != "" and nvidia.stdout == '1'
2018

19+
- name: Check ofed version
20+
shell:
21+
cmd: |
22+
/usr/bin/ofed_info |grep MLNX_OFED_LINUX|grep -v rpm|awk -F "(" '{print $2}'|cut -c 6-|awk -F "-" '{print $1}'
23+
register: ofed_version_local
24+
when: shape_gpu.stdout != "" and nvidia.stdout == '1'
2125

2226
- name: Load nvidia_peermem module
2327
become: true
2428
shell: modprobe nvidia_peermem
25-
when: shape.stdout != "" and nvidia.stdout == '1' and result.stdout != '3'
29+
# Require MLNX_OFED >= 5.1 via a real version comparison (an int-vs-string
# `>=` is not a valid check); an empty ofed_info result is treated as "0".
when: shape_gpu.stdout != "" and nvidia.stdout == '1' and result.stdout != '3' and (ofed_version_local.stdout | default('0', true)) is version('5.1', '>=')

0 commit comments

Comments
 (0)