File tree Expand file tree Collapse file tree 4 files changed +24
-1
lines changed
cookbooks/aws-parallelcluster-platform
resources/nvidia_nvlsm/partial Expand file tree Collapse file tree 4 files changed +24
-1
lines changed Original file line number Diff line number Diff line change 3232 CODE
3333end
3434include_recipe "aws-parallelcluster-platform::directories"
35+ nvidia_nvlsm 'Install Nvidia NVLink Subnet Manager'
3536install_packages 'Install OS and extra packages'
3637bash "Check kernel after extra packages" do
3738 code <<-CODE
Original file line number Diff line number Diff line change 3838end
3939
4040action :install_nvlsm_dependencies do
41- package nvidia_nvlsm_dependencies do
41+ bash "Install nvlsm dependencies" do
42+ user 'root'
43+ code <<-CODE
44+ set -ex
45+ #{ nvidia_nvlsm_install_dependencies_commands }
46+ CODE
4247 retries 3
4348 retry_delay 5
4449 end
50+ # package nvidia_nvlsm_dependencies do
51+ # options '--verbose'
52+ # retries 3
53+ # retry_delay 5
54+ # end
4555
4656 # Make sure kernel module for Infiniband is loaded at instance boot time
4757 cookbook_file 'infiniband.conf' do
@@ -103,6 +113,10 @@ def nvidia_nvlsm_install_preconditions_commands
103113 # OS dependent
104114end
105115
# Shell command(s) that install the NVLSM dependencies.
#
# The value is interpolated into the script of the
# "Install nvlsm dependencies" bash resource. This default is a placeholder
# that yields nil (an empty interpolation).
# NOTE(review): presumably each OS-specific resource overrides this method;
# confirm that every supported platform does so.
#
# @return [String, nil] nil in this default implementation
def nvidia_nvlsm_install_dependencies_commands
  nil # OS dependent
end
119+
# Packages required by NVLSM.
#
# This default is a placeholder that yields nil.
# NOTE(review): presumably overridden by each OS-specific resource — confirm.
#
# @return [nil] in this default implementation
def nvidia_nvlsm_dependencies
  nil # OS dependent
end
Original file line number Diff line number Diff line change @@ -40,3 +40,7 @@ def nvidia_nvlsm_dependencies
# Diagnostic shell commands for apt-based systems: prints the kernel
# identification, the apt pinning/policy state, and the packages on hold.
#
# @return [String] the commands chained with " ; " so each one runs
#   regardless of the previous command's exit status
def nvidia_nvlsm_install_preconditions_commands
  diagnostics = ['uname -a', 'apt-cache policy', 'apt-mark showhold']
  diagnostics.join(' ; ')
end
43+
# Shell command that installs the NVLSM dependencies on apt-based systems.
#
# Uses `apt-get` rather than `apt`: `apt` is the interactive front-end, its
# command-line interface is documented as unstable, and it emits a warning
# when driven from a script. DEBIAN_FRONTEND=noninteractive keeps debconf
# from prompting during an unattended run.
# The resolver debug option is kept so dependency-resolution failures stay
# diagnosable from the logs.
#
# @return [String] a single shell command
def nvidia_nvlsm_install_dependencies_commands
  "DEBIAN_FRONTEND=noninteractive apt-get -o Debug::pkgProblemResolver=1 install -y infiniband-diags ibutils"
end
Original file line number Diff line number Diff line change @@ -39,3 +39,7 @@ def nvidia_nvlsm_dependencies
# Diagnostic shell commands for yum-based systems: prints the kernel
# identification, the repository list, the version locks, and the
# info/provider details of the packages involved.
#
# @return [String] the commands chained with " ; " so each one runs
#   regardless of the previous command's exit status
def nvidia_nvlsm_install_preconditions_commands
  inspected_packages = 'kernel-modules-extra-aws infiniband-diags libibumad'
  [
    'uname -a',
    'yum repolist all',
    'yum versionlock list',
    "yum info #{inspected_packages}",
    "yum provides #{inspected_packages}",
  ].join(' ; ')
end
42+
# Shell command that installs the NVLSM dependencies on yum-based systems,
# answering yes to prompts (-y) with verbose output (-v).
#
# @return [String] a single shell command
def nvidia_nvlsm_install_dependencies_commands
  required_packages = %w(infiniband-diags libibumad)
  "yum install -yv #{required_packages.join(' ')}"
end
You can’t perform that action at this time.
0 commit comments