File tree Expand file tree Collapse file tree 2 files changed +43
-4
lines changed
cookbooks/aws-parallelcluster-platform
resources/fabric_manager/partial Expand file tree Collapse file tree 2 files changed +43
-4
lines changed Original file line number Diff line number Diff line change @@ -54,10 +54,12 @@ def _nvidia_driver_version
5454
# Get number of NVSwitches.
#
# A100 (P4), H100 (P5) and B200 (P6) systems have NVSwitches, and each
# instance family exposes them under a different PCI device ID:
#   10de:1af1 - P4
#   10de:22a3 - P5
#   10de:2901 - P6
# The per-ID counts are summed because the lspci command is expected to report
# a count > 0 for only one device ID, depending on the instance type.
#
# @return [Integer] total number of NVSwitch devices reported by lspci;
#   non-numeric command output contributes 0 (String#to_i semantics)
def get_nvswitches
  nvswitch_device_ids = %w[10de:1af1 10de:22a3 10de:2901]
  nvswitch_device_ids.sum do |id|
    # `wc -l` turns the lspci listing for this device ID into a line count
    shell_out("lspci -d #{id} | wc -l").stdout.strip.to_i
  end
end
Original file line number Diff line number Diff line change @@ -260,3 +260,40 @@ def self.configure(chef_run)
260260 end
261261 end
262262end
263+
# Specs for the fabric_manager resource's get_nvswitches helper.
# shell_out is stubbed on the resource, so no real lspci command runs; each
# example supplies the stdout values handed back on successive shell_out calls.
describe 'fabric_manager:get_nvswitches' do
  cached(:chef_run) do
    ChefSpec::SoloRunner.new(step_into: ['fabric_manager'])
  end

  let(:output_of_shell) { double('shell_out') }
  cached(:resource) do
    ConvergeFabricManager.setup(chef_run)
    chef_run.find_resource('fabric_manager', 'setup')
  end

  before do
    allow(resource).to receive(:shell_out).and_return(output_of_shell)
  end

  context 'when count of NVSwitches > 1' do
    it 'correctly counts multiple NVSwitches' do
      allow(output_of_shell).to receive(:stdout).and_return("2\n ", "0\n ", "0\n ")
      expect(resource.get_nvswitches).to eq(2)
    end
  end

  context 'when count of NVSwitches == 1' do
    # Description now matches the expectation: exactly one switch is reported.
    it 'correctly counts a single NVSwitch' do
      allow(output_of_shell).to receive(:stdout).and_return("0\n ", "1\n ", "0\n ")
      expect(resource.get_nvswitches).to eq(1)
    end
  end

  context 'when count of NVSwitches gives unexpected output' do
    it 'handles non-numeric output' do
      # A single return value is reused for every stdout call.
      allow(output_of_shell).to receive(:stdout).and_return("error\n ")
      expect(resource.get_nvswitches).to eq(0)
    end
  end
end
You can’t perform that action at this time.
0 commit comments