|
3 | 3 | nvidia_version = "1.2.3" |
4 | 4 | SOURCE_DIR = 'SOURCE_DIR'.freeze |
5 | 5 | nvidia_imex_shared_dir = "SHARED_DIR/nvidia-imex" |
| 6 | +imex_service_file = "/etc/systemd/system/nvidia-imex.service" |
6 | 7 | imex_binary = '/usr/bin/nvidia-imex' |
7 | 8 | imex_ctl_binary = '/usr/bin/nvidia-imex-ctl' |
8 | 9 | queue_name = 'queue-name' |
@@ -296,118 +297,152 @@ def self.configure(chef_run) |
296 | 297 |
|
297 | 298 | describe 'nvidia_imex:configure' do |
298 | 299 | [%w(false), [false], %w(no), %w(true), [true], %w(yes)].each do |force_indicator| |
299 | | - for_all_oses do |platform, version| |
300 | | - context "on #{platform}#{version} with force_configuration #{force_indicator}" do |
301 | | - context "when nvidia-imex binary is not installed" do |
302 | | - cached(:chef_run) do |
303 | | - stubs_for_resource('nvidia_imex') do |res| |
304 | | - allow(res).to receive(:imex_installed?).and_return(false) |
305 | | - end |
306 | | - runner = runner(platform: platform, version: version, step_into: ['nvidia_imex']) |
307 | | - ConvergeNvidiaImex.configure(runner) |
308 | | - end |
309 | | - cached(:node) { chef_run.node } |
310 | | - |
311 | | - it 'does not configure nvidia-imex' do |
312 | | - is_expected.not_to configure_nvidia_imex('nvidia-imex') |
313 | | - end |
314 | | - end |
315 | | - |
316 | | - %w(HeadNode LoginNode ComputeFleet).each do |node_type| |
317 | | - context "when get_nvswitch_count > 1 on #{node_type} node" do |
318 | | - cached(:chef_run) do |
319 | | - stubs_for_provider('nvidia_imex[configure]') do |pro| |
320 | | - allow(pro).to receive(:imex_installed?).and_return(true) |
321 | | - allow(pro).to receive(:get_device_ids).and_return({ 'gb200' => 'test' }) |
322 | | - allow(pro).to receive(:get_nvswitch_count).with('test').and_return(4) |
323 | | - allow(pro).to receive(:enable_force_configuration?).and_return(force_indicator) |
| 300 | + [true, false].each do |shared_dir_exists| |
| 301 | + [true, false].each do |imex_service_file_exists| |
| 302 | + for_all_oses do |platform, version| |
| 303 | + context "on #{platform}#{version} with force_configuration #{force_indicator} with shared_dir existence #{shared_dir_exists}" do |
| 304 | + context "when nvidia-imex binary is not installed" do |
| 305 | + cached(:chef_run) do |
| 306 | + stubs_for_resource('nvidia_imex') do |res| |
| 307 | + allow(res).to receive(:imex_installed?).and_return(false) |
| 308 | + allow(Dir).to receive(:exist?).with(nvidia_imex_shared_dir).and_return(shared_dir_exists) |
| 309 | + allow(File).to receive(:exist?).with(imex_service_file).and_return(imex_service_file_exists) |
| 310 | + end |
| 311 | + runner = runner(platform: platform, version: version, step_into: ['nvidia_imex']) |
| 312 | + ConvergeNvidiaImex.configure(runner) |
324 | 313 | end |
325 | | - runner(platform: platform, version: version, step_into: ['nvidia_imex']) |
326 | | - end |
327 | | - cached(:node) { chef_run.node } |
| 314 | + cached(:node) { chef_run.node } |
328 | 315 |
|
329 | | - before do |
330 | | - chef_run.node.override['cluster']['region'] = 'aws_region' |
331 | | - chef_run.node.override['cluster']['nvidia']['imex']['force_configuration'] = force_indicator |
332 | | - chef_run.node.override['cluster']['nvidia']['imex']['shared_dir'] = nvidia_imex_shared_dir |
333 | | - chef_run.node.override['cluster']['node_type'] = node_type |
334 | | - chef_run.node.override['cluster']['scheduler_queue_name'] = queue_name |
335 | | - chef_run.node.override['cluster']['scheduler_compute_resource_name'] = compute_resource_name |
336 | | - |
337 | | - ConvergeNvidiaImex.configure(chef_run) |
338 | | - end |
339 | | - |
340 | | - if (platform == 'amazon' && version == '2') || %w(HeadNode LoginNode).include?(node_type) |
341 | 316 | it 'does not configure nvidia-imex' do |
342 | | - is_expected.not_to create_if_missing_template("#{nvidia_imex_shared_dir}/nodes_config_#{queue_name}_#{compute_resource_name}.cfg") |
343 | | - .with(source: 'nvidia-imex/nvidia-imex-nodes.erb') |
344 | | - .with(user: 'root') |
345 | | - .with(group: 'root') |
346 | | - .with(mode: '0755') |
347 | | - is_expected.not_to create_if_missing_template("#{nvidia_imex_shared_dir}/config_#{queue_name}_#{compute_resource_name}.cfg") |
348 | | - .with(source: 'nvidia-imex/nvidia-imex-config.erb') |
349 | | - .with(user: 'root') |
350 | | - .with(group: 'root') |
351 | | - .with(mode: '0755') |
352 | | - .with(variables: { imex_nodes_config_file_path: "#{nvidia_imex_shared_dir}/nodes_config_#{queue_name}_#{compute_resource_name}.cfg" }) |
353 | | - is_expected.not_to create_template("/etc/systemd/system/nvidia-imex.service") |
354 | | - .with(source: 'nvidia-imex/nvidia-imex.service.erb') |
355 | | - .with(user: 'root') |
356 | | - .with(group: 'root') |
357 | | - .with(mode: '0644') |
358 | | - .with(variables: { imex_main_config_file_path: "#{nvidia_imex_shared_dir}/config_#{queue_name}_#{compute_resource_name}.cfg" }) |
359 | | - is_expected.not_to start_service('nvidia-imex').with_action(%i(enable start)).with_supports({ status: true }) |
360 | | - end |
361 | | - else |
362 | | - it 'it starts nvidia-imex service' do |
363 | | - is_expected.to create_if_missing_template("#{nvidia_imex_shared_dir}/nodes_config_#{queue_name}_#{compute_resource_name}.cfg") |
364 | | - .with(source: 'nvidia-imex/nvidia-imex-nodes.erb') |
365 | | - .with(user: 'root') |
366 | | - .with(group: 'root') |
367 | | - .with(mode: '0755') |
368 | | - is_expected.to create_if_missing_template("#{nvidia_imex_shared_dir}/config_#{queue_name}_#{compute_resource_name}.cfg") |
369 | | - .with(source: 'nvidia-imex/nvidia-imex-config.erb') |
370 | | - .with(user: 'root') |
371 | | - .with(group: 'root') |
372 | | - .with(mode: '0755') |
373 | | - .with(variables: { imex_nodes_config_file_path: "#{nvidia_imex_shared_dir}/nodes_config_#{queue_name}_#{compute_resource_name}.cfg" }) |
374 | | - is_expected.to create_template("/etc/systemd/system/nvidia-imex.service") |
375 | | - .with(source: 'nvidia-imex/nvidia-imex.service.erb') |
376 | | - .with(user: 'root') |
377 | | - .with(group: 'root') |
378 | | - .with(mode: '0644') |
379 | | - .with(variables: { imex_main_config_file_path: "#{nvidia_imex_shared_dir}/config_#{queue_name}_#{compute_resource_name}.cfg" }) |
380 | | - is_expected.to start_service('nvidia-imex').with_action(%i(enable start)).with_supports({ status: true }) |
| 317 | + is_expected.not_to configure_nvidia_imex('nvidia-imex') |
381 | 318 | end |
382 | 319 | end |
383 | | - end |
384 | | - end |
385 | 320 |
|
386 | | - context "when get_nvswitch_count <= 1" do |
387 | | - cached(:chef_run) do |
388 | | - stubs_for_provider('nvidia_imex[configure]') do |pro| |
389 | | - allow(pro).to receive(:imex_installed?).and_return(true) |
390 | | - allow(pro).to receive(:get_device_ids).and_return({ 'gb200' => 'test' }) |
391 | | - allow(pro).to receive(:get_nvswitch_count).with('test').and_return(1) |
392 | | - allow(pro).to receive(:enable_force_configuration?).and_return(force_indicator) |
| 321 | + %w(HeadNode LoginNode ComputeFleet).each do |node_type| |
| 322 | + context "when get_nvswitch_count > 1 on #{node_type} node" do |
| 323 | + cached(:chef_run) do |
| 324 | + stubs_for_provider('nvidia_imex[configure]') do |pro| |
| 325 | + allow(pro).to receive(:imex_installed?).and_return(true) |
| 326 | + allow(pro).to receive(:get_device_ids).and_return({ 'gb200' => 'test' }) |
| 327 | + allow(pro).to receive(:get_nvswitch_count).with('test').and_return(4) |
| 328 | + allow(pro).to receive(:enable_force_configuration?).and_return(force_indicator) |
| 329 | + allow(Dir).to receive(:exist?).with(nvidia_imex_shared_dir).and_return(shared_dir_exists) |
| 330 | + allow(File).to receive(:exist?).with(imex_service_file).and_return(imex_service_file_exists) |
| 331 | + end |
| 332 | + runner(platform: platform, version: version, step_into: ['nvidia_imex']) |
| 333 | + end |
| 334 | + cached(:node) { chef_run.node } |
| 335 | + |
| 336 | + before do |
| 337 | + chef_run.node.override['cluster']['region'] = 'aws_region' |
| 338 | + chef_run.node.override['cluster']['nvidia']['imex']['force_configuration'] = force_indicator |
| 339 | + chef_run.node.override['cluster']['nvidia']['imex']['shared_dir'] = nvidia_imex_shared_dir |
| 340 | + chef_run.node.override['cluster']['node_type'] = node_type |
| 341 | + chef_run.node.override['cluster']['scheduler_queue_name'] = queue_name |
| 342 | + chef_run.node.override['cluster']['scheduler_compute_resource_name'] = compute_resource_name |
| 343 | + |
| 344 | + ConvergeNvidiaImex.configure(chef_run) |
| 345 | + end |
| 346 | + |
| 347 | + if (platform == 'amazon' && version == '2') || %w(HeadNode LoginNode).include?(node_type) |
| 348 | + it 'does not configure nvidia-imex' do |
| 349 | + is_expected.not_to create_if_missing_template("#{nvidia_imex_shared_dir}/nodes_config_#{queue_name}_#{compute_resource_name}.cfg") |
| 350 | + .with(source: 'nvidia-imex/nvidia-imex-nodes.erb') |
| 351 | + .with(user: 'root') |
| 352 | + .with(group: 'root') |
| 353 | + .with(mode: '0755') |
| 354 | + is_expected.not_to create_if_missing_template("#{nvidia_imex_shared_dir}/config_#{queue_name}_#{compute_resource_name}.cfg") |
| 355 | + .with(source: 'nvidia-imex/nvidia-imex-config.erb') |
| 356 | + .with(user: 'root') |
| 357 | + .with(group: 'root') |
| 358 | + .with(mode: '0755') |
| 359 | + .with(variables: { imex_nodes_config_file_path: "#{nvidia_imex_shared_dir}/nodes_config_#{queue_name}_#{compute_resource_name}.cfg" }) |
| 360 | + is_expected.not_to create_template(imex_service_file) |
| 361 | + .with(source: 'nvidia-imex/nvidia-imex.service.erb') |
| 362 | + .with(user: 'root') |
| 363 | + .with(group: 'root') |
| 364 | + .with(mode: '0644') |
| 365 | + .with(variables: { imex_main_config_file_path: "#{nvidia_imex_shared_dir}/config_#{queue_name}_#{compute_resource_name}.cfg" }) |
| 366 | + is_expected.not_to start_service('nvidia-imex').with_action(%i(enable start)).with_supports({ status: true }) |
| 367 | + end |
| 368 | + else |
| 369 | + it 'it starts nvidia-imex service' do |
| 370 | + if shared_dir_exists |
| 371 | + is_expected.to create_if_missing_template("#{nvidia_imex_shared_dir}/nodes_config_#{queue_name}_#{compute_resource_name}.cfg") |
| 372 | + .with(source: 'nvidia-imex/nvidia-imex-nodes.erb') |
| 373 | + .with(user: 'root') |
| 374 | + .with(group: 'root') |
| 375 | + .with(mode: '0755') |
| 376 | + is_expected.to create_if_missing_template("#{nvidia_imex_shared_dir}/config_#{queue_name}_#{compute_resource_name}.cfg") |
| 377 | + .with(source: 'nvidia-imex/nvidia-imex-config.erb') |
| 378 | + .with(user: 'root') |
| 379 | + .with(group: 'root') |
| 380 | + .with(mode: '0755') |
| 381 | + .with(variables: { imex_nodes_config_file_path: "#{nvidia_imex_shared_dir}/nodes_config_#{queue_name}_#{compute_resource_name}.cfg" }) |
| 382 | + is_expected.to create_template(imex_service_file) |
| 383 | + .with(source: 'nvidia-imex/nvidia-imex.service.erb') |
| 384 | + .with(user: 'root') |
| 385 | + .with(group: 'root') |
| 386 | + .with(mode: '0644') |
| 387 | + .with(variables: { imex_main_config_file_path: "#{nvidia_imex_shared_dir}/config_#{queue_name}_#{compute_resource_name}.cfg" }) |
| 388 | + else |
| 389 | + is_expected.not_to create_if_missing_template("#{nvidia_imex_shared_dir}/nodes_config_#{queue_name}_#{compute_resource_name}.cfg") |
| 390 | + .with(source: 'nvidia-imex/nvidia-imex-nodes.erb') |
| 391 | + .with(user: 'root') |
| 392 | + .with(group: 'root') |
| 393 | + .with(mode: '0755') |
| 394 | + is_expected.not_to create_if_missing_template("#{nvidia_imex_shared_dir}/config_#{queue_name}_#{compute_resource_name}.cfg") |
| 395 | + .with(source: 'nvidia-imex/nvidia-imex-config.erb') |
| 396 | + .with(user: 'root') |
| 397 | + .with(group: 'root') |
| 398 | + .with(mode: '0755') |
| 399 | + .with(variables: { imex_nodes_config_file_path: "#{nvidia_imex_shared_dir}/nodes_config_#{queue_name}_#{compute_resource_name}.cfg" }) |
| 400 | + is_expected.not_to create_template(imex_service_file) |
| 401 | + .with(source: 'nvidia-imex/nvidia-imex.service.erb') |
| 402 | + .with(user: 'root') |
| 403 | + .with(group: 'root') |
| 404 | + .with(mode: '0644') |
| 405 | + .with(variables: { imex_main_config_file_path: "#{nvidia_imex_shared_dir}/config_#{queue_name}_#{compute_resource_name}.cfg" }) |
| 406 | + end |
| 407 | + if imex_service_file_exists |
| 408 | + is_expected.to start_service('nvidia-imex').with_action(%i(enable start)).with_supports({ status: true }) |
| 409 | + else |
| 410 | + is_expected.not_to start_service('nvidia-imex').with_action(%i(enable start)).with_supports({ status: true }) |
| 411 | + end |
| 412 | + end |
| 413 | + end |
| 414 | + end |
393 | 415 | end |
394 | | - runner = runner(platform: platform, version: version, step_into: ['nvidia_imex']) |
395 | | - ConvergeNvidiaImex.configure(runner) |
396 | | - end |
397 | | - cached(:node) { chef_run.node } |
398 | 416 |
|
399 | | - before do |
400 | | - chef_run.node.override['cluster']['region'] = 'aws_region' |
401 | | - chef_run.node.override['cluster']['nvidia']['imex']['force_configuration'] = force_indicator |
402 | | - end |
| 417 | + context "when get_nvswitch_count <= 1" do |
| 418 | + cached(:chef_run) do |
| 419 | + stubs_for_provider('nvidia_imex[configure]') do |pro| |
| 420 | + allow(pro).to receive(:imex_installed?).and_return(true) |
| 421 | + allow(pro).to receive(:get_device_ids).and_return({ 'gb200' => 'test' }) |
| 422 | + allow(pro).to receive(:get_nvswitch_count).with('test').and_return(1) |
| 423 | + allow(pro).to receive(:enable_force_configuration?).and_return(force_indicator) |
| 424 | + allow(Dir).to receive(:exist?).with(nvidia_imex_shared_dir).and_return(shared_dir_exists) |
| 425 | + allow(File).to receive(:exist?).with(imex_service_file).and_return(imex_service_file_exists) |
| 426 | + end |
| 427 | + runner = runner(platform: platform, version: version, step_into: ['nvidia_imex']) |
| 428 | + ConvergeNvidiaImex.configure(runner) |
| 429 | + end |
| 430 | + cached(:node) { chef_run.node } |
403 | 431 |
|
404 | | - if ['true', 'yes', true].include?(force_indicator) |
405 | | - it 'does configure nvidia-imex' do |
406 | | - is_expected.to start_service('nvidia-imex').with_action(%i(enable start)).with_supports({ status: true }) |
407 | | - end |
408 | | - else |
409 | | - it 'does not configure nvidia-imex' do |
410 | | - is_expected.not_to start_service('nvidia-imex').with_action(%i(enable start)).with_supports({ status: true }) |
| 432 | + before do |
| 433 | + chef_run.node.override['cluster']['region'] = 'aws_region' |
| 434 | + chef_run.node.override['cluster']['nvidia']['imex']['force_configuration'] = force_indicator |
| 435 | + end |
| 436 | + |
| 437 | + if ['true', 'yes', true].include?(force_indicator) && imex_service_file_exists |
| 438 | + it 'does configure nvidia-imex' do |
| 439 | + is_expected.to start_service('nvidia-imex').with_action(%i(enable start)).with_supports({ status: true }) |
| 440 | + end |
| 441 | + else |
| 442 | + it 'does not configure nvidia-imex' do |
| 443 | + is_expected.not_to start_service('nvidia-imex').with_action(%i(enable start)).with_supports({ status: true }) |
| 444 | + end |
| 445 | + end |
411 | 446 | end |
412 | 447 | end |
413 | 448 | end |
|
0 commit comments