@@ -338,7 +338,11 @@ def run_test_and_summarize_results(
338338
339339 # copy current environment variables
340340 _environ = dict (os .environ )
341+ <<<<<<< HEAD
341342
343+ =======
344+
345+ >>>>>>> 5729657180 ([ROCm] Specialized binary elementwise broadcast kernel for mixed dtypes with float/bfloat16/half (#2791))
342346 # modify path
343347 test_shell_path = pytorch_root_dir + "/.ci/pytorch/test.sh"
344348 test_run_test_path = pytorch_root_dir + "/test/run_test.py"
@@ -385,6 +389,13 @@ def run_test_and_summarize_results(
385389 global CONSOLIDATED_LOG_FILE_PATH
386390 CONSOLIDATED_LOG_FILE_PATH = overall_logs_path_current_run + CONSOLIDATED_LOG_FILE_NAME
387391
392+ <<<<<<< HEAD
393+ =======
394+ # Check multi gpu availability if distributed tests are enabled
395+ if ("distributed" in test_config ) or len (distributed_list ) != 0 :
396+ check_num_gpus_for_distributed ()
397+
398+ >>>>>>> 5729657180 ([ROCm] Specialized binary elementwise broadcast kernel for mixed dtypes with float/bfloat16/half (#2791))
388399 # Install test requirements
389400 command = "pip3 install -r requirements.txt && pip3 install -r .ci/docker/requirements-ci.txt"
390401 run_command_and_capture_output (command )
@@ -393,15 +404,23 @@ def run_test_and_summarize_results(
393404 if not priority_tests and not default_list and not distributed_list and not inductor_list :
394405 # run entire tests for default, distributed and inductor workflows → use test.sh
395406 if not test_config :
407+ <<<<<<< HEAD
408+ =======
409+ check_num_gpus_for_distributed ()
410+ >>>>>>> 5729657180 ([ROCm] Specialized binary elementwise broadcast kernel for mixed dtypes with float/bfloat16/half (#2791))
396411 # default test process
397412 res_default_all = run_entire_tests ("default" , test_shell_path , overall_logs_path_current_run , test_reports_src )
398413 res_all_tests_dict ["default" ] = res_default_all
399414 # distributed test process
415+ <<<<<<< HEAD
400416 res_distributed_all = {}
401417 if is_multi_gpus_available_for_distributed ():
402418 res_distributed_all = run_entire_tests ("distributed" , test_shell_path , overall_logs_path_current_run , test_reports_src )
403419 else :
404420 print ("Warning: Cannot run distributed unit tests. Number of visible GPUs should be >1 to run distributed unit tests." )
421+ =======
422+ res_distributed_all = run_entire_tests ("distributed" , test_shell_path , overall_logs_path_current_run , test_reports_src )
423+ >>>>>>> 5729657180 ([ROCm] Specialized binary elementwise broadcast kernel for mixed dtypes with float/bfloat16/half (#2791))
405424 res_all_tests_dict ["distributed" ] = res_distributed_all
406425 # inductor test process
407426 res_inductor_all = run_entire_tests ("inductor" , test_shell_path , overall_logs_path_current_run , test_reports_src )
@@ -414,27 +433,39 @@ def run_test_and_summarize_results(
414433 res_default_all = run_entire_tests ("default" , test_shell_path , overall_logs_path_current_run , test_reports_src )
415434 res_all_tests_dict ["default" ] = res_default_all
416435 if "distributed" in workflow_list :
436+ <<<<<<< HEAD
417437 res_distributed_all = {}
418438 if is_multi_gpus_available_for_distributed ():
419439 res_distributed_all = run_entire_tests ("distributed" , test_shell_path , overall_logs_path_current_run , test_reports_src )
420440 else :
421441 print ("Warning: Cannot run distributed unit tests. Number of visible GPUs should be >1 to run distributed unit tests." )
442+ =======
443+ res_distributed_all = run_entire_tests ("distributed" , test_shell_path , overall_logs_path_current_run , test_reports_src )
444+ >>>>>>> 5729657180 ([ROCm] Specialized binary elementwise broadcast kernel for mixed dtypes with float/bfloat16/half (#2791))
422445 res_all_tests_dict ["distributed" ] = res_distributed_all
423446 if "inductor" in workflow_list :
424447 res_inductor_all = run_entire_tests ("inductor" , test_shell_path , overall_logs_path_current_run , test_reports_src )
425448 res_all_tests_dict ["inductor" ] = res_inductor_all
426449 # Run priority test for each workflow
427450 elif priority_tests and not default_list and not distributed_list and not inductor_list :
428451 if not test_config :
452+ <<<<<<< HEAD
453+ =======
454+ check_num_gpus_for_distributed ()
455+ >>>>>>> 5729657180 ([ROCm] Specialized binary elementwise broadcast kernel for mixed dtypes with float/bfloat16/half (#2791))
429456 # default test process
430457 res_default_priority = run_priority_tests ("default" , test_run_test_path , overall_logs_path_current_run , test_reports_src )
431458 res_all_tests_dict ["default" ] = res_default_priority
432459 # distributed test process
460+ <<<<<<< HEAD
433461 res_distributed_priority = {}
434462 if is_multi_gpus_available_for_distributed ():
435463 res_distributed_priority = run_priority_tests ("distributed" , test_run_test_path , overall_logs_path_current_run , test_reports_src )
436464 else :
437465 print ("Warning: Cannot run distributed unit tests. Number of visible GPUs should be >1 to run distributed unit tests." )
466+ =======
467+ res_distributed_priority = run_priority_tests ("distributed" , test_run_test_path , overall_logs_path_current_run , test_reports_src )
468+ >>>>>>> 5729657180 ([ROCm] Specialized binary elementwise broadcast kernel for mixed dtypes with float/bfloat16/half (#2791))
438469 res_all_tests_dict ["distributed" ] = res_distributed_priority
439470 # will not run inductor priority tests
440471 print ("Inductor priority tests cannot run since no core tests defined with inductor workflow." )
@@ -446,11 +477,15 @@ def run_test_and_summarize_results(
446477 res_default_priority = run_priority_tests ("default" , test_run_test_path , overall_logs_path_current_run , test_reports_src )
447478 res_all_tests_dict ["default" ] = res_default_priority
448479 if "distributed" in workflow_list :
480+ <<<<<<< HEAD
449481 res_distributed_priority = {}
450482 if is_multi_gpus_available_for_distributed ():
451483 res_distributed_priority = run_priority_tests ("distributed" , test_run_test_path , overall_logs_path_current_run , test_reports_src )
452484 else :
453485 print ("Warning: Cannot run distributed unit tests. Number of visible GPUs should be >1 to run distributed unit tests." )
486+ =======
487+ res_distributed_priority = run_priority_tests ("distributed" , test_run_test_path , overall_logs_path_current_run , test_reports_src )
488+ >>>>>>> 5729657180 ([ROCm] Specialized binary elementwise broadcast kernel for mixed dtypes with float/bfloat16/half (#2791))
454489 res_all_tests_dict ["distributed" ] = res_distributed_priority
455490 if "inductor" in workflow_list :
456491 print ("Inductor priority tests cannot run since no core tests defined with inductor workflow." )
@@ -466,11 +501,15 @@ def run_test_and_summarize_results(
466501 distributed_workflow_list = []
467502 for item in distributed_list :
468503 distributed_workflow_list .append (item )
504+ <<<<<<< HEAD
469505 res_distributed_selected = {}
470506 if is_multi_gpus_available_for_distributed ():
471507 res_distributed_selected = run_selected_tests ("distributed" , test_run_test_path , overall_logs_path_current_run , test_reports_src , distributed_workflow_list )
472508 else :
473509 print ("Warning: Cannot run distributed unit tests. Number of visible GPUs should be >1 to run distributed unit tests." )
510+ =======
511+ res_distributed_selected = run_selected_tests ("distributed" , test_run_test_path , overall_logs_path_current_run , test_reports_src , distributed_workflow_list )
512+ >>>>>>> 5729657180 ([ROCm] Specialized binary elementwise broadcast kernel for mixed dtypes with float/bfloat16/half (#2791))
474513 res_all_tests_dict ["distributed" ] = res_distributed_selected
475514 if inductor_list :
476515 inductor_workflow_list = []
@@ -518,10 +557,17 @@ def parse_args():
518557 "RUN SELECTED TESTS: python3 run_pytorch_unit_tests.py --default_list test_weak test_dlpack --inductor_list inductor/test_torchinductor" )
519558 return parser .parse_args ()
520559
560+ <<<<<<< HEAD
521561def is_multi_gpus_available_for_distributed ():
522562 p = subprocess .run ("rocminfo | grep -cE 'Name:\\ s+gfx'" , shell = True , capture_output = True , text = True )
523563 num_gpus_visible = int (p .stdout )
524564 return num_gpus_visible > 1
565+ =======
566+ def check_num_gpus_for_distributed ():
567+ p = subprocess .run ("rocminfo | grep -cE 'Name:\s+gfx'" , shell = True , capture_output = True , text = True )
568+ num_gpus_visible = int (p .stdout )
569+ assert num_gpus_visible > 1 , "Number of visible GPUs should be >1 to run distributed unit tests"
570+ >>>>>>> 5729657180 ([ROCm] Specialized binary elementwise broadcast kernel for mixed dtypes with float/bfloat16/half (#2791))
525571
526572def main ():
527573 args = parse_args ()
0 commit comments