@@ -30,6 +30,7 @@ def _set_job_resources(test: rfm.RegressionTest):
3030 test .job .num_tasks_per_socket = test .num_tasks_per_socket
3131 test .job .num_cpus_per_task = test .num_cpus_per_task
3232 test .job .use_smt = test .use_multithreading
33+ test .job .used_cpus_per_task = test .used_cpus_per_task
3334
3435
3536def _assign_default_num_cpus_per_node (test : rfm .RegressionTest ):
@@ -161,12 +162,7 @@ def assign_tasks_per_compute_unit(test: rfm.RegressionTest):
161162 if not test .used_cpus_per_task :
162163 test .used_cpus_per_task = test .num_cpus_per_task
163164
164- # Tell srun to use the number of actually used cpus per tasks, not the number of allocated cpus per task.
165- # With the default srun launcher in ReFrame this doesn’t work because it sets `srun --cpus-per-task`.
166- # To make it work, a custom srun launcher should be used that doesn’t set `srun --cpus-per-task`.
167- test .env_vars ['SLURM_CPUS_PER_TASK' ] = test .used_cpus_per_task
168-
169- if test .current_partition .launcher_type ().registered_name == 'srun' :
165+ if test .current_partition .launcher_type ().registered_name in ['eessi-srun' , 'srun' ]:
170166 # Make sure srun inherits --cpus-per-task from the job environment for Slurm versions >= 22.05 < 23.11,
171167 # ensuring the same task binding across all Slurm versions.
172168 # https://bugs.schedmd.com/show_bug.cgi?id=13351
@@ -705,8 +701,9 @@ def set_compact_process_binding(test: rfm.RegressionTest):
705701 check_proc_attribute_defined (test , 'num_cpus_per_core' )
706702 num_cpus_per_core = test .current_partition .processor .num_cpus_per_core
707703 physical_cpus_per_task = int (test .used_cpus_per_task / num_cpus_per_core )
704+ launcher = test .current_partition .launcher_type ().registered_name
708705
709- if test . current_partition . launcher_type (). registered_name == 'mpirun' :
706+ if launcher == 'mpirun' :
710707 # Do binding for intel and OpenMPI's mpirun, and srun
711708 env_vars = {
712709 'I_MPI_PIN_CELL' : 'core' , # Don't bind to hyperthreads, only to physical cores
@@ -725,17 +722,16 @@ def set_compact_process_binding(test: rfm.RegressionTest):
725722 if any (re .search (pattern , x ) for x in test .modules ):
726723 test .job .launcher .options .append (f'--map-by slot:PE={ physical_cpus_per_task } --report-bindings' )
727724 log (f'Set launcher command to { test .job .launcher .run_command (test .job )} ' )
728- elif test . current_partition . launcher_type (). registered_name == ' srun' :
725+ elif launcher in [ 'eessi- srun', 'srun' ] :
729726 # Set compact binding for SLURM. Only effective if the task/affinity plugin is enabled
730727 # and when number of tasks times cpus per task equals either socket, core or thread count
731728 env_vars = {
732729 'SLURM_DISTRIBUTION' : 'block:block' ,
733- 'SLURM_CPU_BIND' : 'verbose' ,
730+ 'SLURM_CPU_BIND' : 'verbose,cores ' ,
734731 }
735732 else :
736733 env_vars = {}
737- msg = "hooks.set_compact_process_binding does not support the current launcher"
738- msg += f" ({ test .current_partition .launcher_type ().registered_name } )."
734+ msg = f"hooks.set_compact_process_binding does not support the current launcher ({ launcher } )."
739735 msg += " The test will run, but using the default binding strategy of your parallel launcher."
740736 msg += " This may lead to suboptimal performance."
741737 msg += " Please expand the functionality of hooks.set_compact_process_binding for your parallel launcher."
0 commit comments