@@ -77,7 +77,7 @@ def _assign_default_num_gpus_per_node(test: rfm.RegressionTest):
7777 log (f'default_num_gpus_per_node set to { test .default_num_gpus_per_node } ' )
7878
7979
80- def assign_tasks_per_compute_unit (test : rfm .RegressionTest , compute_unit : str , num_per : int = 1 ):
80+ def assign_tasks_per_compute_unit (test : rfm .RegressionTest ):
8181 """
8282 Assign one task per compute unit. More than 1 task per compute unit can be assigned with
8383 test.num_tasks_per_compute_unit (num_per) for compute units that support it.
@@ -102,7 +102,9 @@ def assign_tasks_per_compute_unit(test: rfm.RegressionTest, compute_unit: str, n
102102 - assign_tasks_per_compute_unit(test, COMPUTE_UNITS.CPU_SOCKET) will launch 2 tasks with 64 threads per task
103103
104104 """
105- log (f'assign_tasks_per_compute_unit called with compute_unit: { compute_unit } and num_per: { num_per } ' )
105+ compute_unit = test .compute_unit
106+ num_per = test .num_tasks_per_compute_unit
107+ log (f'assign_tasks_per_compute_unit with compute_unit: { compute_unit } and num_per: { num_per } ' )
106108
107109 if num_per != 1 and compute_unit not in [COMPUTE_UNITS .NODE ]:
108110 raise NotImplementedError (
@@ -150,25 +152,32 @@ def assign_tasks_per_compute_unit(test: rfm.RegressionTest, compute_unit: str, n
150152 elif compute_unit == COMPUTE_UNITS .NUMA_NODE :
151153 _assign_one_task_per_numa_node (test )
152154 elif compute_unit == COMPUTE_UNITS .NODE :
153- _assign_num_tasks_per_node (test , num_per )
155+ _assign_num_tasks_per_node (test )
154156 else :
155157 raise ValueError (f'compute unit { compute_unit } is currently not supported' )
156158
157159 _check_always_request_gpus (test )
158160
161+ if not test .used_cpus_per_task :
162+ test .used_cpus_per_task = test .num_cpus_per_task
163+
159164 if test .current_partition .launcher_type ().registered_name == 'srun' :
165+ # Don't let srun launcher set --cpus-per-task
166+ test .job .launcher .use_cpus_per_task = False
167+ # Add --cpus-per-task to srun launcher
168+ test .job .launcher .options += [f'--cpus-per-task={ test .used_cpus_per_task } ' ]
160169 # Make sure srun inherits --cpus-per-task from the job environment for Slurm versions >= 22.05 < 23.11,
161170 # ensuring the same task binding across all Slurm versions.
162171 # https://bugs.schedmd.com/show_bug.cgi?id=13351
163172 # https://bugs.schedmd.com/show_bug.cgi?id=11275
164173 # https://bugs.schedmd.com/show_bug.cgi?id=15632#c43
165- test .env_vars ['SRUN_CPUS_PER_TASK' ] = test .num_cpus_per_task
174+ test .env_vars ['SRUN_CPUS_PER_TASK' ] = test .used_cpus_per_task
166175 log (f'Set environment variable SRUN_CPUS_PER_TASK to { test .env_vars ["SRUN_CPUS_PER_TASK" ]} ' )
167176
168177 _set_job_resources (test )
169178
170179
171- def _assign_num_tasks_per_node (test : rfm .RegressionTest , num_per : int = 1 ):
180+ def _assign_num_tasks_per_node (test : rfm .RegressionTest ):
172181 """
173182 Sets num_tasks_per_node and num_cpus_per_task such that it will run
174183 'num_per' tasks per node, unless specified with:
@@ -183,6 +192,7 @@ def _assign_num_tasks_per_node(test: rfm.RegressionTest, num_per: int = 1):
183192 - num_tasks_per_node = num_per
184193 - num_cpus_per_task = test.default_num_cpus_per_node / num_tasks_per_node
185194 """
195+ num_per = test .num_tasks_per_compute_unit
186196
187197 # neither num_tasks_per_node nor num_cpus_per_task are set
188198 if not test .num_tasks_per_node and not test .num_cpus_per_task :
@@ -693,9 +703,10 @@ def set_compact_process_binding(test: rfm.RegressionTest):
693703 # TODO: check if this also leads to sensible binding when using COMPUTE_UNITS.HWTHREAD
694704 check_proc_attribute_defined (test , 'num_cpus_per_core' )
695705 num_cpus_per_core = test .current_partition .processor .num_cpus_per_core
696- physical_cpus_per_task = int (test .num_cpus_per_task / num_cpus_per_core )
706+ physical_cpus_per_task = int (test .used_cpus_per_task / num_cpus_per_core )
707+ launcher = test .current_partition .launcher_type ().registered_name
697708
698- if test . current_partition . launcher_type (). registered_name == 'mpirun' :
709+ if launcher == 'mpirun' :
699710 # Do binding for intel and OpenMPI's mpirun, and srun
700711 env_vars = {
701712 'I_MPI_PIN_CELL' : 'core' , # Don't bind to hyperthreads, only to physical cores
@@ -714,17 +725,16 @@ def set_compact_process_binding(test: rfm.RegressionTest):
714725 if any (re .search (pattern , x ) for x in test .modules ):
715726 test .job .launcher .options .append (f'--map-by slot:PE={ physical_cpus_per_task } --report-bindings' )
716727 log (f'Set launcher command to { test .job .launcher .run_command (test .job )} ' )
717- elif test . current_partition . launcher_type (). registered_name == 'srun' :
728+ elif launcher == 'srun' :
718729 # Set compact binding for SLURM. Only effective if the task/affinity plugin is enabled
719730 # and when number of tasks times cpus per task equals either socket, core or thread count
720731 env_vars = {
721732 'SLURM_DISTRIBUTION' : 'block:block' ,
722- 'SLURM_CPU_BIND' : 'verbose' ,
733+ 'SLURM_CPU_BIND' : 'verbose,cores ' ,
723734 }
724735 else :
725736 env_vars = {}
726- msg = "hooks.set_compact_process_binding does not support the current launcher"
727- msg += f" ({ test .current_partition .launcher_type ().registered_name } )."
737+ msg = f"hooks.set_compact_process_binding does not support the current launcher ({ launcher } )."
728738 msg += " The test will run, but using the default binding strategy of your parallel launcher."
729739 msg += " This may lead to suboptimal performance."
730740 msg += " Please expand the functionality of hooks.set_compact_process_binding for your parallel launcher."
@@ -760,7 +770,7 @@ def set_omp_num_threads(test: rfm.RegressionTest):
760770 """
761771 Set number of OpenMP threads equal to number of CPUs per task
762772 """
763- test .env_vars ['OMP_NUM_THREADS' ] = test .num_cpus_per_task
773+ test .env_vars ['OMP_NUM_THREADS' ] = test .used_cpus_per_task
764774 log (f'Set environment variable OMP_NUM_THREADS to { test .env_vars ["OMP_NUM_THREADS" ]} ' )
765775
766776
0 commit comments