Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 28 additions & 5 deletions checks/system/slurm/slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import reframe.utility.osext as osext
import reframe.utility.sanity as sn

sys.path.append(str(pathlib.Path(__file__).parent / 'mixins'))
sys.path.append(str(pathlib.Path(__file__).parent.parent.parent / 'mixins'))
from uenv_slurm_mpi_options import UenvSlurmMpiOptionsMixin # noqa: E402


Expand Down Expand Up @@ -197,7 +197,8 @@ def assert_found_exceeded_memory(self):


@rfm.simple_test
class MemoryOverconsumptionCheckMPI(SlurmCompiledBaseCheck, UenvSlurmMpiOptionsMixin):
class MemoryOverconsumptionCheckMPI(SlurmCompiledBaseCheck,
UenvSlurmMpiOptionsMixin):
# TODO: maintainers = ['@jgphpc', '@ekouts']
descr = 'Tests for max allocatable memory'
valid_systems = ['+remote']
Expand Down Expand Up @@ -550,6 +551,28 @@ def validate(self):
return sn.assert_not_found(r'\bisolcpus=', self.stdout)


@rfm.simple_test
class SlurmNoUvmPerfAccessCounterMigration(rfm.RunOnlyRegressionTest):
    """Verify an nvidia_uvm module parameter through its sysfs entry.

    The test runs ``cat`` on
    ``/sys/module/nvidia_uvm/parameters/
    uvm_perf_access_counter_mimc_migration_enable`` and passes only if the
    reported value is ``0``.
    """

    # Restricted to remote partitions that advertise both the `scontrol`
    # and `nvgpu` features.
    valid_systems = ['+remote +scontrol +nvgpu']
    valid_prog_environs = ['builtin']
    maintainers = ['msimberg', 'SSA']
    descr = '''
    Check that uvm_perf_access_counter_mimc_migration_enable is set to 0
    as it is buggy in older drivers.
    '''
    time_limit = '1m'
    num_tasks_per_node = 1
    # Nothing to stage: the test only reads a file that already exists on
    # the node.
    sourcesdir = None
    executable = 'cat'
    executable_opts = [('/sys/module/nvidia_uvm/parameters/'
                        'uvm_perf_access_counter_mimc_migration_enable')]
    tags = {'production', 'maintenance', 'slurm'}

    @sanity_function
    def validate(self):
        """Assert that stdout contains a line consisting solely of ``0``.

        The pattern is anchored to a whole line so that a zero embedded in
        another value (e.g. ``10``) or in an unrelated message does not
        make the check pass. This relies on ReFrame's sanity functions
        matching with ``re.MULTILINE``.
        """
        return sn.assert_found(
            r'^[ \t]*0[ \t]*$', self.stdout,
            msg=('uvm_perf_access_counter_mimc_migration_enable '
                 'is not set to 0')
        )


@rfm.simple_test
class SlurmGPUGresTest(SlurmSimpleBaseCheck):
descr = '''
Expand Down Expand Up @@ -585,13 +608,13 @@ def assert_gres_valid(self):
)
good_nodes = sn.evaluate(
sn.extractall(rf'{node_re}.*{part_re}.*{gres_re}',
self.stdout, 1)
self.stdout, 1)
)
bad_nodes = ','.join(sorted(set(all_nodes) - set(good_nodes)))

return sn.assert_true(
len(bad_nodes) == 0,
msg=(f'{len(good_nodes)}/{len(all_nodes)} of '
f'{partition_name} nodes satisfy {gres_re}. Bad nodes: '
f'{bad_nodes}')
f'{partition_name} nodes satisfy {gres_re}. Bad nodes: '
f'{bad_nodes}')
)