|
| 1 | +import reframe as rfm |
| 2 | +import reframe.utility.sanity as sn |
| 3 | + |
| 4 | + |
class MemBandwidthTest(rfm.RunOnlyRegressionTest):
    """Common base for likwid-bench memory-bandwidth checks.

    Sets up the ``likwid-bench`` executable, the per-partition CPU/NUMA
    topology tables used by subclasses in ``setup()``, and the shared
    sanity/performance patterns that parse the ``MByte/s`` figure from
    stdout.
    """

    def __init__(self):
        super().__init__()

        self.modules = ['likwid']
        self.valid_prog_environs = ['PrgEnv-gnu']
        self.sourcesdir = None
        self.executable = 'likwid-bench'

        self.num_tasks = 0
        self.num_tasks_per_core = 2

        # daint and dom share identical node types, so build the mirrored
        # per-partition tables once per node flavour instead of spelling
        # out each system separately.
        # Cache sizes test each level at half capacity times the number of
        # threads per NUMA domain.
        node_cpus = {'mc': 72, 'gpu': 24}
        node_domains = {'mc': ['S0', 'S1'], 'gpu': ['S0']}
        node_caches = {
            'mc': {'L1': '288kB', 'L2': '2304kB', 'L3': '23MB',
                   'memory': '1800MB'},
            'gpu': {'L1': '192kB', 'L2': '1536kB', 'L3': '15MB',
                    'memory': '1200MB'},
        }

        self.system_num_cpus = {}
        self.system_numa_domains = {}
        self.system_cache_sizes = {}
        for system in ('daint', 'dom'):
            for flavour in ('mc', 'gpu'):
                part = '%s:%s' % (system, flavour)
                self.system_num_cpus[part] = node_cpus[flavour]
                self.system_numa_domains[part] = list(node_domains[flavour])
                self.system_cache_sizes[part] = dict(node_caches[flavour])

        self.maintainers = ['SK']
        self.tags = {'benchmark', 'diagnostic'}

        # likwid-bench reports the measured bandwidth as 'MByte/s: <value>'.
        bw_pattern = sn.extractsingle(r'MByte/s:\s*(?P<bw>\S+)',
                                      self.stdout, 'bw', float)
        self.sanity_patterns = sn.assert_ge(bw_pattern, 0.0)
        self.perf_patterns = {'bandwidth': bw_pattern}
| 52 | + |
| 53 | + |
@rfm.required_version('>=2.16-dev0')
@rfm.parameterized_test(*[[l, k] for l in ['L1', 'L2', 'L3']
                          for k in ['load_avx', 'store_avx']],
                        ['memory', 'load_avx'],
                        ['memory', 'store_mem_avx'])
class CPUBandwidth(MemBandwidthTest):
    """Per-cache-level bandwidth benchmark.

    Runs one likwid-bench kernel (``kernel_name``) against one memory
    hierarchy level (``mem_level``: L1/L2/L3/memory) on every NUMA domain
    of the partition.
    """

    def __init__(self, mem_level, kernel_name):
        super().__init__()

        self.descr = 'CPU <- %s %s benchmark' % (mem_level, kernel_name)
        self.valid_systems = ['daint:mc', 'daint:gpu', 'dom:gpu', 'dom:mc']

        # The kernel to run in likwid and the hierarchy level it targets.
        self.kernel_name = kernel_name
        self.mem_level = mem_level

        # Reference bandwidths in MB/s, keyed by node flavour, kernel and
        # memory level.
        self.refs = {
            'mc': {
                'load_avx': {'L1': 5100000, 'L2': 2000000, 'L3': 900000,
                             'memory': 130000},
                'store_avx': {'L1': 2800000, 'L2': 900000, 'L3': 480000},
                'store_mem_avx': {'memory': 85000},
            },
            'gpu': {
                'load_avx': {'L1': 2100000, 'L2': 850000, 'L3': 360000,
                             'memory': 65000},
                'store_avx': {'L1': 1200000, 'L2': 340000, 'L3': 210000},
                'store_mem_avx': {'memory': 42500},
            }
        }
        ref_proxy = {part: self.refs[part][kernel_name][mem_level]
                     for part in self.refs.keys()}

        self.reference = {
            'daint:gpu': {
                'bandwidth': (ref_proxy['gpu'], -0.1, None, 'MB/s')
            },
            'daint:mc': {
                'bandwidth': (ref_proxy['mc'], -0.1, None, 'MB/s')
            },
            'dom:gpu': {
                'bandwidth': (ref_proxy['gpu'], -0.1, None, 'MB/s')
            },
            'dom:mc': {
                'bandwidth': (ref_proxy['mc'], -0.1, None, 'MB/s')
            },
        }

    def setup(self, partition, environ, **job_opts):
        """Build the likwid workgroup options for the current partition."""
        # FIX: the original read `parition.fullname` (typo) and raised
        # NameError as soon as the test was set up.
        pfn = partition.fullname
        self.data_size = self.system_cache_sizes[pfn][self.mem_level]
        self.num_cpus_per_task = self.system_num_cpus[pfn]
        numa_domains = self.system_numa_domains[pfn]

        # Threads per NUMA domain; use integer division — the original
        # relied on '%d' silently truncating a float.
        num_cpu_domain = self.num_cpus_per_task // (len(numa_domains) *
                                                    self.num_tasks_per_core)

        # result for daint:mc: '-w S0:100MB:18:1:2 -w S1:100MB:18:1:2'
        # format: -w domain:data_size:nthreads:chunk_size:stride
        # chunk_size and stride affect which cpus from <domain> are selected
        workgroups = ['-w %s:%s:%d:1:2' %
                      (dom, self.data_size, num_cpu_domain)
                      for dom in numa_domains]

        self.executable_opts = ['-t %s' % self.kernel_name] + workgroups

        super().setup(partition, environ, **job_opts)
| 119 | + |
| 120 | + |
@rfm.required_version('>=2.16-dev0')
@rfm.simple_test
class CPUBandwidthCrossSocket(MemBandwidthTest):
    """Cross-socket bandwidth benchmark.

    Each socket's cores stream data that lives in the *other* socket's
    memory domain, measuring remote (NUMA) read bandwidth on multicore
    partitions.
    """

    def __init__(self):
        super().__init__()

        self.descr = ('CPU S0 <- main memory S1 read '
                      'CPU S1 <- main memory S0 read')
        self.valid_systems = ['daint:mc', 'dom:mc']
        self.kernel_name = 'load_avx'

        # Same expected bandwidth on both multicore partitions.
        xsocket_ref = (56000, -0.1, None, 'MB/s')
        self.reference = {
            'daint:mc': {'bandwidth': xsocket_ref},
            'dom:mc': {'bandwidth': xsocket_ref},
        }

    def setup(self, partition, environ, **job_opts):
        """Pair each NUMA domain's CPUs with the opposite memory domain."""
        pfn = partition.fullname
        self.num_cpus_per_task = self.system_num_cpus[pfn]
        numa_domains = self.system_numa_domains[pfn]

        threads_per_domain = (self.num_cpus_per_task //
                              (len(numa_domains) * self.num_tasks_per_core))

        # daint:mc: '-w S0:100MB:18:1:2-0:S1 -w S1:100MB:18:1:2-0:S0'
        # format:
        # -w domain:data_size:nthreads:chunk_size:stride-stream_nr:mem_domain
        # chunk_size and stride affect which cpus from <domain> are selected
        domain_pairs = zip(numa_domains[:2], reversed(numa_domains[:2]))
        workgroups = []
        for cpu_dom, mem_dom in domain_pairs:
            workgroups.append('-w %s:100MB:%d:1:2-0:%s' %
                              (cpu_dom, threads_per_domain, mem_dom))

        self.executable_opts = ['-t %s' % self.kernel_name] + workgroups

        super().setup(partition, environ, **job_opts)
0 commit comments