Skip to content

Commit 676c076

Browse files
authored
Merge pull request #647 from sebkelle1/checks/cpu-bandwidth
[test] Add Likwid benchmarks for cache, memory and cross-socket bandwidths
2 parents 17a9044 + 239d79d commit 676c076

File tree

1 file changed

+160
-0
lines changed

1 file changed

+160
-0
lines changed
Lines changed: 160 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,160 @@
1+
import reframe as rfm
2+
import reframe.utility.sanity as sn
3+
4+
5+
class MemBandwidthTest(rfm.RunOnlyRegressionTest):
    """Common base for the likwid-bench bandwidth checks.

    Launches ``likwid-bench`` and extracts the achieved bandwidth
    (MByte/s) from its standard output.  Subclasses choose the kernel,
    the data size and the workgroup placement.
    """

    def __init__(self):
        super().__init__()

        self.modules = ['likwid']
        self.valid_prog_environs = ['PrgEnv-gnu']
        self.sourcesdir = None

        self.executable = 'likwid-bench'

        self.num_tasks = 0
        self.num_tasks_per_core = 2

        # Logical CPUs per partition (hyperthreads included).
        self.system_num_cpus = {
            'daint:mc': 72,
            'daint:gpu': 24,
            'dom:mc': 72,
            'dom:gpu': 24,
        }

        # NUMA domains (sockets) available on each partition.
        self.system_numa_domains = {
            'daint:mc': ['S0', 'S1'],
            'daint:gpu': ['S0'],
            'dom:mc': ['S0', 'S1'],
            'dom:gpu': ['S0'],
        }

        # Test each level at half capacity times nthreads per domain
        self.system_cache_sizes = {
            'daint:mc': {'L1': '288kB', 'L2': '2304kB', 'L3': '23MB',
                         'memory': '1800MB'},
            'daint:gpu': {'L1': '192kB', 'L2': '1536kB', 'L3': '15MB',
                          'memory': '1200MB'},
            'dom:mc': {'L1': '288kB', 'L2': '2304kB', 'L3': '23MB',
                       'memory': '1800MB'},
            'dom:gpu': {'L1': '192kB', 'L2': '1536kB', 'L3': '15MB',
                        'memory': '1200MB'},
        }

        self.maintainers = ['SK']
        self.tags = {'benchmark', 'diagnostic'}

        # likwid-bench reports the achieved bandwidth as 'MByte/s: <value>'.
        bw_pattern = sn.extractsingle(r'MByte/s:\s*(?P<bw>\S+)',
                                      self.stdout, 'bw', float)

        # Any non-negative bandwidth counts as a successful run; the
        # actual value is checked against the per-partition references.
        self.sanity_patterns = sn.assert_ge(bw_pattern, 0.0)
        self.perf_patterns = {'bandwidth': bw_pattern}
52+
53+
54+
@rfm.required_version('>=2.16-dev0')
@rfm.parameterized_test(*[[l, k] for l in ['L1', 'L2', 'L3']
                          for k in ['load_avx', 'store_avx']],
                        ['memory', 'load_avx'],
                        ['memory', 'store_mem_avx'])
class CPUBandwidth(MemBandwidthTest):
    """Measure read/write bandwidth of a single memory-hierarchy level.

    Parameterized over the memory level (``L1``/``L2``/``L3``/``memory``)
    and the likwid-bench kernel (AVX load/store variants).
    """

    def __init__(self, mem_level, kernel_name):
        super().__init__()

        self.descr = 'CPU <- %s %s benchmark' % (mem_level, kernel_name)
        self.valid_systems = ['daint:mc', 'daint:gpu', 'dom:gpu', 'dom:mc']

        # the kernel to run in likwid
        self.kernel_name = kernel_name
        self.mem_level = mem_level

        # Reference bandwidths (MB/s) per partition type, kernel and level.
        self.refs = {
            'mc': {
                'load_avx': {'L1': 5100000, 'L2': 2000000, 'L3': 900000,
                             'memory': 130000},
                'store_avx': {'L1': 2800000, 'L2': 900000, 'L3': 480000},
                'store_mem_avx': {'memory': 85000},
            },
            'gpu': {
                'load_avx': {'L1': 2100000, 'L2': 850000, 'L3': 360000,
                             'memory': 65000},
                'store_avx': {'L1': 1200000, 'L2': 340000, 'L3': 210000},
                'store_mem_avx': {'memory': 42500},
            }
        }
        ref_proxy = {part: self.refs[part][kernel_name][mem_level]
                     for part in self.refs.keys()}

        # Allow up to 10% below the reference; no upper bound.
        self.reference = {
            'daint:gpu': {
                'bandwidth': (ref_proxy['gpu'], -0.1, None, 'MB/s')
            },
            'daint:mc': {
                'bandwidth': (ref_proxy['mc'], -0.1, None, 'MB/s')
            },
            'dom:gpu': {
                'bandwidth': (ref_proxy['gpu'], -0.1, None, 'MB/s')
            },
            'dom:mc': {
                'bandwidth': (ref_proxy['mc'], -0.1, None, 'MB/s')
            },
        }

    def setup(self, partition, environ, **job_opts):
        """Build the likwid-bench command line for the current partition."""
        # FIX: original read `parition.fullname` (typo), which raised
        # NameError on every run; also reuse `pfn` for all lookups.
        pfn = partition.fullname
        self.data_size = self.system_cache_sizes[pfn][self.mem_level]
        self.num_cpus_per_task = self.system_num_cpus[pfn]
        numa_domains = self.system_numa_domains[pfn]
        # Threads pinned in each NUMA domain (likwid counts per domain).
        num_cpu_domain = self.num_cpus_per_task / (len(numa_domains) *
                                                   self.num_tasks_per_core)
        # result for daint:mc: '-w S0:100MB:18:1:2 -w S1:100MB:18:1:2'
        # format: -w domain:data_size:nthreads:chunk_size:stride
        # chunk_size and stride affect which cpus from <domain> are selected
        workgroups = ['-w %s:%s:%d:1:2' %
                      (dom, self.data_size, num_cpu_domain)
                      for dom in numa_domains]

        self.executable_opts = ['-t %s' % self.kernel_name] + workgroups

        super().setup(partition, environ, **job_opts)
119+
120+
121+
@rfm.required_version('>=2.16-dev0')
@rfm.simple_test
class CPUBandwidthCrossSocket(MemBandwidthTest):
    """Measure cross-socket read bandwidth.

    Pins threads on each socket and streams data from the *other*
    socket's memory domain, exercising the inter-socket link.
    """

    def __init__(self):
        super().__init__()

        self.descr = ("CPU S0 <- main memory S1 read " +
                      "CPU S1 <- main memory S0 read")

        self.valid_systems = ['daint:mc', 'dom:mc']
        self.kernel_name = 'load_avx'
        # Allow up to 10% below the reference; no upper bound.
        self.reference = {
            'daint:mc': {
                'bandwidth': (56000, -0.1, None, 'MB/s')
            },
            'dom:mc': {
                'bandwidth': (56000, -0.1, None, 'MB/s')
            },
        }

    def setup(self, partition, environ, **job_opts):
        """Build cross-socket workgroups for the current partition."""
        pfn = partition.fullname
        self.num_cpus_per_task = self.system_num_cpus[pfn]
        domains = self.system_numa_domains[pfn]

        # Threads pinned in each NUMA domain.
        threads_per_domain = (self.num_cpus_per_task /
                              (len(domains) * self.num_tasks_per_core))

        # daint:mc: '-w S0:100MB:18:1:2-0:S1 -w S1:100MB:18:1:2-0:S0'
        # format:
        # -w domain:data_size:nthreads:chunk_size:stride-stream_nr:mem_domain
        # chunk_size and stride affect which cpus from <domain> are selected
        # Pair each of the first two domains with the opposite one.
        crossed = zip(domains[:2], reversed(domains[:2]))
        workgroups = ['-w %s:100MB:%d:1:2-0:%s' %
                      (cpu_dom, threads_per_domain, mem_dom)
                      for cpu_dom, mem_dom in crossed]

        self.executable_opts = ['-t %s' % self.kernel_name] + workgroups

        super().setup(partition, environ, **job_opts)

0 commit comments

Comments
 (0)