33
33
# to retrieve a frame. That one is hardware dependent and must be found by a third benchmark, so
34
34
# it is not theoretically a constant, but keeping it here to not forget. Until then, our performance
35
35
# index isn't really valid cross-hardware. M_COEF=400 gives roughly consistent results with the
36
- # hardware we have. So, using that until we know more.
36
+ # hardware we have. So, using that until we know more. NIC_CONSTANT seems to be around
37
+ # 1 microsecond. Using that, provisionally.
37
38
38
39
M_COEF = 400
39
- NIC_CONSTANT = 0
40
-
41
- # TODO(jiceatscion): get it from or give it to brload?
42
- BM_PACKET_LEN = 172
40
+ NIC_CONSTANT = 1.0 / 1000000
43
41
44
42
# Intf: description of an interface configured for brload's use. Depending on context
45
43
# mac and peermac may be unused. "mac" is the MAC address configured on the side of the subject
@@ -55,31 +53,36 @@ class Results:
55
53
cores : int = 0
56
54
coremark : int = 0
57
55
mmbm : int = 0
56
+ packet_size : int = 0
58
57
cases : list [dict ] = []
59
58
failed : list [dict ] = []
60
59
checked : bool = False
61
60
62
- def __init__ (self , cores : int , coremark : int , mmbm : int ):
61
+ def __init__ (self , cores : int , coremark : int , mmbm : int , packet_size : int ):
63
62
self .cores = cores
64
63
self .coremark = coremark
65
64
self .mmbm = mmbm
65
+ self .packet_size = packet_size
66
66
67
67
def perf_index (self , rate : int ) -> float :
68
68
# TODO(jiceatscion): The perf index assumes that line speed isn't the bottleneck.
69
69
# It almost never is, but ideally we'd need to run iperf3 to verify.
70
70
# mmbm is in mebiBytes/s, rate is in pkt/s
71
71
return rate * (1.0 / self .coremark +
72
- M_COEF * BM_PACKET_LEN / (self .mmbm * 1024 * 1024 ) +
72
+ M_COEF * self . packet_size / (self .mmbm * 1024 * 1024 ) +
73
73
NIC_CONSTANT )
74
74
75
- def add_case (self , name : str , rate : int , droppage : int ):
75
+ def add_case (self , name : str , rate : int , droppage : int , raw_rate : int ):
76
76
dropRatio = round (float (droppage ) / (rate + droppage ), 2 )
77
77
saturated = dropRatio > 0.03
78
78
perf = 0.0
79
79
if self .cores == 3 and self .coremark and self .mmbm :
80
80
perf = round (self .perf_index (rate ), 1 )
81
81
self .cases .append ({"case" : name ,
82
- "perf" : perf , "rate" : rate , "drop" : dropRatio , "full" : saturated })
82
+ "perf" : perf , "rate" : rate , "drop" : dropRatio ,
83
+ "bit_rate" : rate * self .packet_size * 8 ,
84
+ "raw_pkt_rate" : raw_rate ,
85
+ "full" : saturated })
83
86
84
87
def CI_check (self , expectations : dict [str , int ]):
85
88
self .checked = True
@@ -147,14 +150,15 @@ class RouterBM():
147
150
This class is a Mixin that borrows the following attributes from the host class:
148
151
* coremark: the coremark benchmark results.
149
152
* mmbm: the mmbm benchmark results.
153
+ * packet_size: the packet_size to use in the test cases.
150
154
* intf_map: the "label->actual_interface" map to be passed to brload.
151
155
* brload: "localCmd" wrapper for the brload executable (plumbum.machines.LocalCommand)
152
156
* brload_cpus: [int] cpus where it is acceptable to run brload ([] means any)
153
157
* artifacts: the data directory (passed to docker).
154
158
* prom_address: the address of the prometheus API, a string in the form "host:port"
155
159
"""
156
160
157
- def exec_br_load (self , case : str , map_args : list [str ], count : int ) -> str :
161
+ def exec_br_load (self , case : str , map_args : list [str ], duration : int ) -> str :
158
162
# For num-streams, attempt to distribute uniformly on many possible number of cores.
159
163
# 840 is a multiple of 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 15, 20, 21, 24, 28, ...
160
164
brload_args = [
@@ -163,8 +167,9 @@ def exec_br_load(self, case: str, map_args: list[str], count: int) -> str:
163
167
"--artifacts" , self .artifacts ,
164
168
* map_args ,
165
169
"--case" , case ,
166
- "--num-packets " , str ( count ) ,
170
+ "--duration " , f" { duration } s" ,
167
171
"--num-streams" , "840" ,
172
+ "--packet-size" , f"{ self .packet_size } " ,
168
173
]
169
174
if self .brload_cpus :
170
175
brload_args = [
@@ -176,20 +181,20 @@ def exec_br_load(self, case: str, map_args: list[str], count: int) -> str:
176
181
def run_test_case (self , case : str , map_args : list [str ]) -> (int , int ):
177
182
logger .debug (f"==> Starting load { case } " )
178
183
179
- output = self . exec_br_load ( case , map_args , 10000000 )
180
- beg = "0"
184
+ # We transmit for 13 seconds and then ignore the first 3.
185
+ output = self . exec_br_load ( case , map_args , 13 )
181
186
end = "0"
182
187
for line in output .splitlines ():
183
188
if line .startswith ("metricsBegin" ):
184
- _ , beg , _ , end = line .split ()
189
+ end = line .split ()[ 3 ] # "... metricsEnd: <end>"
185
190
186
191
logger .debug (f"==> Collecting { case } performance metrics..." )
187
192
188
193
# The raw metrics are expressed in terms of core*seconds. We convert to machine*seconds
189
194
# which allows us to provide a projected packet/s; ...more intuitive than packets/core*s.
190
- # We measure the rate over 10s. For best results we sample the end of the middle 10s of the
191
- # run. "beg" is the start time of the real action and "end" is the end time .
192
- sampleTime = ( int (beg ) + int ( end ) + 10 ) / 2
195
+ # We measure the rate over 10s. For best results we only look at the last 10 seconds.
196
+ # "end" reports a time when the transmission was still going on at maximum rate .
197
+ sampleTime = int (end )
193
198
prom_query = urlencode ({
194
199
'time' : f'{ sampleTime } ' ,
195
200
'query' : (
@@ -218,6 +223,31 @@ def run_test_case(self, case: str, map_args: list[str]) -> (int, int):
218
223
processed = int (float (val ))
219
224
break
220
225
226
+ # Collect the raw packet rate too. Just so we can discover if the cpu-availability
227
+ # correction is bad.
228
+ prom_query = urlencode ({
229
+ 'time' : f'{ sampleTime } ' ,
230
+ 'query' : (
231
+ 'sum by (instance, job) ('
232
+ f' rate(router_output_pkts_total{{job="BR", type="{ case } "}}[10s])'
233
+ ')'
234
+ )
235
+ })
236
+ conn = HTTPConnection (self .prom_address )
237
+ conn .request ("GET" , f"/api/v1/query?{ prom_query } " )
238
+ resp = conn .getresponse ()
239
+ if resp .status != 200 :
240
+ raise RuntimeError (f"Unexpected response: { resp .status } { resp .reason } " )
241
+
242
+ # There's only one router, so whichever metric we get is the right one.
243
+ pld = json .loads (resp .read ().decode ("utf-8" ))
244
+ raw = 0
245
+ results = pld ["data" ]["result" ]
246
+ for result in results :
247
+ ts , val = result ["value" ]
248
+ raw = int (float (val ))
249
+ break
250
+
221
251
# Collect dropped packets metrics, so we can verify that the router was well saturated.
222
252
# If not, the metrics aren't very useful.
223
253
prom_query = urlencode ({
@@ -248,7 +278,7 @@ def run_test_case(self, case: str, map_args: list[str]) -> (int, int):
248
278
dropped = int (float (val ))
249
279
break
250
280
251
- return processed , dropped
281
+ return processed , dropped , raw
252
282
253
283
# Fetch and log the number of cores used by Go. This may inform performance
254
284
# modeling later.
@@ -289,18 +319,18 @@ def run_bm(self, test_cases: [str]) -> Results:
289
319
# Run one short test (5 seconds) as warm-up to trigger any frequency scaling, else the first test
290
320
# can get much lower performance.
291
321
logger .debug ("Warmup" )
292
- self .exec_br_load (test_cases [0 ], map_args , 3000000 )
322
+ self .exec_br_load (test_cases [0 ], map_args , 5 )
293
323
294
324
# Fetch the core count once. It doesn't change while the router is running.
295
325
# We can't get it until the router has done some work, but the warmup is enough.
296
326
cores = self .core_count ()
297
327
298
328
# At long last, run the tests.
299
- results = Results (cores , self .coremark , self .mmbm )
329
+ results = Results (cores , self .coremark , self .mmbm , self . packet_size )
300
330
for test_case in test_cases :
301
331
logger .info (f"Case: { test_case } " )
302
- rate , droppage = self .run_test_case (test_case , map_args )
303
- results .add_case (test_case , rate , droppage )
332
+ rate , droppage , raw = self .run_test_case (test_case , map_args )
333
+ results .add_case (test_case , rate or 1 , droppage , raw )
304
334
305
335
return results
306
336
logger .info ("Benchmarked" )
0 commit comments