
Commit 1203a63

weilinwang authored and namhyung committed
perf test: Rerun failed metrics with longer workload
Rerun failed metrics with a longer workload to avoid false failures: the metric value test sometimes fails when it runs for only a very short time. Skip the rerun if 20 or more metrics fail.

Signed-off-by: Weilin Wang <[email protected]>
Tested-by: Namhyung Kim <[email protected]>
Cc: [email protected]
Cc: Ian Rogers <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Adrian Hunter <[email protected]>
Cc: Caleb Biggers <[email protected]>
Cc: Perry Taylor <[email protected]>
Cc: Samantha Alt <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Kan Liang <[email protected]>
Cc: Ingo Molnar <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Namhyung Kim <[email protected]>
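In outline, the change adds a second collection pass: metrics that fail the first check are re-collected one by one against the long workload, and the retry is skipped when 20 or more metrics failed. A minimal sketch of that flow, using the names introduced by the patch below but trimmed for illustration:

    # Sketch only: mirrors second_test()/_run_perf() from this patch, simplified.
    def second_test(self, collectlist, second_results):
        workload = self.workloads[self.wlidx]      # long workload for the current run
        for metric in collectlist:                 # re-collect each failed metric on its own
            data = self._run_perf(metric, workload)
            self.convert(data, second_results)

    # Callers (pos_val_test, single_test) only retry small failure sets;
    # 20 or more failures are treated as a genuine problem and not rerun.
    if 0 < len(rerun) < 20:
        second_results = dict()
        self.second_test(rerun, second_results)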
1 parent a0f1cc1 commit 1203a63

1 file changed: +83 -46 lines changed


tools/perf/tests/shell/lib/perf_metric_validation.py

83 additions & 46 deletions

--- a/tools/perf/tests/shell/lib/perf_metric_validation.py
+++ b/tools/perf/tests/shell/lib/perf_metric_validation.py
@@ -11,8 +11,9 @@ def __init__(self, rulefname, reportfname='', t=5, debug=False, datafname='', fu
         self.rulefname = rulefname
         self.reportfname = reportfname
         self.rules = None
-        self.collectlist=metrics
-        self.metrics = set(metrics)
+        self.collectlist:str = metrics
+        self.metrics = self.__set_metrics(metrics)
+        self.skiplist = set()
         self.tolerance = t

         self.workloads = [x for x in workload.split(",") if x]
@@ -41,6 +42,12 @@ def __init__(self, rulefname, reportfname='', t=5, debug=False, datafname='', fu
         self.debug = debug
         self.fullrulefname = fullrulefname

+    def __set_metrics(self, metrics=''):
+        if metrics != '':
+            return set(metrics.split(","))
+        else:
+            return set()
+
     def read_json(self, filename: str) -> dict:
         try:
             with open(Path(filename).resolve(), "r") as f:
@@ -113,7 +120,7 @@ def get_value(self, name:str, ridx:int = 0) -> list:
         All future test(s) on this metric will fail.

         @param name: name of the metric
-        @returns: list with value found in self.results; list is empty when not value found.
+        @returns: list with value found in self.results; list is empty when value is not found.
         """
         results = []
         data = self.results[ridx] if ridx in self.results else self.results[0]
@@ -123,7 +130,6 @@ def get_value(self, name:str, ridx:int = 0) -> list:
             elif name.replace('.', '1').isdigit():
                 results.append(float(name))
             else:
-                self.errlist.append("Metric '%s' is not collected or the value format is incorrect"%(name))
                 self.ignoremetrics.add(name)
         return results

@@ -138,27 +144,32 @@ def pos_val_test(self):
         Failure: when metric value is negative or not provided.
         Metrics with negative value will be added into the self.failtests['PositiveValueTest'] and self.ignoremetrics.
         """
-        negmetric = set()
-        missmetric = set()
+        negmetric = dict()
         pcnt = 0
         tcnt = 0
+        rerun = list()
         for name, val in self.get_results().items():
-            if val is None or val == '':
-                missmetric.add(name)
-                self.errlist.append("Metric '%s' is not collected"%(name))
-            elif val < 0:
-                negmetric.add("{0}(={1:.4f})".format(name, val))
-                self.collectlist[0].append(name)
+            if val < 0:
+                negmetric[name] = val
+                rerun.append(name)
             else:
                 pcnt += 1
             tcnt += 1
+        if len(rerun) > 0 and len(rerun) < 20:
+            second_results = dict()
+            self.second_test(rerun, second_results)
+            for name, val in second_results.items():
+                if name not in negmetric: continue
+                if val >= 0:
+                    del negmetric[name]
+                    pcnt += 1

         self.failtests['PositiveValueTest']['Total Tests'] = tcnt
         self.failtests['PositiveValueTest']['Passed Tests'] = pcnt
-        if len(negmetric) or len(missmetric)> 0:
-            self.ignoremetrics.update(negmetric)
-            self.ignoremetrics.update(missmetric)
-            self.failtests['PositiveValueTest']['Failed Tests'].append({'NegativeValue':list(negmetric), 'MissingValue':list(missmetric)})
+        if len(negmetric.keys()):
+            self.ignoremetrics.update(negmetric.keys())
+            negmessage = ["{0}(={1:.4f})".format(name, val) for name, val in negmetric.items()]
+            self.failtests['PositiveValueTest']['Failed Tests'].append({'NegativeValue': negmessage})

         return

@@ -259,21 +270,36 @@ def single_test(self, rule:dict):
         metrics = rule['Metrics']
         passcnt = 0
         totalcnt = 0
-        faillist = []
+        faillist = list()
+        failures = dict()
+        rerun = list()
         for m in metrics:
             totalcnt += 1
             result = self.get_value(m['Name'])
-            if len(result) > 0 and self.check_bound(result[0], lbv, ubv, t):
+            if len(result) > 0 and self.check_bound(result[0], lbv, ubv, t) or m['Name'] in self.skiplist:
                 passcnt += 1
             else:
-                faillist.append({'MetricName':m['Name'], 'CollectedValue':result})
-                self.collectlist[0].append(m['Name'])
+                failures[m['Name']] = result
+                rerun.append(m['Name'])
+
+        if len(rerun) > 0 and len(rerun) < 20:
+            second_results = dict()
+            self.second_test(rerun, second_results)
+            for name, val in second_results.items():
+                if name not in failures: continue
+                if self.check_bound(val, lbv, ubv, t):
+                    passcnt += 1
+                    del failures[name]
+                else:
+                    failures[name] = val
+                    self.results[0][name] = val

         self.totalcnt += totalcnt
         self.passedcnt += passcnt
         self.failtests['SingleMetricTest']['Total Tests'] += totalcnt
         self.failtests['SingleMetricTest']['Passed Tests'] += passcnt
-        if len(faillist) != 0:
+        if len(failures.keys()) != 0:
+            faillist = [{'MetricName':name, 'CollectedValue':val} for name, val in failures.items()]
             self.failtests['SingleMetricTest']['Failed Tests'].append({'RuleIndex':rule['RuleIndex'],
                                                                        'RangeLower': rule['RangeLower'],
                                                                        'RangeUpper': rule['RangeUpper'],
@@ -316,7 +342,7 @@ def check_rule(self, testtype, metric_list):
         return True

     # Start of Collector and Converter
-    def convert(self, data: list, idx: int):
+    def convert(self, data: list, metricvalues:dict):
         """
         Convert collected metric data from the -j output to dict of {metric_name:value}.
         """
@@ -326,20 +352,29 @@ def convert(self, data: list, idx: int):
                 if "metric-unit" in result and result["metric-unit"] != "(null)" and result["metric-unit"] != "":
                     name = result["metric-unit"].split(" ")[1] if len(result["metric-unit"].split(" ")) > 1 \
                         else result["metric-unit"]
-                    if idx not in self.results: self.results[idx] = dict()
-                    self.results[idx][name.lower()] = float(result["metric-value"])
+                    metricvalues[name.lower()] = float(result["metric-value"])
             except ValueError as error:
                 continue
         return

-    def collect_perf(self, data_file: str, workload: str):
+    def _run_perf(self, metric, workload: str):
+        tool = 'perf'
+        command = [tool, 'stat', '-j', '-M', f"{metric}", "-a"]
+        wl = workload.split()
+        command.extend(wl)
+        print(" ".join(command))
+        cmd = subprocess.run(command, stderr=subprocess.PIPE, encoding='utf-8')
+        data = [x+'}' for x in cmd.stderr.split('}\n') if x]
+        return data
+
+
+    def collect_perf(self, workload: str):
         """
         Collect metric data with "perf stat -M" on given workload with -a and -j.
         """
         self.results = dict()
-        tool = 'perf'
         print(f"Starting perf collection")
-        print(f"Workload: {workload}")
+        print(f"Long workload: {workload}")
         collectlist = dict()
         if self.collectlist != "":
             collectlist[0] = {x for x in self.collectlist.split(",")}
@@ -353,17 +388,20 @@ def collect_perf(self, data_file: str, workload: str):
                 collectlist[rule["RuleIndex"]] = [",".join(list(set(metrics)))]

         for idx, metrics in collectlist.items():
-            if idx == 0: wl = "sleep 0.5".split()
-            else: wl = workload.split()
+            if idx == 0: wl = "true"
+            else: wl = workload
             for metric in metrics:
-                command = [tool, 'stat', '-j', '-M', f"{metric}", "-a"]
-                command.extend(wl)
-                print(" ".join(command))
-                cmd = subprocess.run(command, stderr=subprocess.PIPE, encoding='utf-8')
-                data = [x+'}' for x in cmd.stderr.split('}\n') if x]
-                self.convert(data, idx)
-        self.collectlist = dict()
-        self.collectlist[0] = list()
+                data = self._run_perf(metric, wl)
+                if idx not in self.results: self.results[idx] = dict()
+                self.convert(data, self.results[idx])
+        return
+
+    def second_test(self, collectlist, second_results):
+        workload = self.workloads[self.wlidx]
+        for metric in collectlist:
+            data = self._run_perf(metric, workload)
+            self.convert(data, second_results)
+
     # End of Collector and Converter

     # Start of Rule Generator
@@ -381,7 +419,7 @@ def parse_perf_metrics(self):
             if 'MetricName' not in m:
                 print("Warning: no metric name")
                 continue
-            name = m['MetricName']
+            name = m['MetricName'].lower()
             self.metrics.add(name)
             if 'ScaleUnit' in m and (m['ScaleUnit'] == '1%' or m['ScaleUnit'] == '100%'):
                 self.pctgmetrics.add(name.lower())
@@ -391,14 +429,12 @@ def parse_perf_metrics(self):

         return

-    def remove_unsupported_rules(self, rules, skiplist: set = None):
-        for m in skiplist:
-            self.metrics.discard(m)
+    def remove_unsupported_rules(self, rules):
         new_rules = []
         for rule in rules:
             add_rule = True
             for m in rule["Metrics"]:
-                if m["Name"] not in self.metrics:
+                if m["Name"] in self.skiplist or m["Name"] not in self.metrics:
                     add_rule = False
                     break
             if add_rule:
@@ -415,15 +451,15 @@ def create_rules(self):
         """
         data = self.read_json(self.rulefname)
         rules = data['RelationshipRules']
-        skiplist = set(data['SkipList'])
-        self.rules = self.remove_unsupported_rules(rules, skiplist)
+        self.skiplist = set([name.lower() for name in data['SkipList']])
+        self.rules = self.remove_unsupported_rules(rules)
         pctgrule = {'RuleIndex':0,
                     'TestType':'SingleMetricTest',
                     'RangeLower':'0',
                     'RangeUpper': '100',
                     'ErrorThreshold': self.tolerance,
                     'Description':'Metrics in percent unit have value with in [0, 100]',
-                    'Metrics': [{'Name': m} for m in self.pctgmetrics]}
+                    'Metrics': [{'Name': m.lower()} for m in self.pctgmetrics]}
         self.rules.append(pctgrule)

         # Re-index all rules to avoid repeated RuleIndex
@@ -479,8 +515,9 @@ def test(self):
         self.parse_perf_metrics()
         self.create_rules()
         for i in range(0, len(self.workloads)):
+            self.wlidx = i
             self._init_data()
-            self.collect_perf(self.datafname, self.workloads[i])
+            self.collect_perf(self.workloads[i])
             # Run positive value test
             self.pos_val_test()
             for r in self.rules:
