Skip to content

Commit 3aebc84

Browse files
authored
Merge pull request swiftlang#12962 from graydon/misc-process-stats-dir-fixes
2 parents fd2fe85 + 677b59c commit 3aebc84

File tree

2 files changed

+100
-29
lines changed

2 files changed

+100
-29
lines changed

utils/jobstats/jobstats.py

Lines changed: 64 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,13 @@ def prefixed_by(self, prefix):
9898
self.module, self.start_usec, self.dur_usec,
9999
self.jobargs, prefixed_stats)
100100

101+
def divided_by(self, n):
    """Return a copy of these stats with every counter divided by n.

    Like the other derived-stats constructors on this class, the copy
    gets a fresh random job-id so it does not collide with real jobs.
    """
    scaled = {key: val / n for (key, val) in self.stats.items()}
    return JobStats(self.jobkind, random.randint(0, 1000000000),
                    self.module, self.start_usec, self.dur_usec,
                    self.jobargs, scaled)
107+
101108
def incrementality_percentage(self):
102109
"""Assuming the job is a driver job, return the amount of
103110
jobs that actually ran, as a percentage of the total number."""
@@ -170,25 +177,56 @@ def to_lnt_test_obj(self, args):
170177
}
171178

172179

180+
# A compiler job's auxiliary name components -- module, input file, target
# triple, output kind and optimization level -- separated by dashes.
AUXPATSTR = (r"(?P<module>[^-]+)-(?P<input>[^-]+)-(?P<triple>[^-]+)" +
             r"-(?P<out>[^-]*)-(?P<opt>[^-]+)")
AUXPAT = re.compile(AUXPATSTR)

# Keys of timer entries inside a stats JSON file, e.g.
# "time.swift-frontend.<aux>.wall".  The tail is a raw string so that
# "\." stays a literal escaped dot rather than an invalid string escape.
TIMERPATSTR = (r"time\.swift-(?P<jobkind>\w+)\." + AUXPATSTR +
               r"\.(?P<timerkind>\w+)$")
TIMERPAT = re.compile(TIMERPATSTR)

# Names of stats files written by the compiler, e.g.
# "stats-<start>-swift-<kind>-<aux>-<pid>.json".  The dot in "\.json$"
# is escaped so only a literal ".json" suffix matches.
FILEPATSTR = (r"^stats-(?P<start>\d+)-swift-(?P<kind>\w+)-" +
              AUXPATSTR +
              r"-(?P<pid>\d+)(-.*)?\.json$")
FILEPAT = re.compile(FILEPATSTR)
192+
193+
194+
def match_auxpat(s):
    """Match s against AUXPAT and return its named groups as a dict,
    or None when s does not match."""
    m = AUXPAT.match(s)
    return None if m is None else m.groupdict()
200+
201+
202+
def match_timerpat(s):
    """Match s against TIMERPAT and return its named groups as a dict,
    or None when s does not match."""
    m = TIMERPAT.match(s)
    return None if m is None else m.groupdict()
208+
209+
210+
def match_filepat(s):
    """Match s against FILEPAT and return its named groups as a dict,
    or None when s does not match."""
    m = FILEPAT.match(s)
    return None if m is None else m.groupdict()
216+
217+
173218
def load_stats_dir(path, select_module=[], select_stat=[],
174-
exclude_timers=False, **kwargs):
219+
exclude_timers=False, merge_timers=False, **kwargs):
175220
"""Loads all stats-files found in path into a list of JobStats objects"""
176221
jobstats = []
177-
auxpat = (r"(?P<module>[^-]+)-(?P<input>[^-]+)-(?P<triple>[^-]+)" +
178-
r"-(?P<out>[^-]*)-(?P<opt>[^-]+)")
179-
fpat = (r"^stats-(?P<start>\d+)-swift-(?P<kind>\w+)-" +
180-
auxpat +
181-
r"-(?P<pid>\d+)(-.*)?.json$")
182-
fre = re.compile(fpat)
183222
sre = re.compile('.*' if len(select_stat) == 0 else
184223
'|'.join(select_stat))
185224
for root, dirs, files in os.walk(path):
186225
for f in files:
187-
m = fre.match(f)
188-
if not m:
226+
mg = match_filepat(f)
227+
if not mg:
189228
continue
190229
# NB: "pid" in fpat is a random number, not unix pid.
191-
mg = m.groupdict()
192230
jobkind = mg['kind']
193231
jobid = int(mg['pid'])
194232
start_usec = int(mg['start'])
@@ -200,21 +238,22 @@ def load_stats_dir(path, select_module=[], select_stat=[],
200238
with open(os.path.join(root, f)) as fp:
201239
j = json.load(fp)
202240
dur_usec = 1
203-
patstr = (r"time\.swift-" + jobkind + r"\." + auxpat +
204-
r"\.wall$")
205-
pat = re.compile(patstr)
206241
stats = dict()
207242
for (k, v) in j.items():
208243
if sre.search(k) is None:
209244
continue
210-
if k.startswith("time."):
245+
if k.startswith('time.') and exclude_timers:
246+
continue
247+
tm = match_timerpat(k)
248+
if tm:
211249
v = int(1000000.0 * float(v))
212-
if exclude_timers:
213-
continue
250+
if tm['jobkind'] == jobkind and \
251+
tm['timerkind'] == 'wall':
252+
dur_usec = v
253+
if merge_timers:
254+
k = "time.swift-%s.%s" % (tm['jobkind'],
255+
tm['timerkind'])
214256
stats[k] = v
215-
tm = re.match(pat, k)
216-
if tm:
217-
dur_usec = v
218257

219258
e = JobStats(jobkind=jobkind, jobid=jobid,
220259
module=module, start_usec=start_usec,
@@ -225,7 +264,7 @@ def load_stats_dir(path, select_module=[], select_stat=[],
225264

226265

227266
def merge_all_jobstats(jobstats, select_module=[], group_by_module=False,
228-
merge_by="sum", **kwargs):
267+
merge_by="sum", divide_by=1, **kwargs):
229268
"""Does a pairwise merge of the elements of list of jobs"""
230269
m = None
231270
if len(select_module) > 0:
@@ -237,12 +276,15 @@ def keyfunc(j):
237276
jobstats.sort(key=keyfunc)
238277
prefixed = []
239278
for mod, group in itertools.groupby(jobstats, keyfunc):
240-
groupmerge = merge_all_jobstats(group, merge_by=merge_by)
279+
groupmerge = merge_all_jobstats(group, merge_by=merge_by,
280+
divide_by=divide_by)
241281
prefixed.append(groupmerge.prefixed_by(mod))
242282
jobstats = prefixed
243283
for j in jobstats:
244284
if m is None:
245285
m = j
246286
else:
247287
m = m.merged_with(j, merge_by=merge_by)
248-
return m
288+
if m is None:
289+
return m
290+
return m.divided_by(divide_by)

utils/process-stats-dir.py

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,10 @@ def vars_of_args(args):
4848
vargs = vars(args)
4949
if args.select_stats_from_csv_baseline is not None:
5050
b = read_stats_dict_from_csv(args.select_stats_from_csv_baseline)
51-
if args.group_by_module:
51+
# Sniff baseline stat-names to figure out if they're module-qualified
52+
# even when the user isn't asking us to _output_ module-grouped data.
53+
all_triples = all(len(k.split('.')) == 3 for k in b.keys())
54+
if args.group_by_module or all_triples:
5255
vargs['select_stat'] = set(stat_name_minus_module(k)
5356
for k in b.keys())
5457
else:
@@ -308,9 +311,20 @@ def write_comparison(args, old_stats, new_stats):
308311

309312
if args.markdown:
310313

314+
def format_time(v):
    """Render a microsecond count v as a human-readable string,
    choosing seconds, milliseconds or microseconds by magnitude."""
    magnitude = abs(v)
    if magnitude > 1000000:
        return "{:.1f}s".format(v / 1000000.0)
    if magnitude > 1000:
        return "{:.1f}ms".format(v / 1000.0)
    return "{:.1f}us".format(v)
321+
311322
def format_field(field, row):
312-
if field == 'name' and args.group_by_module:
313-
return stat_name_minus_module(row.name)
323+
if field == 'name':
324+
if args.group_by_module:
325+
return stat_name_minus_module(row.name)
326+
else:
327+
return row.name
314328
elif field == 'delta_pct':
315329
s = str(row.delta_pct) + "%"
316330
if args.github_emoji:
@@ -320,7 +334,11 @@ def format_field(field, row):
320334
s += " :white_check_mark:"
321335
return s
322336
else:
323-
return str(vars(row)[field])
337+
v = int(vars(row)[field])
338+
if row.name.startswith('time.'):
339+
return format_time(v)
340+
else:
341+
return "{:,d}".format(v)
324342

325343
def format_table(elts):
326344
out = args.output
@@ -357,10 +375,12 @@ def keyfunc(e):
357375
format_table(elts)
358376
out.write('</details>\n')
359377

360-
format_details('Regressed', regressed, args.close_regressions)
378+
closed_regressions = (args.close_regressions or len(regressed) == 0)
379+
format_details('Regressed', regressed, closed_regressions)
361380
format_details('Improved', improved, True)
362-
format_details('Unchanged (abs(delta) < %s%% or %susec)' %
363-
(args.delta_pct_thresh, args.delta_usec_thresh),
381+
format_details('Unchanged (delta < %s%% or delta < %s)' %
382+
(args.delta_pct_thresh,
383+
format_time(args.delta_usec_thresh)),
364384
unchanged, True)
365385

366386
else:
@@ -528,6 +548,15 @@ def main():
528548
default="sum",
529549
type=str,
530550
help="Merge identical metrics by (sum|min|max)")
551+
parser.add_argument("--merge-timers",
552+
default=False,
553+
action="store_true",
554+
help="Merge timers across modules/targets/etc.")
555+
parser.add_argument("--divide-by",
556+
default=1,
557+
metavar="D",
558+
type=int,
559+
help="Divide stats by D (to take an average)")
531560
parser.add_argument("--markdown",
532561
default=False,
533562
action="store_true",

0 commit comments

Comments
 (0)