
Commit d05d57a

Merge pull request swiftlang#10057 from graydon/process-stats-dir-baselines
Add CSV "baseline" support to process-stats-dir
2 parents 8c32c0d + 5718b76

1 file changed (+131, -38)

utils/process-stats-dir.py

Lines changed: 131 additions & 38 deletions
@@ -225,18 +225,18 @@ def write_lnt_values(args):
         json.dump(j, args.output, indent=4)
     else:
         url = args.lnt_submit
-        print "\nSubmitting to LNT server: " + url
+        print "\nsubmitting to LNT server: " + url
         json_report = {'input_data': json.dumps(j), 'commit': '1'}
         data = urllib.urlencode(json_report)
         response_str = urllib2.urlopen(urllib2.Request(url, data))
         response = json.loads(response_str.read())
         print "### response:"
         print response
         if 'success' in response:
-            print "Server response:\tSuccess"
+            print "server response:\tSuccess"
         else:
-            print "Server response:\tError"
-            print "Error:\t", response['error']
+            print "server response:\tError"
+            print "error:\t", response['error']
             sys.exit(1)


@@ -291,40 +291,127 @@ def show_incrementality(args):
             incrementality=pct))


-def compare_frontend_stats(args):
-    assert(len(args.remainder) == 2)
-    (olddir, newdir) = args.remainder
+def diff_and_pct(old, new):
+    if old == 0:
+        if new == 0:
+            return (0, 0.0)
+        else:
+            return (new, 100.0)
+    delta = (new - old)
+    delta_pct = round((float(delta) / float(old)) * 100.0, 2)
+    return (delta, delta_pct)
+
+
+def update_epoch_value(d, name, epoch, value):
+    changed = 0
+    if name in d:
+        (existing_epoch, existing_value) = d[name]
+        if existing_epoch > epoch:
+            print("note: keeping newer value %d from epoch %d for %s"
+                  % (existing_value, existing_epoch, name))
+            epoch = existing_epoch
+            value = existing_value
+        elif existing_value == value:
+            epoch = existing_epoch
+        else:
+            (_, delta_pct) = diff_and_pct(existing_value, value)
+            print ("note: changing value %d -> %d (%.2f%%) for %s" %
+                   (existing_value, value, delta_pct, name))
+            changed = 1
+    d[name] = (epoch, value)
+    return (epoch, value, changed)
+
+
+def read_stats_dict_from_csv(f):
+    infieldnames = ["epoch", "name", "value"]
+    c = csv.DictReader(f, infieldnames,
+                       dialect='excel-tab',
+                       quoting=csv.QUOTE_NONNUMERIC)
+    d = {}
+    for row in c:
+        epoch = int(row["epoch"])
+        name = row["name"]
+        value = int(row["value"])
+        update_epoch_value(d, name, epoch, value)
+    return d
+
+
+# The idea here is that a "baseline" is a (tab-separated) CSV file full of
+# the counters you want to track, each prefixed by an epoch timestamp of
+# the last time the value was reset.
+#
+# When you set a fresh baseline, all stats in the provided stats dir are
+# written to the baseline. When you set against an _existing_ baseline,
+# only the counters mentioned in the existing baseline are updated, and
+# only if their values differ.
+#
+# Finally, since it's a line-oriented CSV file, you can put:
+#
+#    mybaseline.csv merge=union
+#
+# in your .gitattributes file, and forget about merge conflicts. The reader
+# function above will take the later epoch anytime it detects duplicates,
+# so union-merging is harmless. Duplicates will be eliminated whenever the
+# next baseline-set is done.
+def set_csv_baseline(args):
+    existing = None
+    if os.path.exists(args.set_csv_baseline):
+        with open(args.set_csv_baseline, "r") as f:
+            existing = read_stats_dict_from_csv(f)
+        print ("updating %d baseline entries in %s" %
+               (len(existing), args.set_csv_baseline))
+    else:
+        print "making new baseline " + args.set_csv_baseline
+    fieldnames = ["epoch", "name", "value"]
+    with open(args.set_csv_baseline, "wb") as f:
+        out = csv.DictWriter(f, fieldnames, dialect='excel-tab',
+                             quoting=csv.QUOTE_NONNUMERIC)
+        m = merge_all_jobstats([s for d in args.remainder
+                                for s in load_stats_dir(d)])
+        changed = 0
+        newepoch = int(time.time())
+        for name in sorted(m.stats.keys()):
+            epoch = newepoch
+            value = m.stats[name]
+            if existing is not None:
+                if name not in existing:
+                    continue
+                (epoch, value, chg) = update_epoch_value(existing, name,
+                                                         epoch, value)
+                changed += chg
+            out.writerow(dict(epoch=int(epoch),
+                              name=name,
+                              value=int(value)))
+    if existing is not None:
+        print "changed %d entries in baseline" % changed
+    return 0
+
+
+def compare_to_csv_baseline(args):
+    old_stats = read_stats_dict_from_csv(args.compare_to_csv_baseline)
+    m = merge_all_jobstats([s for d in args.remainder
+                            for s in load_stats_dir(d)])
+    new_stats = m.stats

     regressions = 0
-    fieldnames = ["old", "new", "delta_pct", "name"]
-    out = csv.DictWriter(args.output, fieldnames, dialect='excel-tab')
+    outfieldnames = ["old", "new", "delta_pct", "name"]
+    out = csv.DictWriter(args.output, outfieldnames, dialect='excel-tab')
     out.writeheader()

-    old_stats = load_stats_dir(olddir)
-    new_stats = load_stats_dir(newdir)
-    old_merged = merge_all_jobstats([x for x in old_stats
-                                     if x.is_frontend_job()])
-    new_merged = merge_all_jobstats([x for x in new_stats
-                                     if x.is_frontend_job()])
-    if old_merged is None or new_merged is None:
-        return regressions
-    for stat_name in sorted(old_merged.stats.keys()):
-        if stat_name in new_merged.stats:
-            old = old_merged.stats[stat_name]
-            new = new_merged.stats.get(stat_name, 0)
-            if old == 0 or new == 0:
-                continue
-            delta = (new - old)
-            delta_pct = round((float(delta) / float(old)) * 100.0, 2)
-            if (stat_name.startswith("time.") and
-                    abs(delta) < args.delta_usec_thresh):
-                continue
-            if abs(delta_pct) < args.delta_pct_thresh:
-                continue
-            out.writerow(dict(name=stat_name, old=old, new=new,
-                              delta_pct=delta_pct))
-            if delta > 0:
-                regressions += 1
+    for stat_name in sorted(old_stats.keys()):
+        (_, old) = old_stats[stat_name]
+        new = new_stats.get(stat_name, 0)
+        (delta, delta_pct) = diff_and_pct(old, new)
+        if (stat_name.startswith("time.") and
+                abs(delta) < args.delta_usec_thresh):
+            continue
+        if abs(delta_pct) < args.delta_pct_thresh:
+            continue
+        out.writerow(dict(name=stat_name,
+                          old=int(old), new=int(new),
+                          delta_pct=delta_pct))
+        if delta > 0:
+            regressions += 1
     return regressions


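For reference, a baseline written this way is a three-column, tab-separated file. Because both the reader and writer use csv.QUOTE_NONNUMERIC, only the non-numeric name field is quoted, and bare numeric fields come back from the reader as floats, which is why epoch and value are coerced through int() on both sides. A hypothetical two-entry baseline (counter names, epochs, and values invented for illustration) would look like:

    1493309979	"AST.NumSourceLines"	4661
    1493309979	"IRModule.NumIRFunctions"	2137

Each line carries its own epoch, which is what makes the merge=union trick described in the comment above safe: when duplicates survive a union merge, update_epoch_value keeps the entry with the newer epoch.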
@@ -361,8 +448,12 @@ def main():
                        help="emit a 'catapult'-compatible trace of events")
     modes.add_argument("--incrementality", action="store_true",
                        help="summarize the 'incrementality' of a build")
-    modes.add_argument("--compare-frontend-stats", action="store_true",
-                       help="Compare frontend stats from two stats-dirs")
+    modes.add_argument("--set-csv-baseline", type=str, default=None,
+                       help="Merge stats from a stats-dir into a CSV baseline")
+    modes.add_argument("--compare-to-csv-baseline",
+                       type=argparse.FileType('rb', 0),
+                       metavar="BASELINE.csv",
+                       help="Compare stats dir to named CSV baseline")
     modes.add_argument("--lnt", action="store_true",
                        help="Emit an LNT-compatible test summary")
     parser.add_argument('remainder', nargs=argparse.REMAINDER,
@@ -374,8 +465,10 @@ def main():
         return 1
     if args.catapult:
         write_catapult_trace(args)
-    elif args.compare_frontend_stats:
-        return compare_frontend_stats(args)
+    elif args.set_csv_baseline is not None:
+        return set_csv_baseline(args)
+    elif args.compare_to_csv_baseline:
+        return compare_to_csv_baseline(args)
     elif args.incrementality:
         if args.paired:
             show_paired_incrementality(args)
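As a usage sketch of the two new modes (stats-dir and file paths invented for illustration), a first invocation records the merged counters as a baseline, and a later one diffs fresh stats against that file, with main() returning the number of regressions found:

    utils/process-stats-dir.py --set-csv-baseline baseline.csv /tmp/stats-before
    utils/process-stats-dir.py --compare-to-csv-baseline baseline.csv /tmp/stats-after

Note the asymmetry in the argparse wiring: --set-csv-baseline takes the CSV path as a plain string so the file can be created if absent, while --compare-to-csv-baseline is an argparse.FileType('rb', 0) and expects the baseline to already exist.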
