Skip to content

Commit 5718b76

Browse files
committed
[process-stats-dir] Support (re)setting & comparing to CSV baselines.
1 parent 043918f commit 5718b76

File tree

1 file changed

+127
-34
lines changed

1 file changed

+127
-34
lines changed

utils/process-stats-dir.py

Lines changed: 127 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -294,40 +294,127 @@ def show_incrementality(args):
294294
incrementality=pct))
295295

296296

297-
def compare_frontend_stats(args):
298-
assert(len(args.remainder) == 2)
299-
(olddir, newdir) = args.remainder
297+
def diff_and_pct(old, new):
    """Return (delta, delta_pct) describing the change from old to new.

    delta_pct is rounded to two decimal places. A counter that springs
    into existence (old == 0, new != 0) is reported as a 100% change,
    which also guards the division below against a zero denominator.
    """
    if old == 0:
        return (0, 0.0) if new == 0 else (new, 100.0)
    delta = new - old
    delta_pct = round((float(delta) / float(old)) * 100.0, 2)
    return (delta, delta_pct)
306+
307+
308+
def update_epoch_value(d, name, epoch, value):
    """Store (epoch, value) for name in dict d, resolving conflicts.

    A pre-existing entry with a strictly newer epoch wins outright; an
    entry with an equal value keeps its original epoch; otherwise the
    incoming value replaces the old one and the change is counted.
    Returns the (epoch, value, changed) triple actually stored, where
    changed is 1 if the stored value differs from the previous one.
    """
    changed = 0
    existing = d.get(name)
    if existing is not None:
        (existing_epoch, existing_value) = existing
        if existing_epoch > epoch:
            # The table already holds a more recent measurement; keep it.
            print("note: keeping newer value %d from epoch %d for %s"
                  % (existing_value, existing_epoch, name))
            (epoch, value) = existing
        elif existing_value == value:
            # Same value: preserve the original reset timestamp.
            epoch = existing_epoch
        else:
            (_, delta_pct) = diff_and_pct(existing_value, value)
            print ("note: changing value %d -> %d (%.2f%%) for %s" %
                   (existing_value, value, delta_pct, name))
            changed = 1
    d[name] = (epoch, value)
    return (epoch, value, changed)
326+
327+
328+
def read_stats_dict_from_csv(f):
    """Read a tab-separated (epoch, name, value) CSV from file object f.

    Returns a dict mapping name -> (epoch, value). Duplicate names are
    resolved through update_epoch_value, so the entry with the newest
    epoch wins (this is what makes union-merged baselines harmless).
    """
    reader = csv.DictReader(f, ["epoch", "name", "value"],
                            dialect='excel-tab',
                            quoting=csv.QUOTE_NONNUMERIC)
    d = {}
    for row in reader:
        update_epoch_value(d, row["name"],
                           int(row["epoch"]), int(row["value"]))
    return d
340+
341+
342+
# The idea here is that a "baseline" is a (tab-separated) CSV file full of
# the counters you want to track, each prefixed by an epoch timestamp of
# the last time the value was reset.
#
# When you set a fresh baseline, all stats in the provided stats dir are
# written to the baseline. When you set against an _existing_ baseline,
# only the counters mentioned in the existing baseline are updated, and
# only if their values differ.
#
# Finally, since it's a line-oriented CSV file, you can put:
#
#    mybaseline.csv merge=union
#
# in your .gitattributes file, and forget about merge conflicts. The reader
# function above will take the later epoch anytime it detects duplicates,
# so union-merging is harmless. Duplicates will be eliminated whenever the
# next baseline-set is done.
def set_csv_baseline(args):
    """Create or update the CSV baseline named by args.set_csv_baseline
    from the stats dirs listed in args.remainder.

    Returns 0 so the result can be used directly as an exit status.
    """
    existing = None
    if os.path.exists(args.set_csv_baseline):
        with open(args.set_csv_baseline, "r") as f:
            existing = read_stats_dict_from_csv(f)
            print ("updating %d baseline entries in %s" %
                   (len(existing), args.set_csv_baseline))
    else:
        # Parenthesized (rather than a bare py2 print statement) for
        # consistency with the other prints and python3 compatibility.
        print("making new baseline " + args.set_csv_baseline)
    fieldnames = ["epoch", "name", "value"]
    with open(args.set_csv_baseline, "wb") as f:
        out = csv.DictWriter(f, fieldnames, dialect='excel-tab',
                             quoting=csv.QUOTE_NONNUMERIC)
        m = merge_all_jobstats([s for d in args.remainder
                                for s in load_stats_dir(d)])
        changed = 0
        newepoch = int(time.time())
        for name in sorted(m.stats.keys()):
            epoch = newepoch
            value = m.stats[name]
            # When updating an existing baseline, only touch counters it
            # already mentions, and only when their values changed.
            if existing is not None:
                if name not in existing:
                    continue
                (epoch, value, chg) = update_epoch_value(existing, name,
                                                         epoch, value)
                changed += chg
            out.writerow(dict(epoch=int(epoch),
                              name=name,
                              value=int(value)))
    if existing is not None:
        print("changed %d entries in baseline" % changed)
    return 0
391+
392+
393+
def compare_to_csv_baseline(args):
    """Compare merged stats from the dirs in args.remainder against the
    CSV baseline file object args.compare_to_csv_baseline, writing a
    tab-separated delta report to args.output.

    Returns the number of counters that regressed (i.e. grew).
    """
    old_stats = read_stats_dict_from_csv(args.compare_to_csv_baseline)
    merged = merge_all_jobstats([s for d in args.remainder
                                 for s in load_stats_dir(d)])
    new_stats = merged.stats

    outfieldnames = ["old", "new", "delta_pct", "name"]
    out = csv.DictWriter(args.output, outfieldnames, dialect='excel-tab')
    out.writeheader()

    regressions = 0
    for stat_name, (_, old) in sorted(old_stats.items()):
        new = new_stats.get(stat_name, 0)
        (delta, delta_pct) = diff_and_pct(old, new)
        # Suppress sub-threshold noise: tiny absolute deltas on timers,
        # and tiny relative deltas on everything.
        is_timer = stat_name.startswith("time.")
        if is_timer and abs(delta) < args.delta_usec_thresh:
            continue
        if abs(delta_pct) < args.delta_pct_thresh:
            continue
        out.writerow(dict(name=stat_name,
                          old=int(old), new=int(new),
                          delta_pct=delta_pct))
        if delta > 0:
            regressions += 1
    return regressions
332419

333420

@@ -364,8 +451,12 @@ def main():
364451
help="emit a 'catapult'-compatible trace of events")
365452
modes.add_argument("--incrementality", action="store_true",
366453
help="summarize the 'incrementality' of a build")
367-
modes.add_argument("--compare-frontend-stats", action="store_true",
368-
help="Compare frontend stats from two stats-dirs")
454+
modes.add_argument("--set-csv-baseline", type=str, default=None,
455+
help="Merge stats from a stats-dir into a CSV baseline")
456+
modes.add_argument("--compare-to-csv-baseline",
457+
type=argparse.FileType('rb', 0),
458+
metavar="BASELINE.csv",
459+
help="Compare stats dir to named CSV baseline")
369460
modes.add_argument("--lnt", action="store_true",
370461
help="Emit an LNT-compatible test summary")
371462
parser.add_argument('remainder', nargs=argparse.REMAINDER,
@@ -377,8 +468,10 @@ def main():
377468
return 1
378469
if args.catapult:
379470
write_catapult_trace(args)
380-
elif args.compare_frontend_stats:
381-
return compare_frontend_stats(args)
471+
elif args.set_csv_baseline is not None:
472+
return set_csv_baseline(args)
473+
elif args.compare_to_csv_baseline:
474+
return compare_to_csv_baseline(args)
382475
elif args.incrementality:
383476
if args.paired:
384477
show_paired_incrementality(args)

0 commit comments

Comments
 (0)