Skip to content

Commit c9871ba

Browse files
committed
Fix perf period calculation
1 parent 4b956d2 commit c9871ba

File tree

3 files changed

+86
-21
lines changed

3 files changed

+86
-21
lines changed

bench_runner/scripts/profiling_plot.py

Lines changed: 82 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from collections import defaultdict
1010
import csv
1111
import functools
12+
import json
1213
from operator import itemgetter
1314
from pathlib import Path
1415
import re
@@ -25,6 +26,9 @@
2526

2627
SANITY_CHECK = True
2728

29+
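# Must match the value written to perf_event_max_sample_rate in
# _benchmark.src.yml.
# NOTE(review): the workflow step currently writes 100000 while this constant
# is 1000000 -- confirm which value is intended and bring the two in sync.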
30+
PERF_PERIOD = 1000000
31+
2832

2933
# Categories of functions, where each value is a list of regular expressions.
3034
# These are matched in-order.
@@ -40,6 +44,7 @@
4044
"_PyPegen_.+",
4145
"_PyStack_.+",
4246
"_PyVectorcall_.+",
47+
"_TAIL_CALL_.+",
4348
"advance",
4449
"call_instrumentation_vector.*",
4550
"initialize_locals",
@@ -289,7 +294,7 @@ def handle_benchmark(
289294
md: IO[str],
290295
results: defaultdict[str, defaultdict[str, float]],
291296
categories: defaultdict[str, defaultdict[tuple[str, str], float]],
292-
):
297+
) -> float:
293298
csv_path = Path(csv_path)
294299

295300
stem = csv_path.stem.split(".", 1)[0]
@@ -314,6 +319,7 @@ def handle_benchmark(
314319
tainted_pids.add(pid)
315320

316321
times = defaultdict(float)
322+
total = 0.0
317323
with csv_path.open(newline="") as fd:
318324
csvreader = csv.reader(fd)
319325
for _ in csvreader:
@@ -327,18 +333,14 @@ def handle_benchmark(
327333
continue
328334

329335
self_time = float(self_time)
330-
if self_time > 1.0:
331-
print(f"{stem} Invalid data")
332336
if obj == "[JIT]":
333337
times[("[JIT]", "jit")] += self_time
334338
else:
335339
times[(obj, sym)] += self_time
340+
total += self_time
336341

337-
total = sum(times.values())
338-
assert total <= 1.0
339342
scale = 1.0 / total
340-
rows = [(v * scale, k[0], k[1]) for k, v in times.items()]
341-
rows.sort(reverse=True)
343+
rows = sorted(((v, *k) for k, v in times.items()), reverse=True)
342344

343345
for self_time, obj, sym in rows:
344346
if self_time <= 0.0:
@@ -349,8 +351,11 @@ def handle_benchmark(
349351

350352
results[stem][category] += self_time
351353

352-
if self_time >= 0.0025:
353-
md.write(f"| {self_time:.2%} | `{obj}` | `{sym}` | {category} |\n")
354+
scaled_time = self_time * scale
355+
if scaled_time >= 0.0025:
356+
md.write(f"| {scaled_time:.2%} | `{obj}` | `{sym}` | {category} |\n")
357+
358+
return total
354359

355360

356361
def plot_bargraph(
@@ -414,6 +419,62 @@ def plot_pie(categories: list[tuple[float, str]], output_filename: PathLike):
414419
fig.savefig(output_filename, dpi=200)
415420

416421

422+
def handle_tail_call_stats(
    input_dir: PathLike,
    categories: defaultdict[str, defaultdict[tuple[str, str], float]],
    output_prefix: PathLike,
):
    """Write a per-bytecode CSV combining perf time with pystats counts.

    Symbols in ``categories["interpreter"]`` whose name starts with
    ``_TAIL_CALL_`` are grouped by the remainder of the name (the bytecode).
    Each bytecode's accumulated time is then joined with the
    ``opcode[<name>].execution_count`` entries read from
    ``<input_dir>/pystats.json``.

    The report is written to ``output_prefix`` with its suffix replaced by
    ``.tail_calls.csv``.  Nothing is written when there are no
    ``_TAIL_CALL_`` symbols or when ``pystats.json`` does not exist (the
    latter case prints a message).  Bytecodes with an execution count of
    zero are left out of the report.

    Args:
        input_dir: Directory expected to contain ``pystats.json``.
        categories: Mapping of category name to ``{(object, symbol): time}``.
            Only the ``"interpreter"`` category is read.
        output_prefix: Path whose suffix is replaced to name the output file.
    """
    input_dir = Path(input_dir)
    output_prefix = Path(output_prefix)

    # Accumulate time per bytecode; ``total_time`` covers tail-call handlers
    # only, so the "% time" column is relative to those handlers.
    tail_call_stats = defaultdict(float)
    total_time = 0.0
    for (_, sym), self_time in categories["interpreter"].items():
        if (bytecode := sym.removeprefix("_TAIL_CALL_")) != sym:
            tail_call_stats[bytecode] += self_time
            total_time += self_time

    if not tail_call_stats:
        return

    pystats_file = input_dir / "pystats.json"
    if not pystats_file.is_file():
        print("No pystats.json file found. Skipping tail call stats.")
        return

    # Read as UTF-8 explicitly instead of relying on the locale default.
    with pystats_file.open(encoding="utf-8") as fd:
        pystats = json.load(fd)

    execution_count_key = re.compile(r"opcode\[(.+)\]\.execution_count")
    pystats_bytecodes = defaultdict(int)
    total_count = 0
    for key, val in pystats.items():
        if match := execution_count_key.match(key):
            pystats_bytecodes[match.group(1)] += val
            total_count += val

    # newline="" lets the csv module control line endings, and the explicit
    # UTF-8 encoding is required because the header contains a non-ASCII "μ"
    # that some locale encodings cannot represent.
    csv_path = output_prefix.with_suffix(".tail_calls.csv")
    with csv_path.open("w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile, dialect="unix")
        writer.writerow(
            ["Bytecode", "% time", "count", "% count", "time per count (μs)"]
        )
        for bytecode, periods in sorted(
            tail_call_stats.items(), key=itemgetter(1), reverse=True
        ):
            count = pystats_bytecodes[bytecode]
            if count == 0:
                # No execution count available; also avoids dividing by zero.
                continue
            writer.writerow(
                [
                    bytecode,
                    f"{periods / total_time:.02%}",
                    count,
                    f"{count / total_count:.02%}",
                    # ".03f" (precision), not "03f" (zero-padded width), to
                    # match the precision-style specs used for the columns
                    # above.
                    f"{((periods / PERF_PERIOD) / count) * 1e6:.03f}",
                ]
            )
476+
477+
417478
def _main(input_dir: PathLike, output_prefix: PathLike):
418479
input_dir = Path(input_dir)
419480
output_prefix = Path(output_prefix)
@@ -425,35 +486,38 @@ def _main(input_dir: PathLike, output_prefix: PathLike):
425486
print("No profiling data. Skipping.")
426487
return
427488

489+
total = 0.0
428490
with output_prefix.with_suffix(".md").open("w") as md:
429491
for csv_path in sorted(input_dir.glob("*.csv")):
430-
handle_benchmark(csv_path, md, results, categories)
492+
if ".tail_calls.csv" in csv_path.name:
493+
continue
494+
total += handle_benchmark(csv_path, md, results, categories)
431495

432496
sorted_categories = sorted(
433-
[
434-
(sum(val.values()) / len(results), key)
435-
for (key, val) in categories.items()
436-
],
497+
[(sum(val.values()), key) for (key, val) in categories.items()],
437498
reverse=True,
438499
)
439500

440501
md.write("\n\n## Categories\n")
441-
for total, category in sorted_categories:
502+
for category_total, category in sorted_categories:
442503
matches = categories[category]
443504
md.write(f"\n### {category}\n\n")
444-
md.write(f"{total:.2%} total\n\n")
505+
md.write(f"{category_total / total:.2%} total\n\n")
445506
md.write("| percentage | object | symbol |\n")
446507
md.write("| ---: | :--- | :--- |\n")
447508
for (obj, sym), self_time in sorted(
448509
matches.items(), key=itemgetter(1), reverse=True
449510
):
450-
if self_time < 0.0025:
511+
self_fraction = self_time / total
512+
if self_fraction < 0.000025:
451513
break
452-
md.write(f"| {self_time / len(results):.2%} | {obj} | {sym} |\n")
514+
md.write(f"| {self_fraction:.2%} | {obj} | {sym} |\n")
453515

454516
plot_bargraph(results, sorted_categories, output_prefix.with_suffix(".svg"))
455517
plot_pie(sorted_categories, output_prefix.with_suffix(".pie.svg"))
456518

519+
handle_tail_call_stats(input_dir, categories, output_prefix)
520+
457521

458522
def main():
459523
parser = argparse.ArgumentParser(

bench_runner/scripts/run_benchmarks.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -189,9 +189,9 @@ def perf_to_csv(lines: Iterable[str], output: PathLike):
189189
else:
190190
_, period, command, _, symbol, shared, _ = line.split(maxsplit=6)
191191
pid, command = command.split(":")
192-
self_time = float(int(period)) / total
193-
if self_time > 0.0:
194-
rows.append([self_time, pid, command, shared, symbol])
192+
period = float(period)
193+
if period > 0.0:
194+
rows.append([period, pid, command, shared, symbol])
195195

196196
rows.sort(key=itemgetter(0), reverse=True)
197197

bench_runner/templates/_benchmark.src.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,7 @@ jobs:
225225
- name: Tune for (Linux) perf
226226
if: ${{ steps.should_run.outputs.should_run != 'false' && inputs.perf }}
227227
run: |
228+
# Must match the PERF_PERIOD value in profiling_plot.py
228229
sudo bash -c "echo 100000 > /proc/sys/kernel/perf_event_max_sample_rate"
229230
- name: Running pyperformance
230231
if: ${{ steps.should_run.outputs.should_run != 'false' }}

0 commit comments

Comments
 (0)