Skip to content
107 changes: 84 additions & 23 deletions misc/perf_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
* Create a temp clone of the mypy repo for each target commit to measure
* Checkout a target commit in each of the clones
* Compile mypyc in each of the clones *in parallel*
* Create another temp clone of the mypy repo as the code to check
* Create another temp clone of the first provided revision (or, with -r, a foreign repo) as the code to check
* Self check with each of the compiled mypys N times
* Report the average runtimes and relative performance
* Remove the temp clones
Expand Down Expand Up @@ -44,13 +44,15 @@ def build_mypy(target_dir: str) -> None:
subprocess.run(cmd, env=env, check=True, cwd=target_dir)


def clone(target_dir: str, commit: str | None) -> None:
heading(f"Cloning mypy to {target_dir}")
repo_dir = os.getcwd()
def clone(target_dir: str, commit: str | None, repo_source: str | None = None) -> None:
source_name = repo_source or "mypy"
heading(f"Cloning {source_name} to {target_dir}")
if repo_source is None:
repo_source = os.getcwd()
if os.path.isdir(target_dir):
print(f"{target_dir} exists: deleting")
shutil.rmtree(target_dir)
subprocess.run(["git", "clone", repo_dir, target_dir], check=True)
subprocess.run(["git", "clone", repo_source, target_dir], check=True)
if commit:
subprocess.run(["git", "checkout", commit], check=True, cwd=target_dir)

Expand All @@ -64,7 +66,7 @@ def edit_python_file(fnam: str) -> None:


def run_benchmark(
compiled_dir: str, check_dir: str, *, incremental: bool, code: str | None
compiled_dir: str, check_dir: str, *, incremental: bool, code: str | None, foreign: bool | None
) -> float:
cache_dir = os.path.join(compiled_dir, ".mypy_cache")
if os.path.isdir(cache_dir) and not incremental:
Expand All @@ -76,6 +78,8 @@ def run_benchmark(
cmd = [sys.executable, "-m", "mypy"]
if code:
cmd += ["-c", code]
elif foreign:
pass
else:
cmd += ["--config-file", os.path.join(abschk, "mypy_self_check.ini")]
cmd += glob.glob(os.path.join(abschk, "mypy/*.py"))
Expand All @@ -86,18 +90,33 @@ def run_benchmark(
edit_python_file(os.path.join(abschk, "mypy/test/testcheck.py"))
t0 = time.time()
# Ignore errors, since some commits being measured may generate additional errors.
subprocess.run(cmd, cwd=compiled_dir, env=env)
if foreign:
subprocess.run(cmd, cwd=check_dir, env=env)
else:
subprocess.run(cmd, cwd=compiled_dir, env=env)
return time.time() - t0


def main() -> None:
parser = argparse.ArgumentParser()
whole_program_time_0 = time.time()
parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,
description=__doc__,
epilog="Remember: you usually want the first argument to this command to be 'master'.",
)
parser.add_argument(
"--incremental",
default=False,
action="store_true",
help="measure incremental run (fully cached)",
)
parser.add_argument(
"--dont-setup",
default=False,
action="store_true",
help="don't make the clones or compile mypy, just run the performance measurement benchmark "
+ "(this will fail unless the clones already exist, such as from a previous run that was canceled before it deleted them)",
)
parser.add_argument(
"--num-runs",
metavar="N",
Expand All @@ -112,42 +131,65 @@ def main() -> None:
type=int,
help="set maximum number of parallel builds (default=8)",
)
parser.add_argument(
"-r",
metavar="FOREIGN_REPOSITORY",
default=None,
type=str,
help="measure time to typecheck the project at FOREIGN_REPOSITORY instead of mypy self-check; "
+ "the provided value must be the URL or path of a git repo "
+ "(note that this script will take no special steps to *install* the foreign repo, so you will probably get a lot of missing import errors)",
)
parser.add_argument(
"-c",
metavar="CODE",
default=None,
type=str,
help="measure time to type check Python code fragment instead of mypy self-check",
)
parser.add_argument("commit", nargs="+", help="git revision to measure (e.g. branch name)")
parser.add_argument(
"commit",
nargs="+",
help="git revision(s), e.g. branch name or commit id, to measure the performance of",
)
args = parser.parse_args()
incremental: bool = args.incremental
dont_setup: bool = args.dont_setup
commits = args.commit
num_runs: int = args.num_runs + 1
max_workers: int = args.j
code: str | None = args.c
foreign_repo: str | None = args.r

if not (os.path.isdir(".git") and os.path.isdir("mypyc")):
sys.exit("error: Run this the mypy repo root")
sys.exit("error: You must run this script from the mypy repo root")

target_dirs = []
for i, commit in enumerate(commits):
target_dir = f"mypy.{i}.tmpdir"
target_dirs.append(target_dir)
clone(target_dir, commit)
if not dont_setup:
clone(target_dir, commit)

self_check_dir = "mypy.self.tmpdir"
clone(self_check_dir, commits[0])
if foreign_repo:
check_dir = "mypy.foreign.tmpdir"
if not dont_setup:
clone(check_dir, None, foreign_repo)
else:
check_dir = "mypy.self.tmpdir"
if not dont_setup:
clone(check_dir, commits[0])

heading("Compiling mypy")
print("(This will take a while...)")
if not dont_setup:
heading("Compiling mypy")
print("(This will take a while...)")

with ThreadPoolExecutor(max_workers=max_workers) as executor:
futures = [executor.submit(build_mypy, target_dir) for target_dir in target_dirs]
for future in as_completed(futures):
future.result()
with ThreadPoolExecutor(max_workers=max_workers) as executor:
futures = [executor.submit(build_mypy, target_dir) for target_dir in target_dirs]
for future in as_completed(futures):
future.result()

print(f"Finished compiling mypy ({len(commits)} builds)")
print(f"Finished compiling mypy ({len(commits)} builds)")

heading("Performing measurements")

Expand All @@ -160,7 +202,13 @@ def main() -> None:
items = list(enumerate(commits))
random.shuffle(items)
for i, commit in items:
tt = run_benchmark(target_dirs[i], self_check_dir, incremental=incremental, code=code)
tt = run_benchmark(
target_dirs[i],
check_dir,
incremental=incremental,
code=code,
foreign=bool(foreign_repo),
)
# Don't record the first warm-up run
if n > 0:
print(f"{commit}: t={tt:.3f}s")
Expand All @@ -171,15 +219,28 @@ def main() -> None:
first = -1.0
for commit in commits:
tt = statistics.mean(results[commit])
# pstdev (instead of stdev) is used here primarily to accommodate the case where num_runs=1
s = statistics.pstdev(results[commit]) if len(results[commit]) > 1 else 0
if first < 0:
delta = "0.0%"
first = tt
else:
d = (tt / first) - 1
delta = f"{d:+.1%}"
print(f"{commit:<25} {tt:.3f}s ({delta})")
print(f"{commit:<25} {tt:.3f}s ({delta}) | stdev {s:.3f}s ")

t = int(time.time() - whole_program_time_0)
total_time_taken_formatted = ", ".join(
f"{v} {n if v==1 else n+'s'}"
for v, n in ((t // 3600, "hour"), (t // 60 % 60, "minute"), (t % 60, "second"))
if v
)
print(
"Total time taken by the whole benchmarking program (including any setup):",
total_time_taken_formatted,
)

shutil.rmtree(self_check_dir)
shutil.rmtree(check_dir)
for target_dir in target_dirs:
shutil.rmtree(target_dir)

Expand Down