Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions Lib/test/pgo_task/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Most of the bm_*.py files are based on the corresponding benchmark from the
# "pyperformance" project. They have been slightly modified to be better
# suited as a PGO task. The pyperformance code is licensed under the terms
# stated below.
#
# The MIT License
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
46 changes: 46 additions & 0 deletions Lib/test/pgo_task/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import sys
import glob
import importlib
import os
import time
import argparse


def main():
    """Discover and run the PGO task modules in this package.

    Optional positional arguments name a subset of tasks to run (module
    basenames such as ``bm_argparse``); ``-n``/``--iterations`` repeats
    each task that many times.  Per-task and total wall-clock times are
    printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-n',
        '--iterations',
        type=int,
        default=1,
        help='Number of iterations to run.',
    )
    parser.add_argument('tasks', nargs='*', help='Name of tasks to run.')
    args = parser.parse_args()
    selected = set(args.tasks)
    package_dir = os.path.dirname(__file__)
    task_files = sorted(glob.glob(os.path.join(package_dir, 'bm_*.py')))
    total_time = 0
    print('Running PGO tasks...')
    for fn in task_files:
        name = os.path.splitext(os.path.basename(fn))[0]
        # An explicit task list on the command line acts as a filter.
        if selected and name not in selected:
            continue
        module = importlib.import_module(f'test.pgo_task.{name}')
        if not hasattr(module, 'run_pgo'):
            print('task module missing run_pgo()', fn)
            continue
        t0 = time.perf_counter()
        print(f'{name:>40}', end='', flush=True)
        for _ in range(args.iterations):
            module.run_pgo()
        tm = time.perf_counter() - t0
        total_time += tm
        print(f' {tm:.3f}s')
    print(f'Total time for tasks {total_time:.3f} seconds')


if __name__ == '__main__':
    main()
120 changes: 120 additions & 0 deletions Lib/test/pgo_task/bm_argparse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
"""
Benchmark argparse programs with:
1) multiple subparsers, each with their own subcommands, and then parse a series of command-line arguments.
2) a large number of optional arguments, and then parse a series of command-line arguments.

Author: Savannah Ostrowski
"""

import argparse


def generate_arguments(i: int) -> list:
    """Return a fake command line: two positional file arguments followed
    by *i* pairs of ``--option<N> value<N>`` optional arguments.
    """
    arguments = ["input.txt", "output.txt"]
    # Use a distinct loop variable; the original reused the parameter
    # name `i` as the loop variable, shadowing the argument count.
    for n in range(i):
        arguments.extend([f"--option{n}", f"value{n}"])
    return arguments


def bm_many_optionals() -> None:
    """Build a subcommand-style CLI (add/commit/push) and parse a series
    of representative command lines through it.

    Returns None; the parsing work itself is the benchmark.  The original
    annotation claimed an ArgumentParser return, but nothing is returned.

    NOTE(review): despite the name, this function builds the *subparsers*
    workload while bm_subparsers() builds the many-optionals workload —
    the two names appear swapped; verify against the benchmark harness.
    """
    parser = argparse.ArgumentParser(description="A version control system CLI")

    parser.add_argument("--version", action="version", version="1.0")

    subparsers = parser.add_subparsers(dest="command", required=True)

    add_parser = subparsers.add_parser("add", help="Add a file to the repository")
    add_parser.add_argument("files", nargs="+", help="List of files to add to staging")

    commit_parser = subparsers.add_parser(
        "commit", help="Commit changes to the repository"
    )
    commit_parser.add_argument("-m", "--message", required=True, help="Commit message")

    # --amend and --no-edit cannot be combined.
    commit_group = commit_parser.add_mutually_exclusive_group(required=False)
    commit_group.add_argument(
        "--amend", action="store_true", help="Amend the last commit"
    )
    commit_group.add_argument(
        "--no-edit", action="store_true", help="Reuse the last commit message"
    )

    push_parser = subparsers.add_parser(
        "push", help="Push changes to remote repository"
    )

    network_group = push_parser.add_argument_group("Network options")
    network_group.add_argument("--dryrun", action="store_true", help="Simulate changes")
    network_group.add_argument(
        "--timeout", type=int, default=30, help="Timeout in seconds"
    )

    auth_group = push_parser.add_argument_group("Authentication options")
    auth_group.add_argument(
        "--username", required=True, help="Username for authentication"
    )
    auth_group.add_argument(
        "--password", required=True, help="Password for authentication"
    )

    global_group = parser.add_mutually_exclusive_group()
    global_group.add_argument("--verbose", action="store_true", help="Verbose output")
    global_group.add_argument("--quiet", action="store_true", help="Quiet output")

    # Representative invocations exercising each subcommand and the
    # option groups defined above.
    argument_lists = [
        ["--verbose", "add", "file1.txt", "file2.txt"],
        ["add", "file1.txt", "file2.txt"],
        ["commit", "-m", "Initial commit"],
        ["commit", "-m", "Add new feature", "--amend"],
        [
            "push",
            "--dryrun",
            "--timeout",
            "60",
            "--username",
            "user",
            "--password",
            "pass",
        ],
    ]

    for arguments in argument_lists:
        parser.parse_args(arguments)


def bm_subparsers() -> None:
    """Build a parser with 1000 optional arguments and parse two long
    generated command lines through it.

    Returns None; the parsing work itself is the benchmark.  The original
    annotation claimed an ArgumentParser return, but nothing is returned.

    NOTE(review): despite the name, this function builds the
    *many-optionals* workload while bm_many_optionals() builds the
    subparsers workload — the two names appear swapped; verify against
    the benchmark harness.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("input_file", type=str, help="The input file")
    parser.add_argument("output_file", type=str, help="The output file")

    for i in range(1000):
        parser.add_argument(f"--option{i}", type=str, help=f"Optional argument {i}")

    argument_lists = [
        generate_arguments(500),
        generate_arguments(1000),
    ]

    for args in argument_lists:
        parser.parse_args(args)


# Map command-line benchmark names to the functions that run them.
BENCHMARKS = {
    "many_optionals": bm_many_optionals,
    "subparsers": bm_subparsers,
}


def add_cmdline_args(cmd, args):
    """Append the selected benchmark name to *cmd* (a command-line list)."""
    cmd += [args.benchmark]


def add_parser_args(parser):
    """Register the benchmark-selection positional argument on *parser*."""
    parser.add_argument(
        "benchmark",
        choices=BENCHMARKS,
        help="Which benchmark to run.",
    )


def run_pgo():
    """PGO entry point: run each argparse benchmark ten times."""
    for bench in BENCHMARKS.values():
        for _iteration in range(10):
            bench()
43 changes: 43 additions & 0 deletions Lib/test/pgo_task/bm_async_generators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""
Benchmark recursive async generators implemented in python
by traversing a binary tree.

Author: Kumar Aditya
"""

from __future__ import annotations

from collections.abc import AsyncIterator


class Tree:
    """Binary tree node supporting asynchronous in-order iteration."""

    def __init__(self, left: Tree | None, value: int, right: Tree | None) -> None:
        self.left = left
        self.value = value
        self.right = right

    async def __aiter__(self) -> AsyncIterator[int]:
        # In-order traversal: left subtree, this node, then right subtree.
        if self.left:
            async for item in self.left:
                yield item
        yield self.value
        if self.right:
            async for item in self.right:
                yield item


def tree(input: range) -> Tree | None:
    """Recursively build a balanced Tree from *input*; None when empty."""
    if not input:
        return None
    mid = len(input) // 2
    # Middle element becomes this node; halves become the subtrees.
    return Tree(tree(input[:mid]), input[mid], tree(input[mid + 1:]))

async def bench_async_generators(loops=100000) -> None:
    """Drain an async in-order iteration over a tree of *loops* values."""
    root = tree(range(loops))
    async for _ in root:
        pass

def run_pgo():
    """PGO entry point: run the async-generator benchmark ten times."""
    import asyncio
    run = asyncio.run
    for _repeat in range(10):
        run(bench_async_generators(1000))
151 changes: 151 additions & 0 deletions Lib/test/pgo_task/bm_async_tree.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
"""
Benchmark for async tree workload, which calls asyncio.gather() on a tree
(6 levels deep, 6 branches per level) with the leaf nodes simulating some
(potentially) async work (depending on the benchmark variant). Benchmark
variants include:

1) "none": No actual async work in the async tree.
2) "io": All leaf nodes simulate async IO workload (async sleep 50ms).
3) "memoization": All leaf nodes simulate async IO workload with 90% of
the data memoized
4) "cpu_io_mixed": Half of the leaf nodes simulate CPU-bound workload and
the other half simulate the same workload as the
"memoization" variant.

All variants also have an "eager" flavor that uses the asyncio eager task
factory (if available), and a "tg" variant that uses TaskGroups.
"""


import asyncio
import math
import random

NUM_RECURSE_LEVELS = 5
NUM_RECURSE_BRANCHES = 5
RANDOM_SEED = 0
IO_SLEEP_TIME = 0.02
MEMOIZABLE_PERCENTAGE = 90
CPU_PROBABILITY = 0.5
FACTORIAL_N = 500


class AsyncTree:
    """Base class for the async-tree benchmark variants.

    Recurses NUM_RECURSE_LEVELS deep with NUM_RECURSE_BRANCHES children
    per node; subclasses override workload_func() to define what each
    leaf does.
    """

    def __init__(self, use_task_groups=False):
        self.cache = {}
        self.use_task_groups = use_task_groups
        # Deterministic random so the results are reproducible.
        random.seed(RANDOM_SEED)

    async def mock_io_call(self):
        # Simulated I/O: just an async sleep.
        await asyncio.sleep(IO_SLEEP_TIME)

    async def workload_func(self):
        raise NotImplementedError(
            "To be implemented by each variant's derived class."
        )

    async def recurse_with_gather(self, recurse_level):
        # Leaf node: do the workload instead of recursing further.
        if recurse_level == 0:
            await self.workload_func()
            return

        children = [
            self.recurse_with_gather(recurse_level - 1)
            for _ in range(NUM_RECURSE_BRANCHES)
        ]
        await asyncio.gather(*children)

    async def recurse_with_task_group(self, recurse_level):
        if recurse_level == 0:
            await self.workload_func()
            return

        async with asyncio.TaskGroup() as tg:
            for _ in range(NUM_RECURSE_BRANCHES):
                tg.create_task(
                    self.recurse_with_task_group(recurse_level - 1))

    async def run(self):
        recurse = (self.recurse_with_task_group if self.use_task_groups
                   else self.recurse_with_gather)
        await recurse(NUM_RECURSE_LEVELS)


class EagerMixin:
    """Mixin enabling asyncio's eager task factory (when this Python
    version provides one) before delegating to the next run() in the MRO."""

    async def run(self):
        loop = asyncio.get_running_loop()
        factory = getattr(asyncio, 'eager_task_factory', None)
        if factory is not None:
            loop.set_task_factory(factory)
        return await super().run()


class NoneAsyncTree(AsyncTree):
    """Variant whose leaves perform no async work at all."""

    async def workload_func(self):
        # Completes immediately; measures pure task/scheduling overhead.
        pass


# "none" variant with the eager task factory enabled.
class EagerAsyncTree(EagerMixin, NoneAsyncTree):
    pass


class IOAsyncTree(AsyncTree):
    # Every leaf simulates async I/O (sleep) via mock_io_call().
    async def workload_func(self):
        await self.mock_io_call()


# "io" variant with the eager task factory enabled.
class EagerIOAsyncTree(EagerMixin, IOAsyncTree):
    pass


class MemoizationAsyncTree(AsyncTree):
    """Variant where most leaf lookups can be served from a cache
    (MEMOIZABLE_PERCENTAGE percent of the key space is memoizable)."""

    async def workload_func(self):
        # Deterministic random; the seed is set in AsyncTree.__init__().
        key = random.randint(1, 100)

        if key <= MEMOIZABLE_PERCENTAGE:
            if self.cache.get(key):
                # Cache hit: skip the simulated I/O entirely.
                return key

            self.cache[key] = True

        await self.mock_io_call()
        return key


# "memoization" variant with the eager task factory enabled.
class EagerMemoizationAsyncTree(EagerMixin, MemoizationAsyncTree):
    pass


class CpuIoMixedAsyncTree(MemoizationAsyncTree):
    """Variant mixing CPU-bound leaves with memoized-I/O leaves."""

    async def workload_func(self):
        # Deterministic random; the seed is set in AsyncTree.__init__().
        if random.random() >= CPU_PROBABILITY:
            # Same workload as the memoization variant.
            return await MemoizationAsyncTree.workload_func(self)
        # Mock a CPU-bound call.
        return math.factorial(FACTORIAL_N)


# "cpu_io_mixed" variant with the eager task factory enabled.
class EagerCpuIoMixedAsyncTree(EagerMixin, CpuIoMixedAsyncTree):
    pass


# Map benchmark variant names to their AsyncTree subclasses.
BENCHMARKS = {
    "none": NoneAsyncTree,
    "eager": EagerAsyncTree,
    "io": IOAsyncTree,
    "eager_io": EagerIOAsyncTree,
    "memoization": MemoizationAsyncTree,
    "eager_memoization": EagerMemoizationAsyncTree,
    "cpu_io_mixed": CpuIoMixedAsyncTree,
    "eager_cpu_io_mixed": EagerCpuIoMixedAsyncTree,
}


def run_pgo():
    """PGO entry point: run each variant both with and without TaskGroups."""
    for async_tree_class in BENCHMARKS.values():
        for use_task_groups in [True, False]:
            instance = async_tree_class(use_task_groups=use_task_groups)
            asyncio.run(instance.run())
Loading
Loading