Improve generate script

nat-n · nat-n · commit 3185c670981d · 2020-06-15T00:19:07.000+02:00
- Fix issue with __pycache__ dirs getting picked up
- Parallelise code generation with asyncio for 3x speedup
- Silence protoc output unless -v option is supplied
- Use pathlib ;)
diff --git a/betterproto/tests/generate.py b/betterproto/tests/generate.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
-import glob
+import asyncio
 import os
+from pathlib import Path
 import shutil
 import subprocess
 import sys
@@ -20,91 +21,122 @@
 os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
 
 
-def clear_directory(path: str):
-    for file_or_directory in glob.glob(os.path.join(path, "*")):
-        if os.path.isdir(file_or_directory):
+def clear_directory(dir_path: Path):
+    for file_or_directory in dir_path.glob("*"):  # direct children only
+        if file_or_directory.is_dir():
             shutil.rmtree(file_or_directory)
         else:
-            os.remove(file_or_directory)
+            file_or_directory.unlink()
 
 
-def generate(whitelist: Set[str]):
-    path_whitelist = {os.path.realpath(e) for e in whitelist if os.path.exists(e)}
-    name_whitelist = {e for e in whitelist if not os.path.exists(e)}
+async def generate(whitelist: Set[str], verbose: bool):
+    test_case_names = set(get_directories(inputs_path)) - {"__pycache__"}
 
-    test_case_names = set(get_directories(inputs_path))
-
-    failed_test_cases = []
+    path_whitelist = set()
+    name_whitelist = set()
+    for item in whitelist:
+        if item in test_case_names:
+            name_whitelist.add(item)
+            continue
+        path_whitelist.add(str(Path(item).resolve()))  # compared to resolved paths
 
+    generation_tasks = {}
     for test_case_name in sorted(test_case_names):
-        test_case_input_path = os.path.realpath(
-            os.path.join(inputs_path, test_case_name)
-        )
-
+        test_case_input_path = inputs_path.joinpath(test_case_name).resolve()
         if (
             whitelist
-            and test_case_input_path not in path_whitelist
+            and str(test_case_input_path) not in path_whitelist
             and test_case_name not in name_whitelist
         ):
             continue
+        generation_tasks[test_case_name] = generate_test_case_output(
+            test_case_input_path, test_case_name, verbose
+        )
 
-        print(f"Generating output for {test_case_name}")
-        try:
-            generate_test_case_output(test_case_name, test_case_input_path)
-        except subprocess.CalledProcessError as e:
+    failed_test_cases = []
+    # gather() keeps task order, so dict order pairs each result with its name
+    for test_case_name, result in zip(
+        generation_tasks, await asyncio.gather(*generation_tasks.values())
+    ):
+        if result != 0:
             failed_test_cases.append(test_case_name)
 
     if failed_test_cases:
-        sys.stderr.write("\nFailed to generate the following test cases:\n")
+        sys.stderr.write(
+            "\n\033[31;1;4mFailed to generate the following test cases:\033[0m\n"
+        )
         for failed_test_case in failed_test_cases:
             sys.stderr.write(f"- {failed_test_case}\n")
 
 
-def generate_test_case_output(test_case_name, test_case_input_path=None):
-    if not test_case_input_path:
-        test_case_input_path = os.path.realpath(
-            os.path.join(inputs_path, test_case_name)
-        )
+async def generate_test_case_output(
+    test_case_input_path: Path, test_case_name: str, verbose: bool
+) -> int:
+    """
+    Returns 0 only if both protoc runs exited with 0, else a non-zero exit code
+    """
 
-    test_case_output_path_reference = os.path.join(
-        output_path_reference, test_case_name
-    )
-    test_case_output_path_betterproto = os.path.join(
-        output_path_betterproto, test_case_name
-    )
+    test_case_output_path_reference = output_path_reference.joinpath(test_case_name)
+    test_case_output_path_betterproto = output_path_betterproto.joinpath(test_case_name)
 
     os.makedirs(test_case_output_path_reference, exist_ok=True)
     os.makedirs(test_case_output_path_betterproto, exist_ok=True)
 
     clear_directory(test_case_output_path_reference)
     clear_directory(test_case_output_path_betterproto)
 
-    protoc_reference(test_case_input_path, test_case_output_path_reference)
-    protoc_plugin(test_case_input_path, test_case_output_path_betterproto)
+    (
+        (ref_out, ref_err, ref_code),
+        (plg_out, plg_err, plg_code),
+    ) = await asyncio.gather(
+        protoc_reference(test_case_input_path, test_case_output_path_reference),
+        protoc_plugin(test_case_input_path, test_case_output_path_betterproto),
+    )
+
+    message = f"Generated output for {test_case_name!r}"
+    if verbose:
+        print(f"\033[31;1;4m{message}\033[0m")
+        if ref_out:
+            sys.stdout.buffer.write(ref_out)
+        if ref_err:
+            sys.stderr.buffer.write(ref_err)
+        if plg_out:
+            sys.stdout.buffer.write(plg_out)
+        if plg_err:
+            sys.stderr.buffer.write(plg_err)
+        sys.stdout.buffer.flush()
+        sys.stderr.buffer.flush()
+    else:
+        print(message)
+
+    return ref_code or plg_code  # max() would hide negative (signal) codes
 
 
 HELP = "\n".join(
-    [
-        "Usage: python generate.py",
-        "       python generate.py [DIRECTORIES or NAMES]",
+    (
+        "Usage: python generate.py [-h] [-v] [DIRECTORIES or NAMES]",
         "Generate python classes for standard tests.",
         "",
         "DIRECTORIES    One or more relative or absolute directories of test-cases to generate classes for.",
         "               python generate.py inputs/bool inputs/double inputs/enum",
         "",
         "NAMES          One or more test-case names to generate classes for.",
         "               python generate.py bool double enums",
-    ]
+    )
 )
 
 
 def main():
     if set(sys.argv).intersection({"-h", "--help"}):
         print(HELP)
         return
-    whitelist = set(sys.argv[1:])
-
-    generate(whitelist)
+    # "-v" enables verbose output wherever it appears on the command line, so
+    # it is never mistaken for a test-case name or directory; every remaining
+    # argument selects a test case to generate.
+    args = sys.argv[1:]
+    verbose = "-v" in args
+    whitelist = set(args) - {"-v"}
+    asyncio.get_event_loop().run_until_complete(generate(whitelist, verbose))
 
 
 if __name__ == "__main__":
diff --git a/betterproto/tests/test_inputs.py b/betterproto/tests/test_inputs.py
@@ -23,7 +23,7 @@
 
 class TestCases:
     def __init__(self, path, services: Set[str], xfail: Set[str]):
-        _all = set(get_directories(path))
+        _all = set(get_directories(path)) - {"__pycache__"}
         _services = services
         _messages = _all - services
         _messages_with_json = {
diff --git a/betterproto/tests/util.py b/betterproto/tests/util.py
@@ -1,23 +1,24 @@
+import asyncio
 import os
-import subprocess
-from typing import Generator
+from pathlib import Path
+from typing import Generator, IO, Optional
 
 os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
 
-root_path = os.path.dirname(os.path.realpath(__file__))
-inputs_path = os.path.join(root_path, "inputs")
-output_path_reference = os.path.join(root_path, "output_reference")
-output_path_betterproto = os.path.join(root_path, "output_betterproto")
+root_path = Path(__file__).resolve().parent
+inputs_path = root_path.joinpath("inputs")
+output_path_reference = root_path.joinpath("output_reference")
+output_path_betterproto = root_path.joinpath("output_betterproto")
 
 if os.name == "nt":
-    plugin_path = os.path.join(root_path, "..", "plugin.bat")
+    plugin_path = root_path.joinpath("..", "plugin.bat").resolve()
 else:
-    plugin_path = os.path.join(root_path, "..", "plugin.py")
+    plugin_path = root_path.joinpath("..", "plugin.py").resolve()
 
 
-def get_files(path, end: str) -> Generator[str, None, None]:
+def get_files(path, suffix: str) -> Generator[str, None, None]:
     for r, dirs, files in os.walk(path):
-        for filename in [f for f in files if f.endswith(end)]:
+        for filename in [f for f in files if f.endswith(suffix)]:
             yield os.path.join(r, filename)
 
 
@@ -27,36 +28,30 @@ def get_directories(path):
             yield directory
 
 
-def relative(file: str, path: str):
-    return os.path.join(os.path.dirname(file), path)
-
-
-def read_relative(file: str, path: str):
-    with open(relative(file, path)) as fh:
-        return fh.read()
-
-
-def protoc_plugin(path: str, output_dir: str) -> subprocess.CompletedProcess:
-    return subprocess.run(
+async def protoc_plugin(path: str, output_dir: str):
+    proc = await asyncio.create_subprocess_shell(
         f"protoc --plugin=protoc-gen-custom={plugin_path} --custom_out={output_dir} --proto_path={path} {path}/*.proto",
-        shell=True,
-        check=True,
+        stdout=asyncio.subprocess.PIPE,  # captured so the caller decides what to print
+        stderr=asyncio.subprocess.PIPE,
     )
+    return (*(await proc.communicate()), proc.returncode)  # (stdout, stderr, code)
 
 
-def protoc_reference(path: str, output_dir: str):
-    subprocess.run(
+async def protoc_reference(path: str, output_dir: str):
+    proc = await asyncio.create_subprocess_shell(
         f"protoc --python_out={output_dir} --proto_path={path} {path}/*.proto",
-        shell=True,
+        stdout=asyncio.subprocess.PIPE,  # captured so the caller decides what to print
+        stderr=asyncio.subprocess.PIPE,
     )
+    return (*(await proc.communicate()), proc.returncode)  # (stdout, stderr, code)
 
 
-def get_test_case_json_data(test_case_name, json_file_name=None):
+def get_test_case_json_data(test_case_name: str, json_file_name: Optional[str] = None):
     test_data_file_name = json_file_name if json_file_name else f"{test_case_name}.json"
-    test_data_file_path = os.path.join(inputs_path, test_case_name, test_data_file_name)
+    test_data_file_path = inputs_path.joinpath(test_case_name, test_data_file_name)
 
-    if not os.path.exists(test_data_file_path):
+    if not test_data_file_path.exists():  # missing data file -> None, not an error
         return None
 
-    with open(test_data_file_path) as fh:
+    with test_data_file_path.open("r") as fh:
         return fh.read()