Upload tutorials output to a csv in 'artifacts' branch (#1695)

esantorella · facebook-github-bot · commit 68547512e047 · 2023-02-21T11:42:37.000-08:00
Summary: ## Motivation Writes the runtime and memory output we already produce to the 'artifacts' branch. The upload happens when the tutorials run on push and in the nightly cron. An example will show up here after the nightly cron finishes running: https://github.com/pytorch/botorch/tree/artifacts/tutorial_performance_data Currently there are a couple of test files in there, which I plan to clean up. Pull Request resolved: #1695 Test Plan: [x] Check that the upload works "with smoke test" by setting it to run on a push to this branch: https://github.com/pytorch/botorch/actions/runs/4226339782 [x] Check that it runs in the nightly cron and that the output looks as expected: https://github.com/pytorch/botorch/blob/artifacts/tutorial_performance_data/standard_590b6edd_2023-02-20%2019%3A09%3A44.557008.csv Reviewed By: saitcakmak Differential Revision: D43446909 Pulled By: esantorella fbshipit-source-id: c84912d42316e75d7e5c40caa6dc40d4716e205a
diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
@@ -131,3 +131,4 @@ jobs:
       smoke_test: false
       use_stable_pytorch_gpytorch: false
       use_stable_ax: false
+      upload_artifact: true
diff --git a/.github/workflows/reusable_tutorials.yml b/.github/workflows/reusable_tutorials.yml
@@ -12,6 +12,9 @@ on:
       use_stable_ax:
         required: true
         type: boolean
+      upload_artifact:
+        required: true
+        type: boolean
   workflow_call:
     inputs:
       smoke_test:
@@ -26,6 +29,10 @@ on:
         required: false
         type: boolean
         default: false
+      upload_artifact:
+        required: false
+        type: boolean
+        default: false
 
 jobs:
   tutorials:
@@ -81,3 +88,14 @@ jobs:
       name: Run tutorials without smoke test
       run: |
         python scripts/run_tutorials.py -p "$(pwd)"
+    - if: ${{ inputs.upload_artifact }}
+      name: Upload performance data to artifacts branch
+      # Upload any csv files (the tutorials script ensures there is only one)
+      run: |
+        git config user.email "github-actions@github.com"
+        git config user.name "github-actions"
+        git switch artifacts
+        mv *.csv tutorial_performance_data/
+        git add tutorial_performance_data/*.csv
+        git commit tutorial_performance_data/*.csv -m "Adding most recent tutorials output"
+        git push origin artifacts
diff --git a/.github/workflows/tutorials_smoke_test_on_pr.yml b/.github/workflows/tutorials_smoke_test_on_pr.yml
@@ -1,8 +1,6 @@
-name: Tutorials
+name: Tutorials on PR
 
 on:
-  push:
-    branches: [ main ]
   pull_request:
     branches: [ main ]
   workflow_dispatch:
@@ -16,3 +14,4 @@ jobs:
       smoke_test: true
       use_stable_pytorch_gpytorch: false
       use_stable_ax: false
+      upload_artifact: false
diff --git a/.github/workflows/tutorials_smoke_test_on_push.yml b/.github/workflows/tutorials_smoke_test_on_push.yml
@@ -0,0 +1,17 @@
+name: Tutorials on Push
+
+on:
+  push:
+    branches: [ main ]
+  workflow_dispatch:
+
+
+jobs:
+  run_tutorials_with_smoke_test:
+    name: Run tutorials with smoke test on latest PyTorch / GPyTorch / Ax
+    uses: ./.github/workflows/reusable_tutorials.yml
+    with:
+      smoke_test: true
+      use_stable_pytorch_gpytorch: false
+      use_stable_ax: false
+      upload_artifact: true
diff --git a/scripts/run_tutorials.py b/scripts/run_tutorials.py
@@ -7,15 +7,17 @@
 from __future__ import annotations
 
 import argparse
+import datetime
 import os
 import subprocess
 import tempfile
 import time
 from pathlib import Path
 from subprocess import CalledProcessError
-from typing import Dict, Optional
+from typing import Any, Dict, Optional, Tuple
 
 import nbformat
+import pandas as pd
 from memory_profiler import memory_usage
 from nbconvert import PythonExporter
 
@@ -40,6 +42,30 @@
 }
 
 
+def _read_command_line_output(command: str) -> str:
+    """Run `command` (split on single spaces) and return its stdout as UTF-8 text."""
+    completed = subprocess.run(command.split(" "), stdout=subprocess.PIPE)
+    output = completed.stdout.decode("utf-8")
+    return output
+
+
+def get_mode_as_str(smoke_test: bool) -> str:
+    return "smoke-test" if smoke_test else "standard"  # prefix of the csv file name
+
+
+def get_output_file_path(smoke_test: bool) -> str:
+    """
+    Build the name of the csv that is uploaded, on push and in the nightly cron, to
+    https://github.com/pytorch/botorch/tree/artifacts/tutorial_performance_data .
+    The name holds the time (for uniqueness) and the commit hash (for debugging).
+    """
+    mode = get_mode_as_str(smoke_test=smoke_test)
+    short_hash = _read_command_line_output("git rev-parse --short HEAD").strip("\n")
+    # str() of a datetime looks like "2023-02-20 19:09:44.557008"
+    timestamp = str(datetime.datetime.now())
+    return f"{mode}_{short_hash}_{timestamp}.csv"
+
+
 def parse_ipynb(file: Path) -> str:
     with open(file, "r") as nb_file:
         nb_str = nb_file.read()
@@ -68,7 +94,13 @@ def run_script(script: str, env: Optional[Dict[str, str]] = None) -> None:
     return run_out
 
 
-def run_tutorial(tutorial: Path, smoke_test: bool = False) -> Optional[str]:
+def run_tutorial(
+    tutorial: Path, smoke_test: bool = False
+) -> Tuple[Optional[str], Dict[str, Any]]:
+    """
+    Runs the tutorial in a subprocess, catches any raised errors and returns
+    them as a string, and returns runtime and memory information as a dict.
+    """
     script = parse_ipynb(tutorial)
     tic = time.monotonic()
     print(f"Running tutorial {tutorial.name}.")
@@ -78,12 +110,13 @@ def run_tutorial(tutorial: Path, smoke_test: bool = False) -> Optional[str]:
             (run_script, (script,), {"env": env}), retval=True, include_children=True
         )
     except subprocess.TimeoutExpired:
-        return f"Tutorial {tutorial.name} exceeded the maximum runtime of 30 minutes."
+        error = f"Tutorial {tutorial.name} exceeded the maximum runtime of 30 minutes."
+        return error, {}
 
     try:
         run_out.check_returncode()
     except CalledProcessError:
-        return "\n".join(
+        error = "\n".join(
             [
                 f"Encountered error running tutorial {tutorial.name}:",
                 "stdout:",
@@ -92,11 +125,15 @@ def run_tutorial(tutorial: Path, smoke_test: bool = False) -> Optional[str]:
                 run_out.stderr,
             ]
         )
+        return error, {}
     runtime = time.monotonic() - tic
-    print(
-        f"Running tutorial {tutorial.name} took {runtime:.2f} seconds. Memory usage "
-        f"started at {mem_usage[0]} MB and the maximum was {max(mem_usage)} MB."
-    )
+    performance_info = {
+        "runtime": runtime,
+        "start_mem": mem_usage[0],
+        "max_mem": max(mem_usage),
+    }
+
+    return None, performance_info
 
 
 def run_tutorials(
@@ -105,7 +142,25 @@ def run_tutorials(
     smoke_test: bool = False,
     name: Optional[str] = None,
 ) -> None:
-    print(f"Running tutorial(s) in {'smoke test' if smoke_test else 'standard'} mode.")
+    """
+    Run each tutorial, print statements on how it ran, and write a data set as a csv
+    to a directory.
+    """
+    mode = "smoke test" if smoke_test else "standard"
+    results_already_stored = (
+        elt
+        for elt in os.listdir()
+        if elt[-4:] == ".csv" and elt.split("_")[0] in ("smoke-test", "standard")
+    )
+    for fname in results_already_stored:
+        raise RuntimeError(
+            f"There are already tutorial results files stored, such as {fname}. "
+            "This is not allowed because GitHub Actions will look for all "
+            "tutorial results files and write them to the 'artifacts' branch. "
+            "Please remove all files matching pattern "
+            "'standard_*.csv' or 'smoke-test_*.csv' in the current directory."
+        )
+    print(f"Running tutorial(s) in {mode} mode.")
     if not smoke_test:
         print("This may take a long time...")
     tutorial_dir = Path(repo_dir).joinpath("tutorials")
@@ -120,20 +175,47 @@ def run_tutorials(
         tutorials = [t for t in tutorials if t.name == name]
         if len(tutorials) == 0:
             raise RuntimeError(f"Specified tutorial {name} not found in directory.")
+
+    df = pd.DataFrame(
+        {
+            "name": [t.name for t in tutorials],
+            "ran_successfully": False,
+            "runtime": float("nan"),
+            "start_mem": float("nan"),
+            "max_mem": float("nan"),
+        }
+    ).set_index("name")
+
     for tutorial in tutorials:
         if not include_ignored and tutorial.name in ignored_tutorials:
             print(f"Ignoring tutorial {tutorial.name}.")
             continue
         num_runs += 1
-        error = run_tutorial(tutorial, smoke_test=smoke_test)
-        if error is not None:
+        error, performance_info = run_tutorial(tutorial, smoke_test=smoke_test)
+        if error:
             num_errors += 1
             print(error)
+        else:
+            print(
+                f"Running tutorial {tutorial.name} took "
+                f"{performance_info['runtime']:.2f} seconds. Memory usage "
+                f"started at {performance_info['start_mem']} MB and the maximum"
+                f" was {performance_info['max_mem']} MB."
+            )
+            df.loc[tutorial.name, "ran_successfully"] = True
+            for k in ["runtime", "start_mem", "max_mem"]:
+                df.loc[tutorial.name, k] = performance_info[k]
+        print(df)
+
     if num_errors > 0:
         raise RuntimeError(
             f"Running {num_runs} tutorials resulted in {num_errors} errors."
         )
 
+    fname = get_output_file_path(smoke_test=smoke_test)
+    print(f"Writing report to {fname}.")
+    df.to_csv(fname)
+
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Run the tutorials.")