77from __future__ import annotations
88
99import argparse
10+ import datetime
1011import os
1112import subprocess
1213import tempfile
1314import time
1415from pathlib import Path
1516from subprocess import CalledProcessError
16- from typing import Dict , Optional
17+ from typing import Any , Dict , Optional , Tuple
1718
1819import nbformat
20+ import pandas as pd
1921from memory_profiler import memory_usage
2022from nbconvert import PythonExporter
2123
4042}
4143
4244
def _read_command_line_output(command: str) -> str:
    """
    Run `command` as a subprocess (without a shell) and return its stdout.

    The command string is tokenized with `shlex.split`, so quoted arguments
    stay intact and repeated whitespace does not produce empty arguments.
    The exit status is not checked: a failing command simply yields whatever
    it wrote to stdout (possibly an empty string).

    Args:
        command: The command line to run, e.g. "git rev-parse --short HEAD".

    Returns:
        The UTF-8 decoded standard output of the command.
    """
    # Imported locally so this helper does not depend on a module-level import.
    import shlex

    completed = subprocess.run(shlex.split(command), stdout=subprocess.PIPE)
    return completed.stdout.decode("utf-8")
50+
51+
def get_mode_as_str(smoke_test: bool) -> str:
    """Return the run-mode label: "smoke-test" if `smoke_test`, else "standard"."""
    if smoke_test:
        return "smoke-test"
    return "standard"
54+
55+
def get_output_file_path(smoke_test: bool) -> str:
    """
    Build the file name for the tutorial performance csv.

    On push and in the nightly cron, a csv will be uploaded to
    https://github.com/pytorch/botorch/tree/artifacts/tutorial_performance_data .
    So file name contains time (for uniqueness) and commit hash (for debugging).

    Args:
        smoke_test: Whether the tutorials were run in smoke-test mode; selects
            the "smoke-test" or "standard" prefix.

    Returns:
        A name of the form "<mode>_<commit hash>_<timestamp>.csv".
    """
    commit_hash = _read_command_line_output("git rev-parse --short HEAD").strip()
    # Named `timestamp` rather than `time` so the `time` module is not shadowed.
    timestamp = str(datetime.datetime.now())
    mode = get_mode_as_str(smoke_test=smoke_test)
    # No whitespace around the underscores: `run_tutorials` recognizes result
    # files by comparing the text before the first "_" with the mode string.
    return f"{mode}_{commit_hash}_{timestamp}.csv"
67+
68+
4369def parse_ipynb (file : Path ) -> str :
4470 with open (file , "r" ) as nb_file :
4571 nb_str = nb_file .read ()
@@ -68,7 +94,13 @@ def run_script(script: str, env: Optional[Dict[str, str]] = None) -> None:
6894 return run_out
6995
7096
71- def run_tutorial (tutorial : Path , smoke_test : bool = False ) -> Optional [str ]:
97+ def run_tutorial (
98+ tutorial : Path , smoke_test : bool = False
99+ ) -> Tuple [Optional [str ], Dict [str , Any ]]:
100+ """
101+ Runs the tutorial in a subprocess, catches any raised errors and returns
102+ them as a string, and returns runtime and memory information as a dict.
103+ """
72104 script = parse_ipynb (tutorial )
73105 tic = time .monotonic ()
74106 print (f"Running tutorial { tutorial .name } ." )
@@ -78,12 +110,13 @@ def run_tutorial(tutorial: Path, smoke_test: bool = False) -> Optional[str]:
78110 (run_script , (script ,), {"env" : env }), retval = True , include_children = True
79111 )
80112 except subprocess .TimeoutExpired :
81- return f"Tutorial { tutorial .name } exceeded the maximum runtime of 30 minutes."
113+ error = f"Tutorial { tutorial .name } exceeded the maximum runtime of 30 minutes."
114+ return error , {}
82115
83116 try :
84117 run_out .check_returncode ()
85118 except CalledProcessError :
86- return "\n " .join (
119+ error = "\n " .join (
87120 [
88121 f"Encountered error running tutorial { tutorial .name } :" ,
89122 "stdout:" ,
@@ -92,11 +125,15 @@ def run_tutorial(tutorial: Path, smoke_test: bool = False) -> Optional[str]:
92125 run_out .stderr ,
93126 ]
94127 )
128+ return error , {}
95129 runtime = time .monotonic () - tic
96- print (
97- f"Running tutorial { tutorial .name } took { runtime :.2f} seconds. Memory usage "
98- f"started at { mem_usage [0 ]} MB and the maximum was { max (mem_usage )} MB."
99- )
130+ performance_info = {
131+ "runtime" : runtime ,
132+ "start_mem" : mem_usage [0 ],
133+ "max_mem" : max (mem_usage ),
134+ }
135+
136+ return None , performance_info
100137
101138
102139def run_tutorials (
@@ -105,7 +142,25 @@ def run_tutorials(
105142 smoke_test : bool = False ,
106143 name : Optional [str ] = None ,
107144) -> None :
108- print (f"Running tutorial(s) in { 'smoke test' if smoke_test else 'standard' } mode." )
145+ """
146+ Run each tutorial, print statements on how it ran, and write a data set as a csv
147+ to a directory.
148+ """
149+ mode = "smoke test" if smoke_test else "standard"
150+ results_already_stored = (
151+ elt
152+ for elt in os .listdir ()
153+ if elt [- 4 :] == ".csv" and elt .split ("_" )[0 ] in ("smoke-test" , "standard" )
154+ )
155+ for fname in results_already_stored :
156+ raise RuntimeError (
157+ f"There are already tutorial results files stored, such as { fname } . "
158+ "This is not allowed because GitHub Actions will look for all "
159+ "tutorial results files and write them to the 'artifacts' branch. "
160+ "Please remove all files matching pattern "
161+ "'standard_*.csv' or 'smoke-test_*.csv' in the current directory."
162+ )
163+ print (f"Running tutorial(s) in { mode } mode." )
109164 if not smoke_test :
110165 print ("This may take a long time..." )
111166 tutorial_dir = Path (repo_dir ).joinpath ("tutorials" )
@@ -120,20 +175,47 @@ def run_tutorials(
120175 tutorials = [t for t in tutorials if t .name == name ]
121176 if len (tutorials ) == 0 :
122177 raise RuntimeError (f"Specified tutorial { name } not found in directory." )
178+
179+ df = pd .DataFrame (
180+ {
181+ "name" : [t .name for t in tutorials ],
182+ "ran_successfully" : False ,
183+ "runtime" : float ("nan" ),
184+ "start_mem" : float ("nan" ),
185+ "max_mem" : float ("nan" ),
186+ }
187+ ).set_index ("name" )
188+
123189 for tutorial in tutorials :
124190 if not include_ignored and tutorial .name in ignored_tutorials :
125191 print (f"Ignoring tutorial { tutorial .name } ." )
126192 continue
127193 num_runs += 1
128- error = run_tutorial (tutorial , smoke_test = smoke_test )
129- if error is not None :
194+ error , performance_info = run_tutorial (tutorial , smoke_test = smoke_test )
195+ if error :
130196 num_errors += 1
131197 print (error )
198+ else :
199+ print (
200+ f"Running tutorial { tutorial .name } took "
201+ f"{ performance_info ['runtime' ]:.2f} seconds. Memory usage "
202+ f"started at { performance_info ['start_mem' ]} MB and the maximum"
203+ f" was { performance_info ['max_mem' ]} MB."
204+ )
205+ df .loc [tutorial .name , "ran_successfully" ] = True
206+ for k in ["runtime" , "start_mem" , "max_mem" ]:
207+ df .loc [tutorial .name , k ] = performance_info [k ]
208+ print (df )
209+
132210 if num_errors > 0 :
133211 raise RuntimeError (
134212 f"Running { num_runs } tutorials resulted in { num_errors } errors."
135213 )
136214
215+ fname = get_output_file_path (smoke_test = smoke_test )
216+ print (f"Writing report to { fname } ." )
217+ df .to_csv (fname )
218+
137219
138220if __name__ == "__main__" :
139221 parser = argparse .ArgumentParser (description = "Run the tutorials." )
0 commit comments