From 8db2f33713ff0925e98978fe90d904d498663979 Mon Sep 17 00:00:00 2001 From: Aakash Gupta Date: Wed, 4 Sep 2019 15:03:05 +1000 Subject: [PATCH 1/4] added support to execute downloaded notebooks --- sphinxcontrib/jupyter/__init__.py | 1 + sphinxcontrib/jupyter/builders/jupyter.py | 95 +++++++++++++++------ sphinxcontrib/jupyter/writers/execute_nb.py | 36 ++++---- 3 files changed, 87 insertions(+), 45 deletions(-) diff --git a/sphinxcontrib/jupyter/__init__.py b/sphinxcontrib/jupyter/__init__.py index ed99a1f2..ca6a5797 100644 --- a/sphinxcontrib/jupyter/__init__.py +++ b/sphinxcontrib/jupyter/__init__.py @@ -61,6 +61,7 @@ def setup(app): app.add_config_value("jupyter_theme_path", "theme", "jupyter") app.add_config_value("jupyter_template_path", "templates", "jupyter") app.add_config_value("jupyter_dependencies", None, "jupyter") + app.add_config_value("jupyter_download_nb_execute", None, "jupyter") # Jupyter Directive app.add_node(jupyter_node, html=(_noop, _noop), latex=(_noop, _noop)) diff --git a/sphinxcontrib/jupyter/builders/jupyter.py b/sphinxcontrib/jupyter/builders/jupyter.py index 7da2bde7..83cbe47e 100644 --- a/sphinxcontrib/jupyter/builders/jupyter.py +++ b/sphinxcontrib/jupyter/builders/jupyter.py @@ -35,6 +35,16 @@ class JupyterBuilder(Builder): logger = logging.getLogger(__name__) def init(self): + ### initializing required classes + self._execute_notebook_class = ExecuteNotebookWriter(self) + self._make_site_class = MakeSiteWriter(self) + self.executedir = self.outdir + '/executed' + self.reportdir = self.outdir + '/reports/' + self.errordir = self.outdir + "/reports/{}" + self.downloadsdir = self.outdir + "/_downloads" + self.downloadsExecutedir = self.downloadsdir + "/executed" + self.client = None + # Check default language is defined in the jupyter kernels def_lng = self.config["jupyter_default_lang"] if def_lng not in self.config["jupyter_kernels"]: @@ -68,20 +78,35 @@ def init(self): # start a dask client to process the notebooks efficiently. 
# processes = False. This is sometimes preferable if you want to avoid inter-worker communication and your computations release the GIL. This is common when primarily using NumPy or Dask Array. - if ("jupyter_make_site" in self.config and self.config["jupyter_execute_notebooks"]): + if (self.config["jupyter_execute_notebooks"]): self.client = Client(processes=False, threads_per_worker = self.threads_per_worker, n_workers = self.n_workers) - self.dependency_lists = self.config["jupyter_dependency_lists"] - self.executed_notebooks = [] - self.delayed_notebooks = dict() - self.futures = [] - self.delayed_futures = [] - - ### initializing required classes - self._execute_notebook_class = ExecuteNotebookWriter(self) - self._make_site_class = MakeSiteWriter(self) - self.executedir = self.outdir + '/executed' - self.reportdir = self.outdir + '/reports/' - self.errordir = self.outdir + "/reports/{}" + self.execution_vars = { + 'target': 'website', + 'dependency_lists': self.config["jupyter_dependency_lists"], + 'executed_notebooks': [], + 'delayed_notebooks': dict(), + 'futures': [], + 'delayed_futures': [], + 'destination': self.executedir + } + # self.dependency_lists = self.config["jupyter_dependency_lists"] + # self.executed_notebooks = [] + # self.delayed_notebooks = dict() + # self.futures = [] + # self.delayed_futures = [] + + if (self.config["jupyter_download_nb_execute"]): + if self.client is None: + self.client = Client(processes=False, threads_per_worker = self.threads_per_worker, n_workers = self.n_workers) + self.download_execution_vars = { + 'target': 'downloads', + 'dependency_lists': self.config["jupyter_dependency_lists"], + 'executed_notebooks': [], + 'delayed_notebooks': dict(), + 'futures': [], + 'delayed_futures': [], + 'destination': self.downloadsExecutedir + } def get_outdated_docs(self): for docname in self.env.found_docs: @@ -134,6 +159,14 @@ def write_doc(self, docname, doctree): except (IOError, OSError) as err: self.warn("error writing file %s: 
%s" % (outfilename, err)) + ### executing downloaded notebooks + if (self.config['jupyter_download_nb_execute']): + strDocname = str(docname) + if strDocname in self.download_execution_vars['dependency_lists'].keys(): + self.download_execution_vars['delayed_notebooks'].update({strDocname: self.writer.output}) + else: + self._execute_notebook_class.execute_notebook(self, self.writer.output, docname, self.download_execution_vars, self.download_execution_vars['futures']) + ### output notebooks for executing self.writer._set_ref_urlpath(None) self.writer._set_jupyter_download_nb_image_urlpath(None) @@ -142,10 +175,10 @@ def write_doc(self, docname, doctree): ### execute the notebook if (self.config["jupyter_execute_notebooks"]): strDocname = str(docname) - if strDocname in self.dependency_lists.keys(): - self.delayed_notebooks.update({strDocname: self.writer.output}) + if strDocname in self.execution_vars['dependency_lists'].keys(): + self.execution_vars['delayed_notebooks'].update({strDocname: self.writer.output}) else: - self._execute_notebook_class.execute_notebook(self, self.writer.output, docname, self.futures) + self._execute_notebook_class.execute_notebook(self, self.writer.output, docname, self.execution_vars, self.execution_vars['futures']) else: #do not execute if (self.config['jupyter_generate_html']): @@ -188,26 +221,34 @@ def copy_static_files(self): def finish(self): - self.finish_tasks.add_task(self.copy_static_files) if (self.config["jupyter_execute_notebooks"]): + self.finish_tasks.add_task(self.copy_static_files) + self.save_executed_and_generate_coverage(self.execution_vars,'website', self.config['jupyter_make_coverage']) + + if (self.config["jupyter_download_nb_execute"]): + self.finish_tasks.add_task(self.copy_static_files) + self.save_executed_and_generate_coverage(self.download_execution_vars, 'downloads') + + ### create a website folder + if "jupyter_make_site" in self.config and self.config['jupyter_make_site']: + 
self._make_site_class.build_website(self) + + def save_executed_and_generate_coverage(self, params, target, coverage = False): + # watch progress of the execution of futures - self.logger.info(bold("Starting notebook execution and html conversion(if set in config)...")) + self.logger.info(bold("Starting notebook execution for %s and html conversion(if set in config)..."), target) #progress(self.futures) # save executed notebook - error_results = self._execute_notebook_class.save_executed_notebook(self) + error_results = self._execute_notebook_class.save_executed_notebook(self, params) ##generate coverage if config value set - if self.config['jupyter_make_coverage']: + if coverage: ## produces a JSON file of dask execution - self._execute_notebook_class.produce_dask_processing_report(self) + self._execute_notebook_class.produce_dask_processing_report(self, params) ## generate the JSON code execution reports file - error_results = self._execute_notebook_class.produce_code_execution_report(self, error_results) - - self._execute_notebook_class.create_coverage_report(self, error_results) + error_results = self._execute_notebook_class.produce_code_execution_report(self, error_results, params) - ### create a website folder - if "jupyter_make_site" in self.config and self.config['jupyter_make_site']: - self._make_site_class.build_website(self) + self._execute_notebook_class.create_coverage_report(self, error_results, params) diff --git a/sphinxcontrib/jupyter/writers/execute_nb.py b/sphinxcontrib/jupyter/writers/execute_nb.py index a40b4fa2..04165636 100644 --- a/sphinxcontrib/jupyter/writers/execute_nb.py +++ b/sphinxcontrib/jupyter/writers/execute_nb.py @@ -20,7 +20,7 @@ class ExecuteNotebookWriter(): startFlag = 0 def __init__(self, builderSelf): pass - def execute_notebook(self, builderSelf, f, filename, futures): + def execute_notebook(self, builderSelf, f, filename, params, futures): execute_nb_config = builderSelf.config["jupyter_execute_nb"] coverage = 
builderSelf.config["jupyter_make_coverage"] timeout = execute_nb_config["timeout"] @@ -43,9 +43,9 @@ def execute_notebook(self, builderSelf, f, filename, futures): # - Parse Directories and execute them - # if coverage: - self.execution_cases(builderSelf, builderSelf.executedir, False, subdirectory, language, futures, nb, filename, full_path) + self.execution_cases(builderSelf, params['destination'], False, subdirectory, language, futures, nb, filename, full_path) else: - self.execution_cases(builderSelf, builderSelf.executedir, True, subdirectory, language, futures, nb, filename, full_path) + self.execution_cases(builderSelf, params['destination'], True, subdirectory, language, futures, nb, filename, full_path) def execution_cases(self, builderSelf, directory, allow_errors, subdirectory, language, futures, nb, filename, full_path): ## function to handle the cases of execution for coverage reports or html conversion pipeline @@ -88,7 +88,7 @@ def task_execution_time(self, builderSelf): computing_time = time_tuple[2] - time_tuple[1] return computing_time - def check_execution_completion(self, builderSelf, future, nb, error_results, count, total_count, futures_name): + def check_execution_completion(self, builderSelf, future, nb, error_results, count, total_count, futures_name, params): error_result = [] builderSelf.dask_log['futures'].append(str(future)) status = 'pass' @@ -118,19 +118,19 @@ def check_execution_completion(self, builderSelf, future, nb, error_results, cou executed_nb['metadata']['download_nb_path'] = builderSelf.config['jupyter_download_nb_urlpath'] if (futures_name.startswith('delayed') != -1): # adding in executed notebooks list - builderSelf.executed_notebooks.append(filename) + params['executed_notebooks'].append(filename) key_to_delete = False - for nb, arr in builderSelf.dependency_lists.items(): + for nb, arr in params['dependency_lists'].items(): executed = 0 for elem in arr: - if elem in builderSelf.executed_notebooks: + if elem in 
params['executed_notebooks']: executed += 1 if (executed == len(arr)): key_to_delete = nb - notebook = builderSelf.delayed_notebooks.get(nb) - builderSelf._execute_notebook_class.execute_notebook(builderSelf, notebook, nb, builderSelf.delayed_futures) + notebook = params['delayed_notebooks'].get(nb) + builderSelf._execute_notebook_class.execute_notebook(builderSelf, notebook, nb, params, params['delayed_futures']) if (key_to_delete): - del builderSelf.dependency_lists[str(key_to_delete)] + del params['dependency_lists'][str(key_to_delete)] key_to_delete = False notebook_name = "{}.ipynb".format(filename) executed_notebook_path = os.path.join(passed_metadata['path'], notebook_name) @@ -146,7 +146,7 @@ def check_execution_completion(self, builderSelf, future, nb, error_results, cou ## generate html if needed if (builderSelf.config['jupyter_generate_html']): - builderSelf._convert_class.convert(executed_nb, filename, language_info, builderSelf.executedir, passed_metadata['path']) + builderSelf._convert_class.convert(executed_nb, filename, language_info, params['destination'], passed_metadata['path']) print('({}/{}) {} -- {} -- {:.2f}s'.format(count, total_count, filename, status, computing_time)) @@ -160,7 +160,7 @@ def check_execution_completion(self, builderSelf, future, nb, error_results, cou results['language'] = language_info error_results.append(results) - def save_executed_notebook(self, builderSelf): + def save_executed_notebook(self, builderSelf, params): error_results = [] builderSelf.dask_log['scheduler_info'] = builderSelf.client.scheduler_info() @@ -171,19 +171,19 @@ def save_executed_notebook(self, builderSelf): builderSelf._convert_class = convertToHtmlWriter(builderSelf) # this for loop gathers results in the background - total_count = len(builderSelf.futures) + total_count = len(params['futures']) count = 0 update_count_delayed = 1 - for future, nb in as_completed(builderSelf.futures, with_results=True, raise_errors=False): + for future, nb in 
as_completed(params['futures'], with_results=True, raise_errors=False): count += 1 - builderSelf._execute_notebook_class.check_execution_completion(builderSelf, future, nb, error_results, count, total_count, 'futures') + builderSelf._execute_notebook_class.check_execution_completion(builderSelf, future, nb, error_results, count, total_count, 'futures', params) - for future, nb in as_completed(builderSelf.delayed_futures, with_results=True, raise_errors=False): + for future, nb in as_completed(params['delayed_futures'], with_results=True, raise_errors=False): count += 1 if update_count_delayed == 1: update_count_delayed = 0 - total_count += len(builderSelf.delayed_futures) - builderSelf._execute_notebook_class.check_execution_completion(builderSelf, future, nb, error_results, count, total_count, 'delayed_futures') + total_count += len(params['delayed_futures']) + builderSelf._execute_notebook_class.check_execution_completion(builderSelf, future, nb, error_results, count, total_count, 'delayed_futures', params) return error_results From 8d0b35006badc5613c06fbe0ca58cbd1e3f96d91 Mon Sep 17 00:00:00 2001 From: Aakash Gupta Date: Thu, 5 Sep 2019 11:39:41 +1000 Subject: [PATCH 2/4] passing params parameter to coverage --- sphinxcontrib/jupyter/builders/jupyter.py | 8 ++------ sphinxcontrib/jupyter/writers/execute_nb.py | 6 +++--- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/sphinxcontrib/jupyter/builders/jupyter.py b/sphinxcontrib/jupyter/builders/jupyter.py index 83cbe47e..fc4e49c1 100644 --- a/sphinxcontrib/jupyter/builders/jupyter.py +++ b/sphinxcontrib/jupyter/builders/jupyter.py @@ -78,6 +78,7 @@ def init(self): # start a dask client to process the notebooks efficiently. # processes = False. This is sometimes preferable if you want to avoid inter-worker communication and your computations release the GIL. This is common when primarily using NumPy or Dask Array. 
+ if (self.config["jupyter_execute_notebooks"]): self.client = Client(processes=False, threads_per_worker = self.threads_per_worker, n_workers = self.n_workers) self.execution_vars = { @@ -89,11 +90,6 @@ def init(self): 'delayed_futures': [], 'destination': self.executedir } - # self.dependency_lists = self.config["jupyter_dependency_lists"] - # self.executed_notebooks = [] - # self.delayed_notebooks = dict() - # self.futures = [] - # self.delayed_futures = [] if (self.config["jupyter_download_nb_execute"]): if self.client is None: @@ -147,7 +143,7 @@ def write_doc(self, docname, doctree): ### print an output for downloading notebooks as well with proper links if variable is set if "jupyter_download_nb" in self.config and self.config["jupyter_download_nb"]: - outfilename = os.path.join(self.outdir + "/_downloads", os_path(docname) + self.out_suffix) + outfilename = os.path.join(self.downloadsdir, os_path(docname) + self.out_suffix) ensuredir(os.path.dirname(outfilename)) self.writer._set_ref_urlpath(self.config["jupyter_download_nb_urlpath"]) self.writer._set_jupyter_download_nb_image_urlpath((self.config["jupyter_download_nb_image_urlpath"])) diff --git a/sphinxcontrib/jupyter/writers/execute_nb.py b/sphinxcontrib/jupyter/writers/execute_nb.py index 04165636..26db5818 100644 --- a/sphinxcontrib/jupyter/writers/execute_nb.py +++ b/sphinxcontrib/jupyter/writers/execute_nb.py @@ -187,7 +187,7 @@ def save_executed_notebook(self, builderSelf, params): return error_results - def produce_code_execution_report(self, builderSelf, error_results, fln = "code-execution-results.json"): + def produce_code_execution_report(self, builderSelf, error_results, params, fln = "code-execution-results.json"): """ Updates the JSON file that contains the results of the execution of each notebook. """ @@ -260,7 +260,7 @@ def produce_code_execution_report(self, builderSelf, error_results, fln = "code- except IOError: self.logger.warning("Unable to save lecture status JSON file. 
Does the {} directory exist?".format(builderSelf.reportdir)) - def produce_dask_processing_report(self, builderSelf, fln= "dask-reports.json"): + def produce_dask_processing_report(self, builderSelf, params, fln= "dask-reports.json"): """ produces a report of dask execution """ @@ -280,7 +280,7 @@ def produce_dask_processing_report(self, builderSelf, fln= "dask-reports.json"): except IOError: self.logger.warning("Unable to save dask reports JSON file. Does the {} directory exist?".format(builderSelf.reportdir)) - def create_coverage_report(self, builderSelf, error_results): + def create_coverage_report(self, builderSelf, error_results, params): """ Creates a coverage report of the errors in notebook """ From 1b60ef7e1b2ed66ffb51ce7a4c2592b90882fb47 Mon Sep 17 00:00:00 2001 From: Aakash Gupta Date: Fri, 6 Sep 2019 15:13:50 +1000 Subject: [PATCH 3/4] converting to html only when target is website --- sphinxcontrib/jupyter/writers/execute_nb.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sphinxcontrib/jupyter/writers/execute_nb.py b/sphinxcontrib/jupyter/writers/execute_nb.py index 26db5818..baf11ee6 100644 --- a/sphinxcontrib/jupyter/writers/execute_nb.py +++ b/sphinxcontrib/jupyter/writers/execute_nb.py @@ -145,7 +145,7 @@ def check_execution_completion(self, builderSelf, future, nb, error_results, cou nbformat.write(executed_nb, f) ## generate html if needed - if (builderSelf.config['jupyter_generate_html']): + if (builderSelf.config['jupyter_generate_html'] and params['target'] == 'website'): builderSelf._convert_class.convert(executed_nb, filename, language_info, params['destination'], passed_metadata['path']) print('({}/{}) {} -- {} -- {:.2f}s'.format(count, total_count, filename, status, computing_time)) @@ -167,7 +167,7 @@ def save_executed_notebook(self, builderSelf, params): builderSelf.dask_log['futures'] = [] ## create an instance of the class id config set - if (builderSelf.config['jupyter_generate_html']): + if 
(builderSelf.config['jupyter_generate_html'] and params['target'] == 'website'): builderSelf._convert_class = convertToHtmlWriter(builderSelf) # this for loop gathers results in the background From 896daf164318a3283a4701bbe98ab33e4c7881a6 Mon Sep 17 00:00:00 2001 From: Aakash Gupta Date: Tue, 10 Sep 2019 15:34:28 +1000 Subject: [PATCH 4/4] copying dependencies for downloads folder and ipynb set for website from executed downloads --- sphinxcontrib/jupyter/builders/jupyter.py | 3 +++ sphinxcontrib/jupyter/writers/make_site.py | 8 ++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/sphinxcontrib/jupyter/builders/jupyter.py b/sphinxcontrib/jupyter/builders/jupyter.py index fc4e49c1..909b012f 100644 --- a/sphinxcontrib/jupyter/builders/jupyter.py +++ b/sphinxcontrib/jupyter/builders/jupyter.py @@ -135,6 +135,9 @@ def prepare_writing(self, docnames): ## copies the dependencies to the executed folder copy_dependencies(self, self.executedir) + if (self.config["jupyter_download_nb_execute"]): + copy_dependencies(self, self.downloadsExecutedir) + def write_doc(self, docname, doctree): # work around multiple string % tuple issues in docutils; # replace tuples in attribute values with lists diff --git a/sphinxcontrib/jupyter/writers/make_site.py b/sphinxcontrib/jupyter/writers/make_site.py index ae0c0db5..9013a8c0 100644 --- a/sphinxcontrib/jupyter/writers/make_site.py +++ b/sphinxcontrib/jupyter/writers/make_site.py @@ -73,8 +73,12 @@ def build_website(self, builderSelf): ## copies the downloads folder if "jupyter_download_nb" in builderSelf.config and builderSelf.config["jupyter_download_nb"]: - if os.path.exists(builderSelf.outdir + "/_downloads"): - shutil.copytree(builderSelf.outdir + "/_downloads", self.downloadipynbdir, symlinks=True) + if builderSelf.config["jupyter_download_nb_execute"]: + sourceDownloads = builderSelf.outdir + "/_downloads/executed" + else: + sourceDownloads = builderSelf.outdir + "/_downloads" + if 
os.path.exists(sourceDownloads): + shutil.copytree(sourceDownloads, self.downloadipynbdir, symlinks=True) else: self.logger.warning("Downloads folder not created during build")