From b6557825287f682f1f9992f39a198108a3e8b1dd Mon Sep 17 00:00:00 2001 From: Kayce Basques Date: Fri, 23 May 2025 15:45:39 -0700 Subject: [PATCH 01/19] feat: add persistent worker for sphinxdocs --- sphinxdocs/private/sphinx.bzl | 87 +++++++++++----- sphinxdocs/private/sphinx_build.py | 158 ++++++++++++++++++++++++++++- 2 files changed, 218 insertions(+), 27 deletions(-) diff --git a/sphinxdocs/private/sphinx.bzl b/sphinxdocs/private/sphinx.bzl index 678d01bca7..dfda49c6a3 100644 --- a/sphinxdocs/private/sphinx.bzl +++ b/sphinxdocs/private/sphinx.bzl @@ -15,7 +15,6 @@ """Implementation of sphinx rules.""" load("@bazel_skylib//lib:paths.bzl", "paths") -load("@bazel_skylib//rules:build_test.bzl", "build_test") load("@bazel_skylib//rules:common_settings.bzl", "BuildSettingInfo") load("//python:py_binary.bzl", "py_binary") load("//python/private:util.bzl", "add_tag", "copy_propagating_kwargs") # buildifier: disable=bzl-visibility @@ -104,6 +103,7 @@ def sphinx_docs( strip_prefix = "", extra_opts = [], tools = [], + use_cache = False, **kwargs): """Generate docs using Sphinx. @@ -166,6 +166,7 @@ def sphinx_docs( source_tree = internal_name + "/_sources", extra_opts = extra_opts, tools = tools, + use_cache = use_cache, **kwargs ) @@ -177,6 +178,9 @@ def sphinx_docs( **common_kwargs ) + common_kwargs_with_manual_tag = dict(common_kwargs) + common_kwargs_with_manual_tag["tags"] = list(common_kwargs.get("tags") or []) + ["manual"] + py_binary( name = name + ".serve", srcs = [_SPHINX_SERVE_MAIN_SRC], @@ -185,17 +189,12 @@ def sphinx_docs( args = [ "$(execpath {})".format(html_name), ], - **common_kwargs + **common_kwargs_with_manual_tag ) sphinx_run( name = name + ".run", docs = name, - ) - - build_test( - name = name + "_build_test", - targets = [name], - **kwargs # kwargs used to pick up target_compatible_with + **common_kwargs_with_manual_tag ) def _sphinx_docs_impl(ctx): @@ -212,6 +211,7 @@ def _sphinx_docs_impl(ctx): source_path = source_dir_path, output_prefix = paths.join(ctx.label.name, "_build"), inputs = inputs, + use_cache = ctx.attr.use_cache, ) outputs[format] = output_dir per_format_args[format] = args_env @@ -243,7 +243,7 @@ _sphinx_docs = rule( ), "sphinx": attr.label( executable = True, - cfg = "exec", + cfg = "host", mandatory = True, doc = "Sphinx binary to generate documentation.", ), @@ -251,18 +251,26 @@ _sphinx_docs = rule( cfg = "exec", doc = "Additional tools that are used by Sphinx and its plugins.", ), + "use_cache": attr.bool( + doc = "TODO", + default = False, + ), "_extra_defines_flag": attr.label(default = "//sphinxdocs:extra_defines"), "_extra_env_flag": attr.label(default = "//sphinxdocs:extra_env"), "_quiet_flag": attr.label(default = "//sphinxdocs:quiet"), }, ) -def _run_sphinx(ctx, format, source_path, inputs, output_prefix): +def _run_sphinx(ctx, format, source_path, inputs, output_prefix, use_cache): + output_dir = ctx.actions.declare_directory(paths.join(output_prefix, format)) run_args = [] # Copy of the args to forward along to debug runner args = ctx.actions.args() # Args passed to the action + args.add(source_path) + args.add(output_dir.path) + args.add("--show-traceback") # Full tracebacks on error run_args.append("--show-traceback") args.add("--builder", format) @@ -275,10 +283,14 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix): # Build in parallel, if possible # Don't add to run_args: parallel building breaks interactive debugging args.add("--jobs", "auto") - args.add("--fresh-env") # Don't try to use cache files. Bazel can't make use of them. - run_args.append("--fresh-env") - args.add("--write-all") # Write all files; don't try to detect "changed" files - run_args.append("--write-all") + + if use_cache: + args.add("--doctree-dir", paths.join(output_dir.path, ".doctrees")) + else: + args.add("--fresh-env") # Don't try to use cache files. Bazel can't make use of them. + run_args.append("--fresh-env") + args.add("--write-all") # Write all files; don't try to detect "changed" files + run_args.append("--write-all") for opt in ctx.attr.extra_opts: expanded = ctx.expand_location(opt) @@ -290,8 +302,6 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix): for define in extra_defines: run_args.extend(("--define", define)) - args.add(source_path) - args.add(output_dir.path) env = dict([ v.split("=", 1) @@ -302,16 +312,41 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix): for tool in ctx.attr.tools: tools.append(tool[DefaultInfo].files_to_run) - ctx.actions.run( - executable = ctx.executable.sphinx, - arguments = [args], - inputs = inputs, - outputs = [output_dir], - tools = tools, - mnemonic = "SphinxBuildDocs", - progress_message = "Sphinx building {} for %{{label}}".format(format), - env = env, - ) + if use_cache: + worker_arg_file = ctx.actions.declare_file(ctx.attr.name + ".worker_args") + ctx.actions.write( + output = worker_arg_file, + content = args, + ) + all_inputs = depset( + direct = [worker_arg_file], + transitive = [inputs] + ) + ctx.actions.run( + executable = ctx.executable.sphinx, + arguments = ["@" + worker_arg_file.path], + inputs = all_inputs, + outputs = [output_dir], + tools = tools, + mnemonic = "SphinxBuildDocsCache", + progress_message = "Sphinx building {} for %{{label}}".format(format), + env = env, + execution_requirements = { + "supports-workers": "1", + "requires-worker-protocol": "json" + } + ) + else: + ctx.actions.run( + executable = ctx.executable.sphinx, + arguments = [args], + inputs = inputs, + outputs = [output_dir], + tools = tools, + mnemonic = "SphinxBuildDocsNoCache", + progress_message = "Sphinx building {} for %{{label}}".format(format), + env = env, + ) return output_dir, struct(args = run_args, env = env) def _sphinx_source_tree_impl(ctx): diff --git a/sphinxdocs/private/sphinx_build.py b/sphinxdocs/private/sphinx_build.py index 3b7b32eaf6..a28b46b3e2 100644 --- a/sphinxdocs/private/sphinx_build.py +++ b/sphinxdocs/private/sphinx_build.py @@ -1,8 +1,164 @@ +from pathlib import Path + +import argparse +import json +import logging import os import pathlib import sys +import time +import traceback +import typing from sphinx.cmd.build import main + +WorkRequest = object +WorkResponse = object + + +parser = argparse.ArgumentParser( + fromfile_prefix_chars='@' +) +# parser.add_argument('srcdir') +# parser.add_argument('outdir') +parser.add_argument("--persistent_worker", action="store_true") +parser.add_argument("--doctree-dir") + + +class Worker: + + def __init__(self, instream: "typing.TextIO", outstream: "typing.TextIO"): + self._instream = instream + self._outstream = outstream + self._logger = logging.getLogger("worker") + logging.basicConfig(filename='echo.log', encoding='utf-8', level=logging.DEBUG) + self._logger.info("starting worker") + self._current = {} + self._previous = {} + self._cache = {} + + def run(self) -> None: + try: + while True: + request = None + try: + request = self._get_next_request() + if request is None: + self._logger.info("Empty request: exiting") + break + response = self._process_request(request) + if response: + self._send_response(response) + except Exception: + self._logger.exception("Unhandled error: request=%s", request) + output = ( + f"Unhandled error:\nRequest: {request}\n" + + traceback.format_exc() + ) + request_id = 0 if not request else request.get("requestId", 0) + self._send_response( + { + "exitCode": 3, + "output": output, + "requestId": request_id, + } + ) + finally: + self._logger.info("Worker shutting down") + + def _get_next_request(self) -> "object | None": + line = self._instream.readline() + if not line: + return None + return json.loads(line) + + @property + def inputs(self): + self._previous + self._current + return self._value + + def _update_digest(self, request): + args, unknown = parser.parse_known_args(request["arguments"]) + # Make room for the new build's data. + self._previous = self._current + # Rearrange the new data into a dict to make comparisons easier. + self._current = {} + for page in request["inputs"]: + path = page["path"] + self._current[path] = page["digest"] + # Compare the content hashes to determine what pages have changed. + tmp = [] + for path in self._current: + if path not in self._previous: + tmp.append(path) + continue + if self._current[path] != self._previous[path]: + tmp.append(path) + continue + for path in self._previous: + if path not in self._current: + tmp.append(path) + continue + # Normalize the paths into docnames + digest = [] + for path in tmp: + if not path.endswith(".rst"): + continue + srcdir = self.args[0] + docname = path.replace(srcdir + "/", "") + docname = docname.replace(".rst", "") + digest.append(docname) + args, unknown = parser.parse_known_args(self.args) + # Save the digest. + doctree_dir = Path(args.doctree_dir) + # On a fresh build, _restore_cache() does nothing, so this dir won't exist yet. + if not doctree_dir.is_dir(): + doctree_dir.mkdir(parents=True) + with open(doctree_dir / Path("digest.json"), "w") as f: + json.dump(digest, f, indent=2) + + def _restore_cache(self): + for filepath in self._cache: + data = self._cache[filepath] + parent = Path(os.path.dirname(filepath)) + if not parent.is_dir(): + parent.mkdir(parents=True) + with open(filepath, "wb") as f: + f.write(data) + + def _update_cache(self): + args, unknown = parser.parse_known_args(self.args) + self._cache = {} + for root, _, files in os.walk(args.doctree_dir): + for filename in files: + filepath = Path(root) / Path(filename) + with open(filepath, "rb") as f: + self._cache[str(filepath)] = f.read() + + def _process_request(self, request: "WorkRequest") -> "WorkResponse | None": + if request.get("cancel"): + return None + self.args = request["arguments"] + self._restore_cache() + self._update_digest(request) + main(self.args) + self._update_cache() + response = { + "requestId": request.get("requestId", 0), + "exitCode": 0, + } + return response + + def _send_response(self, response: "WorkResponse") -> None: + self._outstream.write(json.dumps(response) + "\n") + self._outstream.flush() + + if __name__ == "__main__": - sys.exit(main()) + args, unknown = parser.parse_known_args() + if args.persistent_worker: + Worker(sys.stdin, sys.stdout).run() + else: + sys.exit(main()) From fb6279b7b6bfb3e509ce7ef7dfebdc9e0afe8285 Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Sat, 31 May 2025 10:45:19 -0700 Subject: [PATCH 02/19] rename tmp to changed paths --- sphinxdocs/private/sphinx_build.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sphinxdocs/private/sphinx_build.py b/sphinxdocs/private/sphinx_build.py index a28b46b3e2..3329707c40 100644 --- a/sphinxdocs/private/sphinx_build.py +++ b/sphinxdocs/private/sphinx_build.py @@ -89,21 +89,21 @@ def _update_digest(self, request): path = page["path"] self._current[path] = page["digest"] # Compare the content hashes to determine what pages have changed. - tmp = [] + changed_paths = [] for path in self._current: if path not in self._previous: - tmp.append(path) + changed_paths.append(path) continue if self._current[path] != self._previous[path]: - tmp.append(path) + changed_paths.append(path) continue for path in self._previous: if path not in self._current: - tmp.append(path) + changed_paths.append(path) continue # Normalize the paths into docnames digest = [] - for path in tmp: + for path in changed_paths: if not path.endswith(".rst"): continue srcdir = self.args[0] From 782b0d45b735d11d676cecdda5d42ffed9d6bf38 Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Sat, 31 May 2025 10:46:52 -0700 Subject: [PATCH 03/19] rename use_cache to use_persistent_worker --- sphinxdocs/private/sphinx.bzl | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/sphinxdocs/private/sphinx.bzl b/sphinxdocs/private/sphinx.bzl index dfda49c6a3..e2d6e95075 100644 --- a/sphinxdocs/private/sphinx.bzl +++ b/sphinxdocs/private/sphinx.bzl @@ -103,7 +103,7 @@ def sphinx_docs( strip_prefix = "", extra_opts = [], tools = [], - use_cache = False, + use_persistent_worker = False, **kwargs): """Generate docs using Sphinx. @@ -166,7 +166,7 @@ def sphinx_docs( source_tree = internal_name + "/_sources", extra_opts = extra_opts, tools = tools, - use_cache = use_cache, + use_persistent_worker = use_persistent_worker, **kwargs ) @@ -211,7 +211,7 @@ def _sphinx_docs_impl(ctx): source_path = source_dir_path, output_prefix = paths.join(ctx.label.name, "_build"), inputs = inputs, - use_cache = ctx.attr.use_cache, + use_persistent_worker = ctx.attr.use_persistent_worker, ) outputs[format] = output_dir per_format_args[format] = args_env @@ -251,7 +251,7 @@ _sphinx_docs = rule( cfg = "exec", doc = "Additional tools that are used by Sphinx and its plugins.", ), - "use_cache": attr.bool( + "use_persistent_worker": attr.bool( doc = "TODO", default = False, ), @@ -261,8 +261,7 @@ _sphinx_docs = rule( }, ) -def _run_sphinx(ctx, format, source_path, inputs, output_prefix, use_cache): - +def _run_sphinx(ctx, format, source_path, inputs, output_prefix, use_persistent_worker): output_dir = ctx.actions.declare_directory(paths.join(output_prefix, format)) run_args = [] # Copy of the args to forward along to debug runner @@ -284,7 +283,7 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix, use_cache): # Don't add to run_args: parallel building breaks interactive debugging args.add("--jobs", "auto") - if use_cache: + if use_persistent_worker: args.add("--doctree-dir", paths.join(output_dir.path, ".doctrees")) else: args.add("--fresh-env") # Don't try to use cache files. Bazel can't make use of them. @@ -302,7 +301,6 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix, use_cache): for define in extra_defines: run_args.extend(("--define", define)) - env = dict([ v.split("=", 1) for v in ctx.attr._extra_env_flag[_FlagInfo].value @@ -312,7 +310,7 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix, use_cache): for tool in ctx.attr.tools: tools.append(tool[DefaultInfo].files_to_run) - if use_cache: + if use_persistent_worker: worker_arg_file = ctx.actions.declare_file(ctx.attr.name + ".worker_args") ctx.actions.write( output = worker_arg_file, @@ -320,7 +318,7 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix, use_cache): ) all_inputs = depset( direct = [worker_arg_file], - transitive = [inputs] + transitive = [inputs], ) ctx.actions.run( executable = ctx.executable.sphinx, @@ -333,8 +331,8 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix, use_cache): env = env, execution_requirements = { "supports-workers": "1", - "requires-worker-protocol": "json" - } + "requires-worker-protocol": "json", + }, ) else: ctx.actions.run( From 4cf2ca813acfe31a973a7f5368c981bbc9c77fa6 Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Sat, 31 May 2025 11:00:19 -0700 Subject: [PATCH 04/19] cleanup logic to run action --- sphinxdocs/private/sphinx.bzl | 51 ++++++++++++----------------------- 1 file changed, 17 insertions(+), 34 deletions(-) diff --git a/sphinxdocs/private/sphinx.bzl b/sphinxdocs/private/sphinx.bzl index e2d6e95075..d439efde06 100644 --- a/sphinxdocs/private/sphinx.bzl +++ b/sphinxdocs/private/sphinx.bzl @@ -310,41 +310,24 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix, use_persistent_ for tool in ctx.attr.tools: tools.append(tool[DefaultInfo].files_to_run) + execution_requirements = {} if use_persistent_worker: - worker_arg_file = ctx.actions.declare_file(ctx.attr.name + ".worker_args") - ctx.actions.write( - output = worker_arg_file, - content = args, - ) - all_inputs = depset( - direct = [worker_arg_file], - transitive = [inputs], - ) - ctx.actions.run( - executable = ctx.executable.sphinx, - arguments = ["@" + worker_arg_file.path], - inputs = all_inputs, - outputs = [output_dir], - tools = tools, - mnemonic = "SphinxBuildDocsCache", - progress_message = "Sphinx building {} for %{{label}}".format(format), - env = env, - execution_requirements = { - "supports-workers": "1", - "requires-worker-protocol": "json", - }, - ) - else: - ctx.actions.run( - executable = ctx.executable.sphinx, - arguments = [args], - inputs = inputs, - outputs = [output_dir], - tools = tools, - mnemonic = "SphinxBuildDocsNoCache", - progress_message = "Sphinx building {} for %{{label}}".format(format), - env = env, - ) + args.use_param_file("@%s", use_always = True) + args.set_param_file_format("multiline") + execution_requirements["supports-workers"] = "1" + execution_requirements["requires-worker-protocol"] = "json" + + ctx.actions.run( + executable = ctx.executable.sphinx, + arguments = [args], + inputs = inputs, + outputs = [output_dir], + tools = tools, + mnemonic = "SphinxBuildDocs", + progress_message = "Sphinx building {} for %{{label}}".format(format), + env = env, + execution_requirements = execution_requirements, + ) return output_dir, struct(args = run_args, env = env) def _sphinx_source_tree_impl(ctx): From bfa6cfb8f76b564167fce10922351a6580ba07bf Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Sat, 31 May 2025 11:03:25 -0700 Subject: [PATCH 05/19] fix use_persistent_workers type, enable for basic test --- sphinxdocs/private/sphinx.bzl | 14 +++++++------- sphinxdocs/tests/sphinx_docs/BUILD.bazel | 1 + 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/sphinxdocs/private/sphinx.bzl b/sphinxdocs/private/sphinx.bzl index d439efde06..922707fab9 100644 --- a/sphinxdocs/private/sphinx.bzl +++ b/sphinxdocs/private/sphinx.bzl @@ -103,7 +103,7 @@ def sphinx_docs( strip_prefix = "", extra_opts = [], tools = [], - use_persistent_worker = False, + use_persistent_workers = False, **kwargs): """Generate docs using Sphinx. @@ -166,7 +166,7 @@ def sphinx_docs( source_tree = internal_name + "/_sources", extra_opts = extra_opts, tools = tools, - use_persistent_worker = use_persistent_worker, + use_persistent_workers = use_persistent_workers, **kwargs ) @@ -211,7 +211,7 @@ def _sphinx_docs_impl(ctx): source_path = source_dir_path, output_prefix = paths.join(ctx.label.name, "_build"), inputs = inputs, - use_persistent_worker = ctx.attr.use_persistent_worker, + use_persistent_workers = ctx.attr.use_persistent_workers, ) outputs[format] = output_dir per_format_args[format] = args_env @@ -251,7 +251,7 @@ _sphinx_docs = rule( cfg = "exec", doc = "Additional tools that are used by Sphinx and its plugins.", ), - "use_persistent_worker": attr.bool( + "use_persistent_workers": attr.bool( doc = "TODO", default = False, ), @@ -261,7 +261,7 @@ _sphinx_docs = rule( }, ) -def _run_sphinx(ctx, format, source_path, inputs, output_prefix, use_persistent_worker): +def _run_sphinx(ctx, format, source_path, inputs, output_prefix, use_persistent_workers): output_dir = ctx.actions.declare_directory(paths.join(output_prefix, format)) run_args = [] # Copy of the args to forward along to debug runner @@ -283,7 +283,7 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix, use_persistent_ # Don't add to run_args: parallel building breaks interactive debugging args.add("--jobs", "auto") - if use_persistent_worker: + if use_persistent_workers: args.add("--doctree-dir", paths.join(output_dir.path, ".doctrees")) else: args.add("--fresh-env") # Don't try to use cache files. Bazel can't make use of them. @@ -311,7 +311,7 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix, use_persistent_ tools.append(tool[DefaultInfo].files_to_run) execution_requirements = {} - if use_persistent_worker: + if use_persistent_workers: args.use_param_file("@%s", use_always = True) args.set_param_file_format("multiline") execution_requirements["supports-workers"] = "1" diff --git a/sphinxdocs/tests/sphinx_docs/BUILD.bazel b/sphinxdocs/tests/sphinx_docs/BUILD.bazel index f9c82967c1..11ece4905f 100644 --- a/sphinxdocs/tests/sphinx_docs/BUILD.bazel +++ b/sphinxdocs/tests/sphinx_docs/BUILD.bazel @@ -24,6 +24,7 @@ sphinx_docs( formats = ["html"], sphinx = ":sphinx-build", target_compatible_with = _TARGET_COMPATIBLE_WITH, + use_persistent_workers = True, ) gen_directory( From b497e62237bde4c7f4a810bbb20ceea21253b3c8 Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Sat, 31 May 2025 12:29:07 -0700 Subject: [PATCH 06/19] basic incremental worker --- sphinxdocs/private/sphinx.bzl | 20 ++++++---- sphinxdocs/private/sphinx_build.py | 58 +++++++++++++++++++--------- sphinxdocs/tests/sphinx_docs/conf.py | 15 ++++++- sphinxdocs/tests/sphinx_docs/doc1.md | 4 ++ sphinxdocs/tests/sphinx_docs/doc2.md | 5 +++ 5 files changed, 75 insertions(+), 27 deletions(-) create mode 100644 sphinxdocs/tests/sphinx_docs/doc1.md create mode 100644 sphinxdocs/tests/sphinx_docs/doc2.md diff --git a/sphinxdocs/private/sphinx.bzl b/sphinxdocs/private/sphinx.bzl index 922707fab9..9d5ce4254d 100644 --- a/sphinxdocs/private/sphinx.bzl +++ b/sphinxdocs/private/sphinx.bzl @@ -272,19 +272,22 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix, use_persistent_ args.add("--show-traceback") # Full tracebacks on error run_args.append("--show-traceback") - args.add("--builder", format) - run_args.extend(("--builder", format)) + args.add(format, format = "--builder=%s") + run_args.append("--builder={}".format(format)) - if ctx.attr._quiet_flag[BuildSettingInfo].value: - # Not added to run_args because run_args is for debugging - args.add("--quiet") # Suppress stdout informational text + ##if ctx.attr._quiet_flag[BuildSettingInfo].value: + ## # Not added to run_args because run_args is for debugging + ## args.add("--quiet") # Suppress stdout informational text # Build in parallel, if possible # Don't add to run_args: parallel building breaks interactive debugging - args.add("--jobs", "auto") + args.add("--jobs=auto") if use_persistent_workers: - args.add("--doctree-dir", paths.join(output_dir.path, ".doctrees")) + # Sphinx normally uses `.doctrees`, but we use underscore so it isn't + # hidden by default + args.add(paths.join(output_dir.path + "_doctrees"), format = "--doctree-dir=%s") + else: args.add("--fresh-env") # Don't try to use cache files. Bazel can't make use of them. run_args.append("--fresh-env") @@ -312,6 +315,9 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix, use_persistent_ execution_requirements = {} if use_persistent_workers: + args.add("-v") + args.add("-v") + args.add("-v") args.use_param_file("@%s", use_always = True) args.set_param_file_format("multiline") execution_requirements["supports-workers"] = "1" diff --git a/sphinxdocs/private/sphinx_build.py b/sphinxdocs/private/sphinx_build.py index 3329707c40..8bd001fe43 100644 --- a/sphinxdocs/private/sphinx_build.py +++ b/sphinxdocs/private/sphinx_build.py @@ -23,17 +23,27 @@ # parser.add_argument('srcdir') # parser.add_argument('outdir') parser.add_argument("--persistent_worker", action="store_true") -parser.add_argument("--doctree-dir") +##parser.add_argument("--doctree-dir") +logger = logging.getLogger('sphinxdocs-build') class Worker: def __init__(self, instream: "typing.TextIO", outstream: "typing.TextIO"): self._instream = instream self._outstream = outstream - self._logger = logging.getLogger("worker") - logging.basicConfig(filename='echo.log', encoding='utf-8', level=logging.DEBUG) - self._logger.info("starting worker") + # Annoying. Sphinx resets its loging config as part of main() + # and the Sphinx() app setup/invocation. So any logging we try + # to setup here to get info out of sphinx is meaningless. + # -v -v -v will output more logging, but to stderr/stdout, and thus + # bazel's worker log file, due to sphinx's logging re-configuration. + # one-liner to get most recent worker log: + # find $workerLogDir -type f -printf '%T@ %p\n' | sort -n | tail -1 | awk '{print $2}' + logging.basicConfig( + ##filename='/tmp/sphinx-builder.log', encoding='utf-8', + level=logging.DEBUG + ) + logger.info("starting worker") self._current = {} self._previous = {} self._cache = {} @@ -45,13 +55,14 @@ def run(self) -> None: try: request = self._get_next_request() if request is None: - self._logger.info("Empty request: exiting") + logger.info("Empty request: exiting") break response = self._process_request(request) + logger.info("response:%s", response) if response: self._send_response(response) except Exception: - self._logger.exception("Unhandled error: request=%s", request) + logger.exception("Unhandled error: request=%s", request) output = ( f"Unhandled error:\nRequest: {request}\n" + traceback.format_exc() @@ -65,7 +76,7 @@ def run(self) -> None: } ) finally: - self._logger.info("Worker shutting down") + logger.info("Worker shutting down") def _get_next_request(self) -> "object | None": line = self._instream.readline() @@ -81,13 +92,14 @@ def inputs(self): def _update_digest(self, request): args, unknown = parser.parse_known_args(request["arguments"]) - # Make room for the new build's data. + # Make room for the new build's data. self._previous = self._current # Rearrange the new data into a dict to make comparisons easier. self._current = {} for page in request["inputs"]: path = page["path"] self._current[path] = page["digest"] + logger.info("path mtime: %s", pathlib.Path(path).stat().st_mtime) # Compare the content hashes to determine what pages have changed. changed_paths = [] for path in self._current: @@ -104,6 +116,7 @@ def _update_digest(self, request): # Normalize the paths into docnames digest = [] for path in changed_paths: + logger.info("Changed: %s", path) if not path.endswith(".rst"): continue srcdir = self.args[0] @@ -111,13 +124,13 @@ def _update_digest(self, request): docname = docname.replace(".rst", "") digest.append(docname) args, unknown = parser.parse_known_args(self.args) - # Save the digest. - doctree_dir = Path(args.doctree_dir) - # On a fresh build, _restore_cache() does nothing, so this dir won't exist yet. - if not doctree_dir.is_dir(): - doctree_dir.mkdir(parents=True) - with open(doctree_dir / Path("digest.json"), "w") as f: - json.dump(digest, f, indent=2) + ### Save the digest. + ##doctree_dir = Path(args.doctree_dir) + ### On a fresh build, _restore_cache() does nothing, so this dir won't exist yet. + ##if not doctree_dir.is_dir(): + ## doctree_dir.mkdir(parents=True) + ##with open(doctree_dir / Path("digest.json"), "w") as f: + ## json.dump(digest, f, indent=2) def _restore_cache(self): for filepath in self._cache: @@ -138,13 +151,20 @@ def _update_cache(self): self._cache[str(filepath)] = f.read() def _process_request(self, request: "WorkRequest") -> "WorkResponse | None": + logger.info("request:%s", json.dumps(request, sort_keys=True, indent=2)) if request.get("cancel"): return None self.args = request["arguments"] - self._restore_cache() - self._update_digest(request) - main(self.args) - self._update_cache() + ##self._restore_cache() + ##self._update_digest(request) + logger.info("main: %s", self.args) + orig_stdout = sys.stdout + sys.stdout = sys.stderr + try: + main(self.args) + finally: + sys.stdout = orig_stdout + ##self._update_cache() response = { "requestId": request.get("requestId", 0), "exitCode": 0, diff --git a/sphinxdocs/tests/sphinx_docs/conf.py b/sphinxdocs/tests/sphinx_docs/conf.py index d96fa36690..2f7bb80476 100644 --- a/sphinxdocs/tests/sphinx_docs/conf.py +++ b/sphinxdocs/tests/sphinx_docs/conf.py @@ -5,7 +5,7 @@ # -- Project info -project = "Sphinx Docs Test" +project = "Sphinx Docs Test xx" extensions = [ "myst_parser", @@ -13,3 +13,16 @@ myst_enable_extensions = [ "colon_fence", ] + +import logging +logger = logging.getLogger('conf') + +def on_env_get_outdated(*args, **kwargs): + logger.info("env-get-outdated args: %s", args) + logger.info("env-get-outdated kwargs: %s", kwargs) + return [] + + +def setup(app): + + app.connect('env-get-outdated', on_env_get_outdated) diff --git a/sphinxdocs/tests/sphinx_docs/doc1.md b/sphinxdocs/tests/sphinx_docs/doc1.md new file mode 100644 index 0000000000..ae01f09c4e --- /dev/null +++ b/sphinxdocs/tests/sphinx_docs/doc1.md @@ -0,0 +1,4 @@ +# doc1 + +hello doc 1 +x diff --git a/sphinxdocs/tests/sphinx_docs/doc2.md b/sphinxdocs/tests/sphinx_docs/doc2.md new file mode 100644 index 0000000000..f59bd77d74 --- /dev/null +++ b/sphinxdocs/tests/sphinx_docs/doc2.md @@ -0,0 +1,5 @@ +# doc 2 + + +hello doc 3 +x From 3420416340eca7ea644ff133b892847eabc5f5b4 Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Mon, 2 Jun 2025 14:52:50 -0700 Subject: [PATCH 07/19] generate info file for extensions to use. also cleanup --- sphinxdocs/private/sphinx.bzl | 18 +- sphinxdocs/private/sphinx_build.py | 251 ++++++++++++++++----------- sphinxdocs/tests/sphinx_docs/conf.py | 13 -- sphinxdocs/tests/sphinx_docs/doc2.md | 1 - 4 files changed, 154 insertions(+), 129 deletions(-) diff --git a/sphinxdocs/private/sphinx.bzl b/sphinxdocs/private/sphinx.bzl index 9d5ce4254d..7d7ce0416c 100644 --- a/sphinxdocs/private/sphinx.bzl +++ b/sphinxdocs/private/sphinx.bzl @@ -275,24 +275,25 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix, use_persistent_ args.add(format, format = "--builder=%s") run_args.append("--builder={}".format(format)) - ##if ctx.attr._quiet_flag[BuildSettingInfo].value: - ## # Not added to run_args because run_args is for debugging - ## args.add("--quiet") # Suppress stdout informational text + if ctx.attr._quiet_flag[BuildSettingInfo].value: + # Not added to run_args because run_args is for debugging + args.add("--quiet") # Suppress stdout informational text # Build in parallel, if possible # Don't add to run_args: parallel building breaks interactive debugging args.add("--jobs=auto") if use_persistent_workers: - # Sphinx normally uses `.doctrees`, but we use underscore so it isn't - # hidden by default + # * Normally Sphinx puts doctrees in the output dir. We can't do that + # because Bazel will clear the output directory every invocation. + # * Use a non-dot prefixed name so it shows up more visibly. args.add(paths.join(output_dir.path + "_doctrees"), format = "--doctree-dir=%s") else: + # These aren't added to run_args because we assume direct invocations + # will add them if necessary. args.add("--fresh-env") # Don't try to use cache files. Bazel can't make use of them. - run_args.append("--fresh-env") args.add("--write-all") # Write all files; don't try to detect "changed" files - run_args.append("--write-all") for opt in ctx.attr.extra_opts: expanded = ctx.expand_location(opt) @@ -315,9 +316,6 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix, use_persistent_ execution_requirements = {} if use_persistent_workers: - args.add("-v") - args.add("-v") - args.add("-v") args.use_param_file("@%s", use_always = True) args.set_param_file_format("multiline") execution_requirements["supports-workers"] = "1" diff --git a/sphinxdocs/private/sphinx_build.py b/sphinxdocs/private/sphinx_build.py index 8bd001fe43..e2120c386a 100644 --- a/sphinxdocs/private/sphinx_build.py +++ b/sphinxdocs/private/sphinx_build.py @@ -1,5 +1,7 @@ from pathlib import Path +import shutil +import contextlib import argparse import json import logging @@ -9,6 +11,8 @@ import time import traceback import typing +import io +import sphinx.application from sphinx.cmd.build import main @@ -16,39 +20,50 @@ WorkRequest = object WorkResponse = object - -parser = argparse.ArgumentParser( - fromfile_prefix_chars='@' -) -# parser.add_argument('srcdir') -# parser.add_argument('outdir') -parser.add_argument("--persistent_worker", action="store_true") -##parser.add_argument("--doctree-dir") - logger = logging.getLogger('sphinxdocs-build') +_WORKER_SPHINX_EXT_MODULE_NAME = "bazel_worker_sphinx_ext" + class Worker: - def __init__(self, instream: "typing.TextIO", outstream: "typing.TextIO"): + def __init__(self, instream: "typing.TextIO", outstream: "typing.TextIO", exec_root: str): + # NOTE: Sphinx performs its own logging re-configuration, so any + # logging config we do isn't respected by Sphinx. Controlling where + # stdout and stderr goes are the main mechanisms. Recall that + # Bazel send worker stderr to the worker log file. + # outputBase=$(bazel info output_base) + # find $outputBase/bazel-workers/ -type f -printf '%T@ %p\n' | sort -n | tail -1 | awk '{print $2}' + logging.basicConfig(level=logging.DEBUG) + logger.info("initializing worker") + + # The directory that paths are relative to. + self._exec_root = exec_root + # Where requests are read from. self._instream = instream + # Where responses are written to. self._outstream = outstream - # Annoying. Sphinx resets its loging config as part of main() - # and the Sphinx() app setup/invocation. So any logging we try - # to setup here to get info out of sphinx is meaningless. - # -v -v -v will output more logging, but to stderr/stdout, and thus - # bazel's worker log file, due to sphinx's logging re-configuration. - # one-liner to get most recent worker log: - # find $workerLogDir -type f -printf '%T@ %p\n' | sort -n | tail -1 | awk '{print $2}' - logging.basicConfig( - ##filename='/tmp/sphinx-builder.log', encoding='utf-8', - level=logging.DEBUG - ) - logger.info("starting worker") - self._current = {} - self._previous = {} - self._cache = {} + + # dict[str srcdir, dict[str path, str digest]] + self._digests = {} + + # Internal output directories the worker gives to Sphinx that need + # to be cleaned up upon exit. + # set[str path] + self._worker_outdirs = set() + self._extension = BazelWorkerExtension() + + sys.modules[_WORKER_SPHINX_EXT_MODULE_NAME] = self._extension + sphinx.application.builtin_extensions += (_WORKER_SPHINX_EXT_MODULE_NAME,) + + def __enter__(self): + return self + + def __exit__(self): + for worker_outdir in self._worker_outdirs: + shutil.rmtree(worker_outdir, ignore_errors=True) def run(self) -> None: + logger.info("Worker started") try: while True: request = None @@ -58,7 +73,6 @@ def run(self) -> None: logger.info("Empty request: exiting") break response = self._process_request(request) - logger.info("response:%s", response) if response: self._send_response(response) except Exception: @@ -84,101 +98,128 @@ def _get_next_request(self) -> "object | None": return None return json.loads(line) - @property - def inputs(self): - self._previous - self._current - return self._value - - def _update_digest(self, request): - args, unknown = parser.parse_known_args(request["arguments"]) - # Make room for the new build's data. - self._previous = self._current - # Rearrange the new data into a dict to make comparisons easier. - self._current = {} - for page in request["inputs"]: - path = page["path"] - self._current[path] = page["digest"] - logger.info("path mtime: %s", pathlib.Path(path).stat().st_mtime) - # Compare the content hashes to determine what pages have changed. + def _send_response(self, response: "WorkResponse") -> None: + self._outstream.write(json.dumps(response) + "\n") + self._outstream.flush() + + def _prepare_sphinx(self, request): + sphinx_args = request["arguments"] + srcdir = sphinx_args[0] + + incoming_digests = {} + current_digests = self._digests.setdefault(srcdir, {}) changed_paths = [] - for path in self._current: - if path not in self._previous: - changed_paths.append(path) - continue - if self._current[path] != self._previous[path]: + request_info = { + "exec_root": self._exec_root, + "inputs": request["inputs"] + } + for entry in request["inputs"]: + path = entry["path"] + digest = entry["digest"] + + ##mtime = pathlib.Path(path).stat().st_mtime + ##logger.info("incoming path %s mtime: %s", path, mtime) + ### Sphinx appears to treat 0 mtime as always changed + ##os.utime(path, (100, 100)) + + # Make the path srcdir-relative so Sphinx understands it. + path = path.removeprefix(srcdir + "/") + incoming_digests[path] = digest + + if path not in current_digests: + logger.info("path %s new", path) changed_paths.append(path) - continue - for path in self._previous: - if path not in self._current: + elif current_digests[path] != digest: + logger.info("path %s changed", path) changed_paths.append(path) - continue - # Normalize the paths into docnames - digest = [] - for path in changed_paths: - logger.info("Changed: %s", path) - if not path.endswith(".rst"): - continue - srcdir = self.args[0] - docname = path.replace(srcdir + "/", "") - docname = docname.replace(".rst", "") - digest.append(docname) - args, unknown = parser.parse_known_args(self.args) - ### Save the digest. - ##doctree_dir = Path(args.doctree_dir) - ### On a fresh build, _restore_cache() does nothing, so this dir won't exist yet. - ##if not doctree_dir.is_dir(): - ## doctree_dir.mkdir(parents=True) - ##with open(doctree_dir / Path("digest.json"), "w") as f: - ## json.dump(digest, f, indent=2) - - def _restore_cache(self): - for filepath in self._cache: - data = self._cache[filepath] - parent = Path(os.path.dirname(filepath)) - if not parent.is_dir(): - parent.mkdir(parents=True) - with open(filepath, "wb") as f: - f.write(data) - - def _update_cache(self): - args, unknown = parser.parse_known_args(self.args) - self._cache = {} - for root, _, files in os.walk(args.doctree_dir): - for filename in files: - filepath = Path(root) / Path(filename) - with open(filepath, "rb") as f: - self._cache[str(filepath)] = f.read() - def _process_request(self, request: "WorkRequest") -> "WorkResponse | None": - logger.info("request:%s", json.dumps(request, sort_keys=True, indent=2)) - if request.get("cancel"): - return None - self.args = request["arguments"] - ##self._restore_cache() - ##self._update_digest(request) - logger.info("main: %s", self.args) + self._digests[srcdir] = incoming_digests + self._extension.changed_paths = changed_paths + request_info["changed_sources"] = changed_paths + + bazel_outdir = sphinx_args[1] + worker_outdir = bazel_outdir + ".worker-out.d" + self._worker_outdirs.add(worker_outdir) + sphinx_args[1] = worker_outdir + + request_info_path = os.path.join(srcdir, "_bazel_worker_request_info.json") + with open(request_info_path, "w") as fp: + json.dump(request_info, fp) + sphinx_args.append(f"--define=bazel_worker_request_info={request_info_path}") + + return worker_outdir, bazel_outdir, sphinx_args + + @contextlib.contextmanager + def _redirect_streams(self): + out = io.StringIO() orig_stdout = sys.stdout - sys.stdout = sys.stderr try: - main(self.args) + sys.stdout = out + yield out finally: sys.stdout = orig_stdout - ##self._update_cache() + + def _process_request(self, request: "WorkRequest") -> "WorkResponse | None": + logger.info("Request: %s", json.dumps(request, sort_keys=True, indent=2)) + if request.get("cancel"): + return None + + worker_outdir, bazel_outdir, sphinx_args = self._prepare_sphinx(request) + + # Prevent anything from going to stdout because it breaks the worker + # protocol. We have limited control over where Sphinx sends output. + with self._redirect_streams() as stdout: + logger.info("main args: %s", sphinx_args) + exit_code = main(sphinx_args) + + if exit_code: + raise Exception( + "Sphinx main() returned failure: " + + f" exit code: {exit_code}\n" + + "========== STDOUT START ==========\n" + + stdout.getvalue().rstrip("\n") + "\n" + + "========== STDOUT END ==========\n" + ) + + # Copying is unfortunately necessary because Bazel doesn't know to + # implicily bring along what the symlinks point to. + shutil.copytree(worker_outdir, bazel_outdir, dirs_exist_ok=True) + response = { "requestId": request.get("requestId", 0), + "output": stdout.getvalue(), "exitCode": 0, } return response - def _send_response(self, response: "WorkResponse") -> None: - self._outstream.write(json.dumps(response) + "\n") - self._outstream.flush() + + +# todo: make this parallel-safe +class BazelWorkerExtension: + def __init__(self): + self.__name__ = _WORKER_SPHINX_EXT_MODULE_NAME + # set[str] of src-dir relative path names + self.changed_paths = set() + + def setup(self, app): + app.connect('env-get-outdated', self._handle_env_get_outdated) + return { + "parallel_read_safe": True, + "parallel_write_safe": True + } + + def _handle_env_get_outdated(self, app, env, added, changed, removed): + changed = { + # NOTE: path2doc returns None if it's not a doc path + env.path2doc(p) for p in self.changed_paths + } + logger.info("changed docs: %s", changed) + return changed if __name__ == "__main__": - args, unknown = parser.parse_known_args() - if args.persistent_worker: - Worker(sys.stdin, sys.stdout).run() + if '--persistent_worker' in sys.argv: + with Worker(sys.stdin, sys.stdout, os.getcwd()) as worker: + sys.exit(worker.run()) else: sys.exit(main()) diff --git a/sphinxdocs/tests/sphinx_docs/conf.py b/sphinxdocs/tests/sphinx_docs/conf.py index 2f7bb80476..247d0f4a3d 100644 --- a/sphinxdocs/tests/sphinx_docs/conf.py +++ b/sphinxdocs/tests/sphinx_docs/conf.py @@ -13,16 +13,3 @@ myst_enable_extensions = [ "colon_fence", ] - -import logging -logger = logging.getLogger('conf') - -def on_env_get_outdated(*args, **kwargs): - logger.info("env-get-outdated args: %s", args) - logger.info("env-get-outdated kwargs: %s", kwargs) - return [] - - -def setup(app): - - app.connect('env-get-outdated', on_env_get_outdated) diff --git a/sphinxdocs/tests/sphinx_docs/doc2.md b/sphinxdocs/tests/sphinx_docs/doc2.md index f59bd77d74..a86d9a1025 100644 --- a/sphinxdocs/tests/sphinx_docs/doc2.md +++ b/sphinxdocs/tests/sphinx_docs/doc2.md @@ -2,4 +2,3 @@ hello doc 3 -x From 34e1cdea7cdab7141d493c69f18a5b4a1c0dd078 Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Mon, 2 Jun 2025 14:57:37 -0700 Subject: [PATCH 08/19] cleanup --- sphinxdocs/private/sphinx.bzl | 4 ++- sphinxdocs/private/sphinx_build.py | 52 ++++++++++++++---------------- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/sphinxdocs/private/sphinx.bzl b/sphinxdocs/private/sphinx.bzl index 7d7ce0416c..5477055c54 100644 --- a/sphinxdocs/private/sphinx.bzl +++ b/sphinxdocs/private/sphinx.bzl @@ -143,6 +143,8 @@ def sphinx_docs( tools: {type}`list[label]` Additional tools that are used by Sphinx and its plugins. This just makes the tools available during Sphinx execution. To locate them, use {obj}`extra_opts` and `$(location)`. + use_persistent_workers: {type}`bool` (experimental) If enable, use a persistent + worker to run Sphinx for improved incremental, caching, and startup performance. **kwargs: {type}`dict` Common attributes to pass onto rules. """ add_tag(kwargs, "@rules_python//sphinxdocs:sphinx_docs") @@ -243,7 +245,7 @@ _sphinx_docs = rule( ), "sphinx": attr.label( executable = True, - cfg = "host", + cfg = "exec", mandatory = True, doc = "Sphinx binary to generate documentation.", ), diff --git a/sphinxdocs/private/sphinx_build.py b/sphinxdocs/private/sphinx_build.py index e2120c386a..5185d30a88 100644 --- a/sphinxdocs/private/sphinx_build.py +++ b/sphinxdocs/private/sphinx_build.py @@ -1,32 +1,33 @@ -from pathlib import Path - -import shutil -import contextlib import argparse +import contextlib +import io import json import logging import os import pathlib +import shutil import sys import time import traceback import typing -import io -import sphinx.application +from pathlib import Path +import sphinx.application from sphinx.cmd.build import main - WorkRequest = object WorkResponse = object -logger = logging.getLogger('sphinxdocs-build') +logger = logging.getLogger("sphinxdocs_build") _WORKER_SPHINX_EXT_MODULE_NAME = "bazel_worker_sphinx_ext" + class Worker: - def __init__(self, instream: "typing.TextIO", outstream: "typing.TextIO", exec_root: str): + def __init__( + self, instream: "typing.TextIO", outstream: "typing.TextIO", exec_root: str + ): # NOTE: Sphinx performs its own logging re-configuration, so any # logging config we do isn't respected by Sphinx. Controlling where # stdout and stderr goes are the main mechanisms. Recall that @@ -109,10 +110,7 @@ def _prepare_sphinx(self, request): incoming_digests = {} current_digests = self._digests.setdefault(srcdir, {}) changed_paths = [] - request_info = { - "exec_root": self._exec_root, - "inputs": request["inputs"] - } + request_info = {"exec_root": self._exec_root, "inputs": request["inputs"]} for entry in request["inputs"]: path = entry["path"] digest = entry["digest"] @@ -174,11 +172,12 @@ def _process_request(self, request: "WorkRequest") -> "WorkResponse | None": if exit_code: raise Exception( - "Sphinx main() returned failure: " + - f" exit code: {exit_code}\n" + - "========== STDOUT START ==========\n" + - stdout.getvalue().rstrip("\n") + "\n" + - "========== STDOUT END ==========\n" + "Sphinx main() returned failure: " + + f" exit code: {exit_code}\n" + + "========== STDOUT START ==========\n" + + stdout.getvalue().rstrip("\n") + + "\n" + + "========== STDOUT END ==========\n" ) # Copying is unfortunately necessary because Bazel doesn't know to @@ -193,32 +192,31 @@ def _process_request(self, request: "WorkRequest") -> "WorkResponse | None": return response - -# todo: make this parallel-safe class BazelWorkerExtension: + """A Sphinx extension implemented as a class acting like a module.""" + def __init__(self): + # Make it look like a Module object self.__name__ = _WORKER_SPHINX_EXT_MODULE_NAME # set[str] of src-dir relative path names self.changed_paths = set() def setup(self, app): - app.connect('env-get-outdated', self._handle_env_get_outdated) - return { - "parallel_read_safe": True, - "parallel_write_safe": True - } + app.connect("env-get-outdated", self._handle_env_get_outdated) + return {"parallel_read_safe": True, "parallel_write_safe": True} def _handle_env_get_outdated(self, app, env, added, changed, removed): changed = { # NOTE: path2doc returns None if it's not a doc path - env.path2doc(p) for p in self.changed_paths + env.path2doc(p) + for p in self.changed_paths } logger.info("changed docs: %s", changed) return changed if __name__ == "__main__": - if '--persistent_worker' in sys.argv: + if "--persistent_worker" in sys.argv: with Worker(sys.stdin, sys.stdout, os.getcwd()) as worker: sys.exit(worker.run()) else: From bed455604a562c280d742ff752bb1bfcf1b65d66 Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Mon, 2 Jun 2025 15:34:33 -0700 Subject: [PATCH 09/19] cleanup --- docs/BUILD.bazel | 1 + sphinxdocs/private/sphinx.bzl | 2 ++ sphinxdocs/private/sphinx_build.py | 14 ++------------ sphinxdocs/tests/sphinx_docs/conf.py | 2 +- sphinxdocs/tests/sphinx_docs/doc1.md | 1 - sphinxdocs/tests/sphinx_docs/doc2.md | 1 - 6 files changed, 6 insertions(+), 15 deletions(-) diff --git a/docs/BUILD.bazel b/docs/BUILD.bazel index 852c4d4fa6..1204fa8a60 100644 --- a/docs/BUILD.bazel +++ b/docs/BUILD.bazel @@ -70,6 +70,7 @@ sphinx_docs( strip_prefix = package_name() + "/", tags = ["docs"], target_compatible_with = _TARGET_COMPATIBLE_WITH, + use_persistent_workers = True, deps = [ ":bzl_api_docs", ":py_api_srcs", diff --git a/sphinxdocs/private/sphinx.bzl b/sphinxdocs/private/sphinx.bzl index 5477055c54..0bdf9dc257 100644 --- a/sphinxdocs/private/sphinx.bzl +++ b/sphinxdocs/private/sphinx.bzl @@ -269,6 +269,8 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix, use_persistent_ run_args = [] # Copy of the args to forward along to debug runner args = ctx.actions.args() # Args passed to the action + # NOTE: sphinx_build.py relies on the first two args being the srcdir and + # outputdir, in that order. args.add(source_path) args.add(output_dir.path) diff --git a/sphinxdocs/private/sphinx_build.py b/sphinxdocs/private/sphinx_build.py index 5185d30a88..f89e464e0b 100644 --- a/sphinxdocs/private/sphinx_build.py +++ b/sphinxdocs/private/sphinx_build.py @@ -1,16 +1,12 @@ -import argparse import contextlib import io import json import logging import os -import pathlib import shutil import sys -import time import traceback import typing -from pathlib import Path import sphinx.application from sphinx.cmd.build import main @@ -34,8 +30,8 @@ def __init__( # Bazel send worker stderr to the worker log file. # outputBase=$(bazel info output_base) # find $outputBase/bazel-workers/ -type f -printf '%T@ %p\n' | sort -n | tail -1 | awk '{print $2}' - logging.basicConfig(level=logging.DEBUG) - logger.info("initializing worker") + logging.basicConfig(level=logging.WARN) + logger.info("Initializing worker") # The directory that paths are relative to. self._exec_root = exec_root @@ -114,12 +110,6 @@ def _prepare_sphinx(self, request): for entry in request["inputs"]: path = entry["path"] digest = entry["digest"] - - ##mtime = pathlib.Path(path).stat().st_mtime - ##logger.info("incoming path %s mtime: %s", path, mtime) - ### Sphinx appears to treat 0 mtime as always changed - ##os.utime(path, (100, 100)) - # Make the path srcdir-relative so Sphinx understands it. path = path.removeprefix(srcdir + "/") incoming_digests[path] = digest diff --git a/sphinxdocs/tests/sphinx_docs/conf.py b/sphinxdocs/tests/sphinx_docs/conf.py index 247d0f4a3d..d96fa36690 100644 --- a/sphinxdocs/tests/sphinx_docs/conf.py +++ b/sphinxdocs/tests/sphinx_docs/conf.py @@ -5,7 +5,7 @@ # -- Project info -project = "Sphinx Docs Test xx" +project = "Sphinx Docs Test" extensions = [ "myst_parser", diff --git a/sphinxdocs/tests/sphinx_docs/doc1.md b/sphinxdocs/tests/sphinx_docs/doc1.md index ae01f09c4e..f6f70ba28c 100644 --- a/sphinxdocs/tests/sphinx_docs/doc1.md +++ b/sphinxdocs/tests/sphinx_docs/doc1.md @@ -1,4 +1,3 @@ # doc1 hello doc 1 -x diff --git a/sphinxdocs/tests/sphinx_docs/doc2.md b/sphinxdocs/tests/sphinx_docs/doc2.md index a86d9a1025..06eb76a596 100644 --- a/sphinxdocs/tests/sphinx_docs/doc2.md +++ b/sphinxdocs/tests/sphinx_docs/doc2.md @@ -1,4 +1,3 @@ # doc 2 - hello doc 3 From c4870604fc2dbb62b11526a5fc67d6e5ae136eb6 Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Mon, 2 Jun 2025 15:59:19 -0700 Subject: [PATCH 10/19] doc attr --- sphinxdocs/private/sphinx.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinxdocs/private/sphinx.bzl b/sphinxdocs/private/sphinx.bzl index 0bdf9dc257..421f10a26c 100644 --- a/sphinxdocs/private/sphinx.bzl +++ b/sphinxdocs/private/sphinx.bzl @@ -254,7 +254,7 @@ _sphinx_docs = rule( doc = "Additional tools that are used by Sphinx and its plugins.", ), "use_persistent_workers": attr.bool( - doc = "TODO", + doc = "(experimental) Whether to invoke Sphinx as a persistent worker.", default = False, ), "_extra_defines_flag": attr.label(default = "//sphinxdocs:extra_defines"), From d968327a5c08b007b1863f8b4f117848fe59ed8d Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Mon, 2 Jun 2025 16:20:14 -0700 Subject: [PATCH 11/19] trying to debug rbe --- sphinxdocs/private/sphinx_build.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sphinxdocs/private/sphinx_build.py b/sphinxdocs/private/sphinx_build.py index f89e464e0b..11cba5c3cb 100644 --- a/sphinxdocs/private/sphinx_build.py +++ b/sphinxdocs/private/sphinx_build.py @@ -210,4 +210,5 @@ def _handle_env_get_outdated(self, app, env, added, changed, removed): with Worker(sys.stdin, sys.stdout, os.getcwd()) as worker: sys.exit(worker.run()) else: + raise Exception("non-worker path taken") sys.exit(main()) From 4cea20a3e126d50fc2985e3a0ccc7fca07baa0cd Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Tue, 3 Jun 2025 10:25:27 -0700 Subject: [PATCH 12/19] support non-worker invocation when rules try to use a worker invocation --- sphinxdocs/private/sphinx.bzl | 14 +++++++++++--- sphinxdocs/private/sphinx_build.py | 23 +++++++++++++++++++---- 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/sphinxdocs/private/sphinx.bzl b/sphinxdocs/private/sphinx.bzl index 421f10a26c..244cce5f30 100644 --- a/sphinxdocs/private/sphinx.bzl +++ b/sphinxdocs/private/sphinx.bzl @@ -269,6 +269,12 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix, use_persistent_ run_args = [] # Copy of the args to forward along to debug runner args = ctx.actions.args() # Args passed to the action + # An args file is required for persistent workers, but we don't know if + # the action will use worker mode or not (settings we can't see may + # force non-worker mode). For consistency, always use a params file. + args.use_param_file("@%s", use_always = True) + args.set_param_file_format("multiline") + # NOTE: sphinx_build.py relies on the first two args being the srcdir and # outputdir, in that order. args.add(source_path) @@ -289,7 +295,8 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix, use_persistent_ if use_persistent_workers: # * Normally Sphinx puts doctrees in the output dir. We can't do that - # because Bazel will clear the output directory every invocation. + # because Bazel will clear the output directory every invocation. To + # work around that, create it as a sibling directory. # * Use a non-dot prefixed name so it shows up more visibly. args.add(paths.join(output_dir.path + "_doctrees"), format = "--doctree-dir=%s") @@ -318,10 +325,11 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix, use_persistent_ for tool in ctx.attr.tools: tools.append(tool[DefaultInfo].files_to_run) + # NOTE: Command line flags or RBE capabilities may override the execution + # requirements and disable workers. Thus, we can't assume that these + # exec requirements will actually be respected. execution_requirements = {} if use_persistent_workers: - args.use_param_file("@%s", use_always = True) - args.set_param_file_format("multiline") execution_requirements["supports-workers"] = "1" execution_requirements["requires-worker-protocol"] = "json" diff --git a/sphinxdocs/private/sphinx_build.py b/sphinxdocs/private/sphinx_build.py index 11cba5c3cb..25a660d14b 100644 --- a/sphinxdocs/private/sphinx_build.py +++ b/sphinxdocs/private/sphinx_build.py @@ -201,14 +201,29 @@ def _handle_env_get_outdated(self, app, env, added, changed, removed): env.path2doc(p) for p in self.changed_paths } + logger.info("changed docs: %s", changed) return changed +def _worker_main(stdin, stdout, exec_root): + with Worker(stdin, stdout, exec_root) as worker: + return worker.run() + + +def _non_worker_main(argv): + args = [] + for arg in sys.argv: + if arg.startswith("@"): + with open(arg.removeprefix("@")) as fp: + lines = [line.strip() for line in fp if line.strip()] + args.extend(lines) + else: + args.append(arg) + sys.argv[:] = args + return main() if __name__ == "__main__": if "--persistent_worker" in sys.argv: - with Worker(sys.stdin, sys.stdout, os.getcwd()) as worker: - sys.exit(worker.run()) + sys.exit(_worker_main(sys.stdin, sys.stdout, os.getcwd())) else: - raise Exception("non-worker path taken") - sys.exit(main()) + sys.exit(_non_worker_main()) From 2912dafa9cdc778e49f7f7ffdac3840e4dc0a972 Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Tue, 3 Jun 2025 11:48:32 -0700 Subject: [PATCH 13/19] fix bad arg --- sphinxdocs/private/sphinx_build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinxdocs/private/sphinx_build.py b/sphinxdocs/private/sphinx_build.py index 25a660d14b..0fe92e608c 100644 --- a/sphinxdocs/private/sphinx_build.py +++ b/sphinxdocs/private/sphinx_build.py @@ -210,7 +210,7 @@ def _worker_main(stdin, stdout, exec_root): return worker.run() -def _non_worker_main(argv): +def _non_worker_main(): args = [] for arg in sys.argv: if arg.startswith("@"): From 78651ead9b29eacf8b93aade1d766677825959f2 Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Tue, 3 Jun 2025 14:08:41 -0700 Subject: [PATCH 14/19] always set doctreedir --- sphinxdocs/private/sphinx.bzl | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/sphinxdocs/private/sphinx.bzl b/sphinxdocs/private/sphinx.bzl index 244cce5f30..e992ff9412 100644 --- a/sphinxdocs/private/sphinx.bzl +++ b/sphinxdocs/private/sphinx.bzl @@ -293,18 +293,15 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix, use_persistent_ # Don't add to run_args: parallel building breaks interactive debugging args.add("--jobs=auto") - if use_persistent_workers: - # * Normally Sphinx puts doctrees in the output dir. We can't do that - # because Bazel will clear the output directory every invocation. To - # work around that, create it as a sibling directory. - # * Use a non-dot prefixed name so it shows up more visibly. - args.add(paths.join(output_dir.path + "_doctrees"), format = "--doctree-dir=%s") - - else: - # These aren't added to run_args because we assume direct invocations - # will add them if necessary. - args.add("--fresh-env") # Don't try to use cache files. Bazel can't make use of them. - args.add("--write-all") # Write all files; don't try to detect "changed" files + # Put the doctree dir outside of the output directory. + # This allows it to be reused between invocations when possible; Bazel + # clears the output directory every action invocation. + # * For workers, they can fully re-use it. + # * For non-workers, it can be reused when sandboxing is disabled via + # the `no-sandbox` tag or execution requirement. + # + # We also use a non-dot prefixed name so it shows up more visibly. + args.add(paths.join(output_dir.path + "_doctrees"), format = "--doctree-dir=%s") for opt in ctx.attr.extra_opts: expanded = ctx.expand_location(opt) From 283e565443e1029bb770d19dc98b816ad8aec8cf Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Tue, 3 Jun 2025 15:18:02 -0700 Subject: [PATCH 15/19] enable worker by default so its used when available --- docs/BUILD.bazel | 1 - sphinxdocs/docs/index.md | 23 +++++++++++++++++++++++ sphinxdocs/private/sphinx.bzl | 17 +++++++++-------- 3 files changed, 32 insertions(+), 9 deletions(-) diff --git a/docs/BUILD.bazel b/docs/BUILD.bazel index 1204fa8a60..852c4d4fa6 100644 --- a/docs/BUILD.bazel +++ b/docs/BUILD.bazel @@ -70,7 +70,6 @@ sphinx_docs( strip_prefix = package_name() + "/", tags = ["docs"], target_compatible_with = _TARGET_COMPATIBLE_WITH, - use_persistent_workers = True, deps = [ ":bzl_api_docs", ":py_api_srcs", diff --git a/sphinxdocs/docs/index.md b/sphinxdocs/docs/index.md index bd6448ced9..0c1d945416 100644 --- a/sphinxdocs/docs/index.md +++ b/sphinxdocs/docs/index.md @@ -11,6 +11,29 @@ documentation. It comes with: While it is primarily oriented towards docgen for Starlark code, the core of it is agnostic as to what is being documented. +### Optimization + +Normally, Sphinx keeps various cache files to improve incremental building. +Unfortunately, programs performing their own caching don't interact well +with Bazel's model of precisely declaring and strictly enforcing what are +inputs, what are outputs, and what files are available when running a program. +The net effect is programs don't have a prior invocation's cache files +available. + +There are two mechanisms available to make some cache available to Sphinx under +Bazel: + +* Disable sandboxing, which allows some files from prior invocations to be + visible to subsequent invocations. This can be done multiple ways: + * Set `tags = ["no-sandbox"]` on the `sphinx_docs` target + * `--modify_execution_info=SphinxBuildDocs=+no-sandbox` (Bazel flag) + * `--strategy=SphinxBuildDocs=local` (Bazel flag) +* Use persistent workers (enabled by default) by setting + `allow_persistent_workers=True` on the `sphinx_docs` target. Note that other + Bazel flags can disable using workers even if an action supports it. Setting + `--strategy=SphinxBuildDocs=dynamic,worker,local,sandbox` should tell Bazel + to use workers if possible, otherwise fallback to non-worker invocations. + ```{toctree} :hidden: diff --git a/sphinxdocs/private/sphinx.bzl b/sphinxdocs/private/sphinx.bzl index e992ff9412..8b5558a34d 100644 --- a/sphinxdocs/private/sphinx.bzl +++ b/sphinxdocs/private/sphinx.bzl @@ -103,7 +103,7 @@ def sphinx_docs( strip_prefix = "", extra_opts = [], tools = [], - use_persistent_workers = False, + allow_persistent_workers = True, **kwargs): """Generate docs using Sphinx. @@ -143,8 +143,9 @@ def sphinx_docs( tools: {type}`list[label]` Additional tools that are used by Sphinx and its plugins. This just makes the tools available during Sphinx execution. To locate them, use {obj}`extra_opts` and `$(location)`. - use_persistent_workers: {type}`bool` (experimental) If enable, use a persistent - worker to run Sphinx for improved incremental, caching, and startup performance. + allow_persistent_workers: {type}`bool` (experimental) If true, allow + using persistent workers for running Sphinx, if Bazel decides to do so. + This can improve incremental building of docs. **kwargs: {type}`dict` Common attributes to pass onto rules. """ add_tag(kwargs, "@rules_python//sphinxdocs:sphinx_docs") @@ -168,7 +169,7 @@ def sphinx_docs( source_tree = internal_name + "/_sources", extra_opts = extra_opts, tools = tools, - use_persistent_workers = use_persistent_workers, + allow_persistent_workers = allow_persistent_workers, **kwargs ) @@ -213,7 +214,7 @@ def _sphinx_docs_impl(ctx): source_path = source_dir_path, output_prefix = paths.join(ctx.label.name, "_build"), inputs = inputs, - use_persistent_workers = ctx.attr.use_persistent_workers, + allow_persistent_workers = ctx.attr.allow_persistent_workers, ) outputs[format] = output_dir per_format_args[format] = args_env @@ -253,7 +254,7 @@ _sphinx_docs = rule( cfg = "exec", doc = "Additional tools that are used by Sphinx and its plugins.", ), - "use_persistent_workers": attr.bool( + "allow_persistent_workers": attr.bool( doc = "(experimental) Whether to invoke Sphinx as a persistent worker.", default = False, ), @@ -263,7 +264,7 @@ _sphinx_docs = rule( }, ) -def _run_sphinx(ctx, format, source_path, inputs, output_prefix, use_persistent_workers): +def _run_sphinx(ctx, format, source_path, inputs, output_prefix, allow_persistent_workers): output_dir = ctx.actions.declare_directory(paths.join(output_prefix, format)) run_args = [] # Copy of the args to forward along to debug runner @@ -326,7 +327,7 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix, use_persistent_ # requirements and disable workers. Thus, we can't assume that these # exec requirements will actually be respected. execution_requirements = {} - if use_persistent_workers: + if allow_persistent_workers: execution_requirements["supports-workers"] = "1" execution_requirements["requires-worker-protocol"] = "json" From 1e7fa2aa4a63a02dcd9a53fa93292119e8430b74 Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Tue, 3 Jun 2025 15:22:19 -0700 Subject: [PATCH 16/19] fix doc typo --- sphinxdocs/docs/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinxdocs/docs/index.md b/sphinxdocs/docs/index.md index 0c1d945416..2ea1146e1b 100644 --- a/sphinxdocs/docs/index.md +++ b/sphinxdocs/docs/index.md @@ -27,7 +27,7 @@ Bazel: visible to subsequent invocations. This can be done multiple ways: * Set `tags = ["no-sandbox"]` on the `sphinx_docs` target * `--modify_execution_info=SphinxBuildDocs=+no-sandbox` (Bazel flag) - * `--strategy=SphinxBuildDocs=local` (Bazel flag) + * `--strategy=SphinxBuildDocs=local` (Bazel flag) * Use persistent workers (enabled by default) by setting `allow_persistent_workers=True` on the `sphinx_docs` target. Note that other Bazel flags can disable using workers even if an action supports it. Setting From ceefdce4be3e97b47cd477aef6021d1cf73bb4a4 Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Tue, 3 Jun 2025 15:24:38 -0700 Subject: [PATCH 17/19] rm old use_persistent_Worker arg name --- sphinxdocs/tests/sphinx_docs/BUILD.bazel | 1 - 1 file changed, 1 deletion(-) diff --git a/sphinxdocs/tests/sphinx_docs/BUILD.bazel b/sphinxdocs/tests/sphinx_docs/BUILD.bazel index 11ece4905f..f9c82967c1 100644 --- a/sphinxdocs/tests/sphinx_docs/BUILD.bazel +++ b/sphinxdocs/tests/sphinx_docs/BUILD.bazel @@ -24,7 +24,6 @@ sphinx_docs( formats = ["html"], sphinx = ":sphinx-build", target_compatible_with = _TARGET_COMPATIBLE_WITH, - use_persistent_workers = True, ) gen_directory( From f6e1c8b3b3fb7088cbf801d16997ef1ebe86a2ca Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Tue, 3 Jun 2025 15:27:01 -0700 Subject: [PATCH 18/19] format files --- sphinxdocs/private/sphinx.bzl | 8 ++++---- sphinxdocs/private/sphinx_build.py | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/sphinxdocs/private/sphinx.bzl b/sphinxdocs/private/sphinx.bzl index 8b5558a34d..ee6b994e2e 100644 --- a/sphinxdocs/private/sphinx.bzl +++ b/sphinxdocs/private/sphinx.bzl @@ -235,6 +235,10 @@ def _sphinx_docs_impl(ctx): _sphinx_docs = rule( implementation = _sphinx_docs_impl, attrs = { + "allow_persistent_workers": attr.bool( + doc = "(experimental) Whether to invoke Sphinx as a persistent worker.", + default = False, + ), "extra_opts": attr.string_list( doc = "Additional options to pass onto Sphinx. These are added after " + "other options, but before the source/output args.", @@ -254,10 +258,6 @@ _sphinx_docs = rule( cfg = "exec", doc = "Additional tools that are used by Sphinx and its plugins.", ), - "allow_persistent_workers": attr.bool( - doc = "(experimental) Whether to invoke Sphinx as a persistent worker.", - default = False, - ), "_extra_defines_flag": attr.label(default = "//sphinxdocs:extra_defines"), "_extra_env_flag": attr.label(default = "//sphinxdocs:extra_env"), "_quiet_flag": attr.label(default = "//sphinxdocs:quiet"), diff --git a/sphinxdocs/private/sphinx_build.py b/sphinxdocs/private/sphinx_build.py index 0fe92e608c..4d94ab631b 100644 --- a/sphinxdocs/private/sphinx_build.py +++ b/sphinxdocs/private/sphinx_build.py @@ -205,6 +205,7 @@ def _handle_env_get_outdated(self, app, env, added, changed, removed): logger.info("changed docs: %s", changed) return changed + def _worker_main(stdin, stdout, exec_root): with Worker(stdin, stdout, exec_root) as worker: return worker.run() @@ -222,6 +223,7 @@ def _non_worker_main(): sys.argv[:] = args return main() + if __name__ == "__main__": if "--persistent_worker" in sys.argv: sys.exit(_worker_main(sys.stdin, sys.stdout, os.getcwd())) From 2520a2d9540e1cdea19101efb98121d0e1268c42 Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Tue, 3 Jun 2025 16:16:59 -0700 Subject: [PATCH 19/19] register config key so its available --- sphinxdocs/private/sphinx_build.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sphinxdocs/private/sphinx_build.py b/sphinxdocs/private/sphinx_build.py index 4d94ab631b..e9711042f6 100644 --- a/sphinxdocs/private/sphinx_build.py +++ b/sphinxdocs/private/sphinx_build.py @@ -18,6 +18,9 @@ _WORKER_SPHINX_EXT_MODULE_NAME = "bazel_worker_sphinx_ext" +# Config value name for getting the path to the request info file +_REQUEST_INFO_CONFIG_NAME = "bazel_worker_request_info_path" + class Worker: @@ -75,7 +78,7 @@ def run(self) -> None: except Exception: logger.exception("Unhandled error: request=%s", request) output = ( - f"Unhandled error:\nRequest: {request}\n" + f"Unhandled error:\nRequest id: {request.get('id')}\n" + traceback.format_exc() ) request_id = 0 if not request else request.get("requestId", 0) @@ -133,7 +136,7 @@ def _prepare_sphinx(self, request): request_info_path = os.path.join(srcdir, "_bazel_worker_request_info.json") with open(request_info_path, "w") as fp: json.dump(request_info, fp) - sphinx_args.append(f"--define=bazel_worker_request_info={request_info_path}") + sphinx_args.append(f"--define={_REQUEST_INFO_CONFIG_NAME}={request_info_path}") return worker_outdir, bazel_outdir, sphinx_args @@ -192,6 +195,7 @@ def __init__(self): self.changed_paths = set() def setup(self, app): + app.add_config_value(_REQUEST_INFO_CONFIG_NAME, "", "") app.connect("env-get-outdated", self._handle_env_get_outdated) return {"parallel_read_safe": True, "parallel_write_safe": True}