74 changes: 56 additions & 18 deletions sphinxdocs/private/sphinx.bzl
@@ -103,6 +103,7 @@ def sphinx_docs(
strip_prefix = "",
extra_opts = [],
tools = [],
use_cache = False,
**kwargs):
"""Generate docs using Sphinx.

@@ -165,6 +166,7 @@ def sphinx_docs(
source_tree = internal_name + "/_sources",
extra_opts = extra_opts,
tools = tools,
use_cache = use_cache,
**kwargs
)

@@ -209,6 +211,7 @@ def _sphinx_docs_impl(ctx):
source_path = source_dir_path,
output_prefix = paths.join(ctx.label.name, "_build"),
inputs = inputs,
use_cache = ctx.attr.use_cache,
)
outputs[format] = output_dir
per_format_args[format] = args_env
@@ -240,26 +243,34 @@ _sphinx_docs = rule(
),
"sphinx": attr.label(
executable = True,
cfg = "exec",
cfg = "host",
mandatory = True,
doc = "Sphinx binary to generate documentation.",
),
"tools": attr.label_list(
cfg = "exec",
doc = "Additional tools that are used by Sphinx and its plugins.",
),
"use_cache": attr.bool(
doc = "TODO",
default = False,
),
"_extra_defines_flag": attr.label(default = "//sphinxdocs:extra_defines"),
"_extra_env_flag": attr.label(default = "//sphinxdocs:extra_env"),
"_quiet_flag": attr.label(default = "//sphinxdocs:quiet"),
},
)
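
For orientation, a hedged sketch of how a BUILD file could opt in to the new attribute. Only use_cache comes from this change; the load path, target names, and other attributes are illustrative assumptions.

load("@rules_python//sphinxdocs:sphinx.bzl", "sphinx_docs")  # assumed public load path

sphinx_docs(
    name = "docs",
    srcs = glob(["*.rst"]),        # illustrative; not shown in this diff
    formats = ["html"],
    sphinx = ":sphinx_build",      # the executable wired to sphinx_build.py
    use_cache = True,              # new in this change; defaults to False
)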

def _run_sphinx(ctx, format, source_path, inputs, output_prefix):
def _run_sphinx(ctx, format, source_path, inputs, output_prefix, use_cache):

output_dir = ctx.actions.declare_directory(paths.join(output_prefix, format))

run_args = [] # Copy of the args to forward along to debug runner
args = ctx.actions.args() # Args passed to the action

args.add(source_path)
args.add(output_dir.path)

args.add("--show-traceback") # Full tracebacks on error
run_args.append("--show-traceback")
args.add("--builder", format)
@@ -272,10 +283,14 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix):
# Build in parallel, if possible
# Don't add to run_args: parallel building breaks interactive debugging
args.add("--jobs", "auto")
args.add("--fresh-env") # Don't try to use cache files. Bazel can't make use of them.
run_args.append("--fresh-env")
args.add("--write-all") # Write all files; don't try to detect "changed" files
run_args.append("--write-all")

if use_cache:
args.add("--doctree-dir", paths.join(output_dir.path, ".doctrees"))
else:
args.add("--fresh-env") # Don't try to use cache files. Bazel can't make use of them.
run_args.append("--fresh-env")
args.add("--write-all") # Write all files; don't try to detect "changed" files
run_args.append("--write-all")

for opt in ctx.attr.extra_opts:
expanded = ctx.expand_location(opt)
@@ -287,8 +302,6 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix):
for define in extra_defines:
run_args.extend(("--define", define))

args.add(source_path)
args.add(output_dir.path)

env = dict([
v.split("=", 1)
@@ -299,16 +312,41 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix):
for tool in ctx.attr.tools:
tools.append(tool[DefaultInfo].files_to_run)

ctx.actions.run(
executable = ctx.executable.sphinx,
arguments = [args],
inputs = inputs,
outputs = [output_dir],
tools = tools,
mnemonic = "SphinxBuildDocs",
progress_message = "Sphinx building {} for %{{label}}".format(format),
env = env,
)
if use_cache:
worker_arg_file = ctx.actions.declare_file(ctx.attr.name + ".worker_args")
ctx.actions.write(
output = worker_arg_file,
content = args,
)
all_inputs = depset(
direct = [worker_arg_file],
transitive = [inputs]
)
ctx.actions.run(
executable = ctx.executable.sphinx,
arguments = ["@" + worker_arg_file.path],
inputs = all_inputs,
outputs = [output_dir],
tools = tools,
mnemonic = "SphinxBuildDocsCache",
progress_message = "Sphinx building {} for %{{label}}".format(format),
env = env,
execution_requirements = {
"supports-workers": "1",
"requires-worker-protocol": "json"
}
)
else:
ctx.actions.run(
executable = ctx.executable.sphinx,
arguments = [args],
inputs = inputs,
outputs = [output_dir],
tools = tools,
mnemonic = "SphinxBuildDocsNoCache",
progress_message = "Sphinx building {} for %{{label}}".format(format),
env = env,
)
return output_dir, struct(args = run_args, env = env)
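
When use_cache is set, the action above advertises supports-workers, so Bazel keeps the process alive and feeds it newline-delimited JSON work requests on stdin rather than re-spawning it per build. A hedged sketch of one such request, limited to the fields the worker below actually reads; paths and digest values are invented:

# Illustrative work request (field names match what sphinx_build.py reads;
# concrete values are made up).
request = {
    "requestId": 1,
    "cancel": False,
    "arguments": [                 # expanded from the "@<name>.worker_args" flag file
        "docs/_sources",           # srcdir (positional)
        "docs/_build/html",        # outdir (positional)
        "--show-traceback",
        "--builder", "html",
        "--jobs", "auto",
        "--doctree-dir", "docs/_build/html/.doctrees",
    ],
    "inputs": [                    # one entry per input file, with a content hash
        {"path": "docs/_sources/index.rst", "digest": "9f86d081..."},
    ],
}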

def _sphinx_source_tree_impl(ctx):
158 changes: 157 additions & 1 deletion sphinxdocs/private/sphinx_build.py
@@ -1,8 +1,164 @@
from pathlib import Path

import argparse
import json
import logging
import os
import pathlib
import sys
import time
import traceback
import typing

from sphinx.cmd.build import main


# Placeholder aliases for the type annotations below; the real protocol
# messages are plain dicts decoded from the JSON worker protocol.
WorkRequest = object
WorkResponse = object


parser = argparse.ArgumentParser(
fromfile_prefix_chars='@'
)
# parser.add_argument('srcdir')
# parser.add_argument('outdir')
parser.add_argument("--persistent_worker", action="store_true")
parser.add_argument("--doctree-dir")


class Worker:

def __init__(self, instream: "typing.TextIO", outstream: "typing.TextIO"):
self._instream = instream
self._outstream = outstream
self._logger = logging.getLogger("worker")
logging.basicConfig(filename='echo.log', encoding='utf-8', level=logging.DEBUG)
self._logger.info("starting worker")
self._current = {}
self._previous = {}
self._cache = {}

def run(self) -> None:
try:
while True:
request = None
try:
request = self._get_next_request()
if request is None:
self._logger.info("Empty request: exiting")
break
response = self._process_request(request)
if response:
self._send_response(response)
except Exception:
self._logger.exception("Unhandled error: request=%s", request)
output = (
f"Unhandled error:\nRequest: {request}\n"
+ traceback.format_exc()
)
request_id = 0 if not request else request.get("requestId", 0)
self._send_response(
{
"exitCode": 3,
"output": output,
"requestId": request_id,
}
)
finally:
self._logger.info("Worker shutting down")

def _get_next_request(self) -> "object | None":
line = self._instream.readline()
if not line:
return None
return json.loads(line)

    @property
    def inputs(self):
        # Content digests of the current request's input files.
        return self._current

    def _update_digest(self, request):
# Make room for the new build's data.
self._previous = self._current
# Rearrange the new data into a dict to make comparisons easier.
self._current = {}
for page in request["inputs"]:
path = page["path"]
self._current[path] = page["digest"]
# Compare the content hashes to determine what pages have changed.
tmp = []
for path in self._current:
if path not in self._previous:
tmp.append(path)
continue
if self._current[path] != self._previous[path]:
tmp.append(path)
continue
for path in self._previous:
if path not in self._current:
tmp.append(path)
continue
# Normalize the paths into docnames
digest = []
for path in tmp:
if not path.endswith(".rst"):
continue
srcdir = self.args[0]
docname = path.replace(srcdir + "/", "")
docname = docname.replace(".rst", "")
digest.append(docname)
args, unknown = parser.parse_known_args(self.args)
# Save the digest.
doctree_dir = Path(args.doctree_dir)
# On a fresh build, _restore_cache() does nothing, so this dir won't exist yet.
if not doctree_dir.is_dir():
doctree_dir.mkdir(parents=True)
with open(doctree_dir / Path("digest.json"), "w") as f:
json.dump(digest, f, indent=2)
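
A small worked example of the comparison above, with invented hashes: a page whose digest changed and a newly added page both land in the digest, while unchanged pages are skipped.

# Invented values to illustrate _update_digest:
previous = {"src/index.rst": "aaa", "src/api.rst": "bbb"}
current = {"src/index.rst": "aaa", "src/api.rst": "ccc", "src/faq.rst": "ddd"}
# api.rst changed and faq.rst is new, so (with srcdir == "src") the digest
# written to digest.json is ["api", "faq"]; index.rst is unchanged and skipped.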

def _restore_cache(self):
for filepath in self._cache:
data = self._cache[filepath]
parent = Path(os.path.dirname(filepath))
if not parent.is_dir():
parent.mkdir(parents=True)
with open(filepath, "wb") as f:
f.write(data)

def _update_cache(self):
args, unknown = parser.parse_known_args(self.args)
self._cache = {}
for root, _, files in os.walk(args.doctree_dir):
for filename in files:
filepath = Path(root) / Path(filename)
with open(filepath, "rb") as f:
self._cache[str(filepath)] = f.read()
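
For reference, a sketch of what the in-memory cache holds after a build, assuming the doctree directory from the request sketch above; keys are file paths, values the raw bytes restored before the next build. All values here are invented:

cache = {
    "docs/_build/html/.doctrees/index.doctree": b"<pickled doctree bytes>",
    "docs/_build/html/.doctrees/digest.json": b'["api", "faq"]',
}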

def _process_request(self, request: "WorkRequest") -> "WorkResponse | None":
if request.get("cancel"):
return None
self.args = request["arguments"]
self._restore_cache()
self._update_digest(request)
        # Propagate Sphinx's exit status; otherwise Bazel would be told the
        # build succeeded even when Sphinx failed.
        exit_code = main(self.args)
        self._update_cache()
        response = {
            "requestId": request.get("requestId", 0),
            "exitCode": exit_code,
        }
return response

def _send_response(self, response: "WorkResponse") -> None:
self._outstream.write(json.dumps(response) + "\n")
self._outstream.flush()


if __name__ == "__main__":
sys.exit(main())
args, unknown = parser.parse_known_args()
if args.persistent_worker:
Worker(sys.stdin, sys.stdout).run()
else:
sys.exit(main())
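
Outside Bazel, the worker loop can be exercised by hand for debugging. A minimal sketch, assuming this script is saved as sphinx_build.py next to a Sphinx project under _sources; the driver below is not part of this change and reuses only the protocol fields handled above.

import json
import subprocess
import sys

# Hypothetical debug driver (not part of this PR): start the worker,
# send one request on stdin, and read the JSON response from stdout.
proc = subprocess.Popen(
    [sys.executable, "sphinx_build.py", "--persistent_worker"],
    stdin=subprocess.PIPE,
    stdout=subprocess.PIPE,
    text=True,
)
request = {
    "requestId": 1,
    "arguments": [
        "_sources", "_build/html",
        "--builder", "html",
        "--doctree-dir", "_build/html/.doctrees",
    ],
    "inputs": [],
}
proc.stdin.write(json.dumps(request) + "\n")
proc.stdin.flush()
print(proc.stdout.readline())  # e.g. {"requestId": 1, "exitCode": 0}
proc.stdin.close()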