Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 35 additions & 12 deletions sphinxdocs/private/sphinx.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ def sphinx_docs(
strip_prefix = "",
extra_opts = [],
tools = [],
use_persistent_workers = False,
**kwargs):
"""Generate docs using Sphinx.

Expand Down Expand Up @@ -165,6 +166,7 @@ def sphinx_docs(
source_tree = internal_name + "/_sources",
extra_opts = extra_opts,
tools = tools,
use_persistent_workers = use_persistent_workers,
**kwargs
)

Expand Down Expand Up @@ -209,6 +211,7 @@ def _sphinx_docs_impl(ctx):
source_path = source_dir_path,
output_prefix = paths.join(ctx.label.name, "_build"),
inputs = inputs,
use_persistent_workers = ctx.attr.use_persistent_workers,
)
outputs[format] = output_dir
per_format_args[format] = args_env
Expand Down Expand Up @@ -240,42 +243,57 @@ _sphinx_docs = rule(
),
"sphinx": attr.label(
executable = True,
cfg = "exec",
cfg = "host",
mandatory = True,
doc = "Sphinx binary to generate documentation.",
),
"tools": attr.label_list(
cfg = "exec",
doc = "Additional tools that are used by Sphinx and its plugins.",
),
"use_persistent_workers": attr.bool(
doc = "TODO",
default = False,
),
"_extra_defines_flag": attr.label(default = "//sphinxdocs:extra_defines"),
"_extra_env_flag": attr.label(default = "//sphinxdocs:extra_env"),
"_quiet_flag": attr.label(default = "//sphinxdocs:quiet"),
},
)

def _run_sphinx(ctx, format, source_path, inputs, output_prefix):
def _run_sphinx(ctx, format, source_path, inputs, output_prefix, use_persistent_workers):
output_dir = ctx.actions.declare_directory(paths.join(output_prefix, format))

run_args = [] # Copy of the args to forward along to debug runner
args = ctx.actions.args() # Args passed to the action

args.add(source_path)
args.add(output_dir.path)

args.add("--show-traceback") # Full tracebacks on error
run_args.append("--show-traceback")
args.add("--builder", format)
run_args.extend(("--builder", format))
args.add(format, format = "--builder=%s")
run_args.append("--builder={}".format(format))

if ctx.attr._quiet_flag[BuildSettingInfo].value:
# Not added to run_args because run_args is for debugging
args.add("--quiet") # Suppress stdout informational text

# Build in parallel, if possible
# Don't add to run_args: parallel building breaks interactive debugging
args.add("--jobs", "auto")
args.add("--fresh-env") # Don't try to use cache files. Bazel can't make use of them.
run_args.append("--fresh-env")
args.add("--write-all") # Write all files; don't try to detect "changed" files
run_args.append("--write-all")
args.add("--jobs=auto")

if use_persistent_workers:
# * Normally Sphinx puts doctrees in the output dir. We can't do that
# because Bazel will clear the output directory every invocation.
# * Use a non-dot prefixed name so it shows up more visibly.
args.add(paths.join(output_dir.path + "_doctrees"), format = "--doctree-dir=%s")

else:
# These aren't added to run_args because we assume direct invocations
# will add them if necessary.
args.add("--fresh-env") # Don't try to use cache files. Bazel can't make use of them.
args.add("--write-all") # Write all files; don't try to detect "changed" files

for opt in ctx.attr.extra_opts:
expanded = ctx.expand_location(opt)
Expand All @@ -287,9 +305,6 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix):
for define in extra_defines:
run_args.extend(("--define", define))

args.add(source_path)
args.add(output_dir.path)

env = dict([
v.split("=", 1)
for v in ctx.attr._extra_env_flag[_FlagInfo].value
Expand All @@ -299,6 +314,13 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix):
for tool in ctx.attr.tools:
tools.append(tool[DefaultInfo].files_to_run)

execution_requirements = {}
if use_persistent_workers:
args.use_param_file("@%s", use_always = True)
args.set_param_file_format("multiline")
execution_requirements["supports-workers"] = "1"
execution_requirements["requires-worker-protocol"] = "json"

ctx.actions.run(
executable = ctx.executable.sphinx,
arguments = [args],
Expand All @@ -308,6 +330,7 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix):
mnemonic = "SphinxBuildDocs",
progress_message = "Sphinx building {} for %{{label}}".format(format),
env = env,
execution_requirements = execution_requirements,
)
return output_dir, struct(args = run_args, env = env)

Expand Down
219 changes: 218 additions & 1 deletion sphinxdocs/private/sphinx_build.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,225 @@
from pathlib import Path

import shutil
import contextlib
import argparse
import json
import logging
import os
import pathlib
import sys
import time
import traceback
import typing
import io
import sphinx.application

from sphinx.cmd.build import main


# Type aliases for the Bazel JSON worker-protocol messages. At runtime the
# requests/responses are plain dicts parsed from JSON; `object` is a stand-in
# used only inside quoted annotations.
WorkRequest = object
WorkResponse = object

logger = logging.getLogger('sphinxdocs-build')

# Module name under which the in-process worker extension is registered with
# Sphinx (installed into sys.modules and builtin_extensions by Worker).
_WORKER_SPHINX_EXT_MODULE_NAME = "bazel_worker_sphinx_ext"

class Worker:
    """Persistent Bazel worker that runs Sphinx builds via the JSON protocol.

    Reads newline-delimited JSON work requests from ``instream``, runs a
    Sphinx build for each, and writes one JSON response per request to
    ``outstream``. Input digests are remembered between requests (per source
    dir) so only changed sources are reported to Sphinx as outdated.
    """

    def __init__(
        self,
        instream: "typing.TextIO",
        outstream: "typing.TextIO",
        exec_root: str,
    ) -> None:
        """Initializes the worker.

        Args:
            instream: stream work requests are read from (normally stdin).
            outstream: stream responses are written to (normally stdout).
            exec_root: directory that request input paths are relative to.
        """
        # NOTE: Sphinx performs its own logging re-configuration, so any
        # logging config we do isn't respected by Sphinx. Controlling where
        # stdout and stderr goes are the main mechanisms. Recall that
        # Bazel sends worker stderr to the worker log file.
        # outputBase=$(bazel info output_base)
        # find $outputBase/bazel-workers/ -type f -printf '%T@ %p\n' | sort -n | tail -1 | awk '{print $2}'
        logging.basicConfig(level=logging.DEBUG)
        logger.info("initializing worker")

        # The directory that paths are relative to.
        self._exec_root = exec_root
        # Where requests are read from.
        self._instream = instream
        # Where responses are written to.
        self._outstream = outstream

        # dict[str srcdir, dict[str path, str digest]]: last-seen input
        # digests per source dir; compared against each request to detect
        # changed files.
        self._digests = {}

        # Internal output directories the worker gives to Sphinx that need
        # to be cleaned up upon exit.
        # set[str path]
        self._worker_outdirs = set()
        self._extension = BazelWorkerExtension()

        # Register the in-process extension object as an importable "module"
        # so Sphinx loads it for every build.
        sys.modules[_WORKER_SPHINX_EXT_MODULE_NAME] = self._extension
        sphinx.application.builtin_extensions += (_WORKER_SPHINX_EXT_MODULE_NAME,)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        # NOTE: __exit__ must accept the three exception arguments; the
        # zero-argument form raises TypeError when the `with` block exits,
        # which would skip this cleanup entirely.
        for worker_outdir in self._worker_outdirs:
            shutil.rmtree(worker_outdir, ignore_errors=True)

    def run(self) -> None:
        """Serves work requests until the input stream closes."""
        logger.info("Worker started")
        try:
            while True:
                request = None
                try:
                    request = self._get_next_request()
                    if request is None:
                        logger.info("Empty request: exiting")
                        break
                    response = self._process_request(request)
                    if response:
                        self._send_response(response)
                except Exception:
                    # Catch-all so one bad request doesn't kill the worker;
                    # report the failure back to Bazel instead.
                    logger.exception("Unhandled error: request=%s", request)
                    output = (
                        f"Unhandled error:\nRequest: {request}\n"
                        + traceback.format_exc()
                    )
                    request_id = 0 if not request else request.get("requestId", 0)
                    self._send_response(
                        {
                            "exitCode": 3,
                            "output": output,
                            "requestId": request_id,
                        }
                    )
        finally:
            logger.info("Worker shutting down")

    def _get_next_request(self) -> "object | None":
        """Reads one request; returns None when the stream is exhausted."""
        line = self._instream.readline()
        if not line:
            return None
        return json.loads(line)

    def _send_response(self, response: "WorkResponse") -> None:
        """Writes a single JSON response line and flushes immediately."""
        self._outstream.write(json.dumps(response) + "\n")
        self._outstream.flush()

    def _prepare_sphinx(self, request):
        """Computes changed inputs and rewrites args for this request.

        Returns:
            Tuple of (worker_outdir, bazel_outdir, sphinx_args) where
            worker_outdir is the worker-private output dir given to Sphinx
            and bazel_outdir is where Bazel expects the final output.
        """
        sphinx_args = request["arguments"]
        srcdir = sphinx_args[0]

        incoming_digests = {}
        current_digests = self._digests.setdefault(srcdir, {})
        changed_paths = []
        request_info = {
            "exec_root": self._exec_root,
            "inputs": request["inputs"]
        }
        for entry in request["inputs"]:
            path = entry["path"]
            digest = entry["digest"]

            # Make the path srcdir-relative so Sphinx understands it.
            path = path.removeprefix(srcdir + "/")
            incoming_digests[path] = digest

            if path not in current_digests:
                logger.info("path %s new", path)
                changed_paths.append(path)
            elif current_digests[path] != digest:
                logger.info("path %s changed", path)
                changed_paths.append(path)

        self._digests[srcdir] = incoming_digests
        self._extension.changed_paths = changed_paths
        request_info["changed_sources"] = changed_paths

        # Build into a worker-private directory: Bazel clears the declared
        # output dir every invocation, which would defeat incremental builds.
        bazel_outdir = sphinx_args[1]
        worker_outdir = bazel_outdir + ".worker-out.d"
        self._worker_outdirs.add(worker_outdir)
        sphinx_args[1] = worker_outdir

        # Expose the request metadata to the Sphinx build via a -D define
        # pointing at a JSON file written into the source dir.
        request_info_path = os.path.join(srcdir, "_bazel_worker_request_info.json")
        with open(request_info_path, "w") as fp:
            json.dump(request_info, fp)
        sphinx_args.append(f"--define=bazel_worker_request_info={request_info_path}")

        return worker_outdir, bazel_outdir, sphinx_args

    @contextlib.contextmanager
    def _redirect_streams(self):
        """Temporarily captures sys.stdout into a StringIO buffer."""
        out = io.StringIO()
        orig_stdout = sys.stdout
        try:
            sys.stdout = out
            yield out
        finally:
            sys.stdout = orig_stdout

    def _process_request(self, request: "WorkRequest") -> "WorkResponse | None":
        """Runs one Sphinx build; returns the response (None if cancelled).

        Raises:
            Exception: if the Sphinx build returns a non-zero exit code.
        """
        logger.info("Request: %s", json.dumps(request, sort_keys=True, indent=2))
        if request.get("cancel"):
            return None

        worker_outdir, bazel_outdir, sphinx_args = self._prepare_sphinx(request)

        # Prevent anything from going to stdout because it breaks the worker
        # protocol. We have limited control over where Sphinx sends output.
        with self._redirect_streams() as stdout:
            logger.info("main args: %s", sphinx_args)
            exit_code = main(sphinx_args)

        if exit_code:
            raise Exception(
                "Sphinx main() returned failure: " +
                f" exit code: {exit_code}\n" +
                "========== STDOUT START ==========\n" +
                stdout.getvalue().rstrip("\n") + "\n" +
                "========== STDOUT END ==========\n"
            )

        # Copying is unfortunately necessary because Bazel doesn't know to
        # implicitly bring along what the symlinks point to.
        shutil.copytree(worker_outdir, bazel_outdir, dirs_exist_ok=True)

        response = {
            "requestId": request.get("requestId", 0),
            "output": stdout.getvalue(),
            "exitCode": 0,
        }
        return response



# TODO: make this parallel-safe
class BazelWorkerExtension:
    """In-process Sphinx extension that reports which sources changed.

    The worker records the changed input paths for each work request and this
    extension forwards them to Sphinx via the `env-get-outdated` event so
    only the affected documents are re-read.
    """

    def __init__(self):
        # Sphinx identifies extensions by module name; mimic a module object.
        self.__name__ = _WORKER_SPHINX_EXT_MODULE_NAME
        # Iterable[str] of src-dir relative path names; the worker assigns a
        # fresh list per request (only iteration is relied upon).
        self.changed_paths = set()

    def setup(self, app):
        """Standard Sphinx extension entry point."""
        app.connect('env-get-outdated', self._handle_env_get_outdated)
        return {
            "parallel_read_safe": True,
            "parallel_write_safe": True
        }

    def _handle_env_get_outdated(self, app, env, added, changed, removed):
        """Returns the set of docnames Sphinx must re-read."""
        changed = {
            # NOTE: path2doc returns None if it's not a doc path
            env.path2doc(p) for p in self.changed_paths
        }
        # Non-doc inputs (e.g. the request-info JSON) map to None; drop it so
        # Sphinx isn't told to re-read a nonexistent document.
        changed.discard(None)
        logger.info("changed docs: %s", changed)
        return changed


if __name__ == "__main__":
    # Bazel passes --persistent_worker when it wants the JSON worker
    # protocol; otherwise fall back to a normal one-shot Sphinx build.
    # NOTE: the stray unconditional `sys.exit(main())` that preceded this
    # conditional made the worker path unreachable; it has been removed.
    if '--persistent_worker' in sys.argv:
        with Worker(sys.stdin, sys.stdout, os.getcwd()) as worker:
            sys.exit(worker.run())
    else:
        sys.exit(main())
1 change: 1 addition & 0 deletions sphinxdocs/tests/sphinx_docs/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ sphinx_docs(
formats = ["html"],
sphinx = ":sphinx-build",
target_compatible_with = _TARGET_COMPATIBLE_WITH,
use_persistent_workers = True,
)

gen_directory(
Expand Down
2 changes: 1 addition & 1 deletion sphinxdocs/tests/sphinx_docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

# -- Project info

project = "Sphinx Docs Test"
project = "Sphinx Docs Test xx"

extensions = [
"myst_parser",
Expand Down
4 changes: 4 additions & 0 deletions sphinxdocs/tests/sphinx_docs/doc1.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# doc1

hello doc 1
x
4 changes: 4 additions & 0 deletions sphinxdocs/tests/sphinx_docs/doc2.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# doc 2


hello doc 3