Skip to content

Commit ce46a7f

Browse files
authored
Merge branch 'main' into aaron_update_setuptools
2 parents 2fe0132 + 0498664 commit ce46a7f

File tree

5 files changed

+302
-13
lines changed

5 files changed

+302
-13
lines changed

sphinxdocs/docs/index.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,29 @@ documentation. It comes with:
1111
While it is primarily oriented towards docgen for Starlark code, the core of it
1212
is agnostic as to what is being documented.
1313

14+
### Optimization
15+
16+
Normally, Sphinx keeps various cache files to improve incremental building.
17+
Unfortunately, programs performing their own caching don't interact well
18+
with Bazel's model of precisely declaring and strictly enforcing what are
19+
inputs, what are outputs, and what files are available when running a program.
20+
The net effect is that programs don't have a prior invocation's cache files
21+
available.
22+
23+
There are two mechanisms available to make some cache available to Sphinx under
24+
Bazel:
25+
26+
* Disable sandboxing, which allows some files from prior invocations to be
27+
visible to subsequent invocations. This can be done multiple ways:
28+
* Set `tags = ["no-sandbox"]` on the `sphinx_docs` target
29+
* `--modify_execution_info=SphinxBuildDocs=+no-sandbox` (Bazel flag)
30+
* `--strategy=SphinxBuildDocs=local` (Bazel flag)
31+
* Use persistent workers (enabled by default) by setting
32+
`allow_persistent_workers=True` on the `sphinx_docs` target. Note that other
33+
Bazel flags can disable using workers even if an action supports it. Setting
34+
`--strategy=SphinxBuildDocs=dynamic,worker,local,sandbox` should tell Bazel
35+
to use workers if possible, and otherwise fall back to non-worker invocations.
36+
1437

1538
```{toctree}
1639
:hidden:

sphinxdocs/private/sphinx.bzl

Lines changed: 44 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ def sphinx_docs(
103103
strip_prefix = "",
104104
extra_opts = [],
105105
tools = [],
106+
allow_persistent_workers = True,
106107
**kwargs):
107108
"""Generate docs using Sphinx.
108109
@@ -142,6 +143,9 @@ def sphinx_docs(
142143
tools: {type}`list[label]` Additional tools that are used by Sphinx and its plugins.
143144
This just makes the tools available during Sphinx execution. To locate
144145
them, use {obj}`extra_opts` and `$(location)`.
146+
allow_persistent_workers: {type}`bool` (experimental) If true, allow
147+
using persistent workers for running Sphinx, if Bazel decides to do so.
148+
This can improve incremental building of docs.
145149
**kwargs: {type}`dict` Common attributes to pass onto rules.
146150
"""
147151
add_tag(kwargs, "@rules_python//sphinxdocs:sphinx_docs")
@@ -165,6 +169,7 @@ def sphinx_docs(
165169
source_tree = internal_name + "/_sources",
166170
extra_opts = extra_opts,
167171
tools = tools,
172+
allow_persistent_workers = allow_persistent_workers,
168173
**kwargs
169174
)
170175

@@ -209,6 +214,7 @@ def _sphinx_docs_impl(ctx):
209214
source_path = source_dir_path,
210215
output_prefix = paths.join(ctx.label.name, "_build"),
211216
inputs = inputs,
217+
allow_persistent_workers = ctx.attr.allow_persistent_workers,
212218
)
213219
outputs[format] = output_dir
214220
per_format_args[format] = args_env
@@ -229,6 +235,10 @@ def _sphinx_docs_impl(ctx):
229235
_sphinx_docs = rule(
230236
implementation = _sphinx_docs_impl,
231237
attrs = {
238+
"allow_persistent_workers": attr.bool(
239+
doc = "(experimental) Whether to invoke Sphinx as a persistent worker.",
240+
default = False,
241+
),
232242
"extra_opts": attr.string_list(
233243
doc = "Additional options to pass onto Sphinx. These are added after " +
234244
"other options, but before the source/output args.",
@@ -254,28 +264,45 @@ _sphinx_docs = rule(
254264
},
255265
)
256266

257-
def _run_sphinx(ctx, format, source_path, inputs, output_prefix):
267+
def _run_sphinx(ctx, format, source_path, inputs, output_prefix, allow_persistent_workers):
258268
output_dir = ctx.actions.declare_directory(paths.join(output_prefix, format))
259269

260270
run_args = [] # Copy of the args to forward along to debug runner
261271
args = ctx.actions.args() # Args passed to the action
262272

273+
# An args file is required for persistent workers, but we don't know if
274+
# the action will use worker mode or not (settings we can't see may
275+
# force non-worker mode). For consistency, always use a params file.
276+
args.use_param_file("@%s", use_always = True)
277+
args.set_param_file_format("multiline")
278+
279+
# NOTE: sphinx_build.py relies on the first two args being the srcdir and
280+
# outputdir, in that order.
281+
args.add(source_path)
282+
args.add(output_dir.path)
283+
263284
args.add("--show-traceback") # Full tracebacks on error
264285
run_args.append("--show-traceback")
265-
args.add("--builder", format)
266-
run_args.extend(("--builder", format))
286+
args.add(format, format = "--builder=%s")
287+
run_args.append("--builder={}".format(format))
267288

268289
if ctx.attr._quiet_flag[BuildSettingInfo].value:
269290
# Not added to run_args because run_args is for debugging
270291
args.add("--quiet") # Suppress stdout informational text
271292

272293
# Build in parallel, if possible
273294
# Don't add to run_args: parallel building breaks interactive debugging
274-
args.add("--jobs", "auto")
275-
args.add("--fresh-env") # Don't try to use cache files. Bazel can't make use of them.
276-
run_args.append("--fresh-env")
277-
args.add("--write-all") # Write all files; don't try to detect "changed" files
278-
run_args.append("--write-all")
295+
args.add("--jobs=auto")
296+
297+
# Put the doctree dir outside of the output directory.
298+
# This allows it to be reused between invocations when possible; Bazel
299+
# clears the output directory every action invocation.
300+
# * For workers, they can fully re-use it.
301+
# * For non-workers, it can be reused when sandboxing is disabled via
302+
# the `no-sandbox` tag or execution requirement.
303+
#
304+
# We also use a non-dot prefixed name so it shows up more visibly.
305+
args.add(paths.join(output_dir.path + "_doctrees"), format = "--doctree-dir=%s")
279306

280307
for opt in ctx.attr.extra_opts:
281308
expanded = ctx.expand_location(opt)
@@ -287,9 +314,6 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix):
287314
for define in extra_defines:
288315
run_args.extend(("--define", define))
289316

290-
args.add(source_path)
291-
args.add(output_dir.path)
292-
293317
env = dict([
294318
v.split("=", 1)
295319
for v in ctx.attr._extra_env_flag[_FlagInfo].value
@@ -299,6 +323,14 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix):
299323
for tool in ctx.attr.tools:
300324
tools.append(tool[DefaultInfo].files_to_run)
301325

326+
# NOTE: Command line flags or RBE capabilities may override the execution
327+
# requirements and disable workers. Thus, we can't assume that these
328+
# exec requirements will actually be respected.
329+
execution_requirements = {}
330+
if allow_persistent_workers:
331+
execution_requirements["supports-workers"] = "1"
332+
execution_requirements["requires-worker-protocol"] = "json"
333+
302334
ctx.actions.run(
303335
executable = ctx.executable.sphinx,
304336
arguments = [args],
@@ -308,6 +340,7 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix):
308340
mnemonic = "SphinxBuildDocs",
309341
progress_message = "Sphinx building {} for %{{label}}".format(format),
310342
env = env,
343+
execution_requirements = execution_requirements,
311344
)
312345
return output_dir, struct(args = run_args, env = env)
313346

sphinxdocs/private/sphinx_build.py

Lines changed: 229 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,235 @@
1+
import contextlib
2+
import io
3+
import json
4+
import logging
15
import os
2-
import pathlib
6+
import shutil
37
import sys
8+
import traceback
9+
import typing
410

11+
import sphinx.application
512
from sphinx.cmd.build import main
613

14+
WorkRequest = object
15+
WorkResponse = object
16+
17+
logger = logging.getLogger("sphinxdocs_build")
18+
19+
_WORKER_SPHINX_EXT_MODULE_NAME = "bazel_worker_sphinx_ext"
20+
21+
# Config value name for getting the path to the request info file
22+
_REQUEST_INFO_CONFIG_NAME = "bazel_worker_request_info_path"
23+
24+
25+
class Worker:
    """Persistent worker that runs Sphinx builds for Bazel work requests.

    Reads JSON work requests (one per line) from `instream`, runs Sphinx for
    each, and writes JSON work responses to `outstream`, following Bazel's
    JSON persistent worker protocol.
    """

    def __init__(
        self, instream: "typing.TextIO", outstream: "typing.TextIO", exec_root: str
    ):
        # NOTE: Sphinx performs its own logging re-configuration, so any
        # logging config we do isn't respected by Sphinx. Controlling where
        # stdout and stderr go are the main mechanisms. Recall that
        # Bazel sends worker stderr to the worker log file.
        # outputBase=$(bazel info output_base)
        # find $outputBase/bazel-workers/ -type f -printf '%T@ %p\n' | sort -n | tail -1 | awk '{print $2}'
        logging.basicConfig(level=logging.WARN)
        logger.info("Initializing worker")

        # The directory that paths are relative to.
        self._exec_root = exec_root
        # Where requests are read from.
        self._instream = instream
        # Where responses are written to.
        self._outstream = outstream

        # dict[str srcdir, dict[str path, str digest]]
        self._digests = {}

        # Internal output directories the worker gives to Sphinx that need
        # to be cleaned up upon exit.
        # set[str path]
        self._worker_outdirs = set()
        self._extension = BazelWorkerExtension()

        # Pre-populate sys.modules so Sphinx can "import" the extension by
        # name, then register it as a builtin extension.
        sys.modules[_WORKER_SPHINX_EXT_MODULE_NAME] = self._extension
        sphinx.application.builtin_extensions += (_WORKER_SPHINX_EXT_MODULE_NAME,)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        # FIX: __exit__ must accept the exception triple; the original
        # one-argument signature made every `with Worker(...)` exit raise
        # TypeError instead of cleaning up.
        for worker_outdir in self._worker_outdirs:
            shutil.rmtree(worker_outdir, ignore_errors=True)

    def run(self) -> None:
        """Process work requests until the input stream closes."""
        logger.info("Worker started")
        try:
            while True:
                request = None
                try:
                    request = self._get_next_request()
                    if request is None:
                        logger.info("Empty request: exiting")
                        break
                    response = self._process_request(request)
                    if response:
                        self._send_response(response)
                except Exception:
                    logger.exception("Unhandled error: request=%s", request)
                    # FIX: guard against request being None (e.g. a JSON
                    # decode error) before dereferencing it, and use the
                    # protocol's "requestId" key consistently (the original
                    # formatted request.get('id') and crashed when request
                    # was None).
                    request_id = 0 if not request else request.get("requestId", 0)
                    output = (
                        f"Unhandled error:\nRequest id: {request_id}\n"
                        + traceback.format_exc()
                    )
                    self._send_response(
                        {
                            "exitCode": 3,
                            "output": output,
                            "requestId": request_id,
                        }
                    )
        finally:
            logger.info("Worker shutting down")

    def _get_next_request(self) -> "object | None":
        """Read one JSON work request; returns None when the stream closes."""
        line = self._instream.readline()
        if not line:
            return None
        return json.loads(line)

    def _send_response(self, response: "WorkResponse") -> None:
        """Write one JSON work response line and flush it immediately."""
        self._outstream.write(json.dumps(response) + "\n")
        self._outstream.flush()

    def _prepare_sphinx(self, request):
        """Compute changed inputs and rewrite args for this request.

        Returns:
            (worker_outdir, bazel_outdir, sphinx_args) tuple.
        """
        sphinx_args = request["arguments"]
        # NOTE: relies on the first two args being srcdir and outdir,
        # in that order.
        srcdir = sphinx_args[0]

        incoming_digests = {}
        current_digests = self._digests.setdefault(srcdir, {})
        changed_paths = []
        request_info = {"exec_root": self._exec_root, "inputs": request["inputs"]}
        for entry in request["inputs"]:
            path = entry["path"]
            digest = entry["digest"]
            # Make the path srcdir-relative so Sphinx understands it.
            path = path.removeprefix(srcdir + "/")
            incoming_digests[path] = digest

            if path not in current_digests:
                logger.info("path %s new", path)
                changed_paths.append(path)
            elif current_digests[path] != digest:
                logger.info("path %s changed", path)
                changed_paths.append(path)

        self._digests[srcdir] = incoming_digests
        self._extension.changed_paths = changed_paths
        request_info["changed_sources"] = changed_paths

        # Redirect Sphinx's output into a worker-managed directory; Bazel
        # clears the declared output directory on every action invocation,
        # which would defeat incremental building.
        bazel_outdir = sphinx_args[1]
        worker_outdir = bazel_outdir + ".worker-out.d"
        self._worker_outdirs.add(worker_outdir)
        sphinx_args[1] = worker_outdir

        request_info_path = os.path.join(srcdir, "_bazel_worker_request_info.json")
        with open(request_info_path, "w") as fp:
            json.dump(request_info, fp)
        sphinx_args.append(f"--define={_REQUEST_INFO_CONFIG_NAME}={request_info_path}")

        return worker_outdir, bazel_outdir, sphinx_args

    @contextlib.contextmanager
    def _redirect_streams(self):
        # Capture stdout while Sphinx runs: the worker protocol uses stdout
        # for responses, so stray prints would corrupt it.
        out = io.StringIO()
        orig_stdout = sys.stdout
        try:
            sys.stdout = out
            yield out
        finally:
            sys.stdout = orig_stdout

    def _process_request(self, request: "WorkRequest") -> "WorkResponse | None":
        """Run a Sphinx build for one request; returns None for cancels."""
        logger.info("Request: %s", json.dumps(request, sort_keys=True, indent=2))
        if request.get("cancel"):
            return None

        worker_outdir, bazel_outdir, sphinx_args = self._prepare_sphinx(request)

        # Prevent anything from going to stdout because it breaks the worker
        # protocol. We have limited control over where Sphinx sends output.
        with self._redirect_streams() as stdout:
            logger.info("main args: %s", sphinx_args)
            exit_code = main(sphinx_args)

        if exit_code:
            raise Exception(
                "Sphinx main() returned failure: "
                + f" exit code: {exit_code}\n"
                + "========== STDOUT START ==========\n"
                + stdout.getvalue().rstrip("\n")
                + "\n"
                + "========== STDOUT END ==========\n"
            )

        # Copying is unfortunately necessary because Bazel doesn't know to
        # implicitly bring along what the symlinks point to.
        shutil.copytree(worker_outdir, bazel_outdir, dirs_exist_ok=True)

        response = {
            "requestId": request.get("requestId", 0),
            "output": stdout.getvalue(),
            "exitCode": 0,
        }
        return response
186+
187+
188+
class BazelWorkerExtension:
    """A Sphinx extension implemented as a class acting like a module."""

    def __init__(self):
        # Make it look like a Module object
        self.__name__ = _WORKER_SPHINX_EXT_MODULE_NAME
        # Srcdir-relative path names that changed since the previous request.
        # Populated by Worker before each build (assigned a list there, but
        # only ever iterated here).
        self.changed_paths = set()

    def setup(self, app):
        """Standard Sphinx extension entry point."""
        app.add_config_value(_REQUEST_INFO_CONFIG_NAME, "", "")
        app.connect("env-get-outdated", self._handle_env_get_outdated)
        return {"parallel_read_safe": True, "parallel_write_safe": True}

    def _handle_env_get_outdated(self, app, env, added, changed, removed):
        """Tell Sphinx which docs changed, based on Bazel's input digests."""
        changed = {
            # NOTE: path2doc returns None if it's not a doc path
            env.path2doc(p)
            for p in self.changed_paths
        }
        # FIX: drop the None produced by non-doc paths (e.g. static assets);
        # this event must return a set of docnames only.
        changed.discard(None)

        logger.info("changed docs: %s", changed)
        return changed
211+
212+
213+
def _worker_main(stdin, stdout, exec_root):
    """Create a Worker bound to the given streams and run its request loop."""
    worker = Worker(stdin, stdout, exec_root)
    with worker:
        return worker.run()
216+
217+
218+
def _non_worker_main():
    """Expand any @params-file arguments in argv, then run Sphinx once."""
    expanded = []
    for arg in sys.argv:
        if not arg.startswith("@"):
            expanded.append(arg)
            continue
        # Params file: one argument per non-blank line.
        with open(arg.removeprefix("@")) as fp:
            expanded.extend(line.strip() for line in fp if line.strip())
    sys.argv[:] = expanded
    return main()
229+
230+
7231
if __name__ == "__main__":
    # Bazel passes --persistent_worker when it wants the worker protocol;
    # otherwise perform a single one-shot build.
    if "--persistent_worker" not in sys.argv:
        sys.exit(_non_worker_main())
    sys.exit(_worker_main(sys.stdin, sys.stdout, os.getcwd()))

0 commit comments

Comments
 (0)