
Commit 3420416

generate info file for extensions to use. also cleanup
1 parent b497e62 commit 3420416

File tree: 4 files changed, +154 / -129 lines

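The "info file" named in the commit message is _bazel_worker_request_info.json: _prepare_sphinx() in sphinx_build.py writes it into the Sphinx source directory and points the build at it via --define=bazel_worker_request_info=<path>. Below is a minimal sketch, not part of this commit, of how a hypothetical downstream extension could pick that file up. The JSON keys (exec_root, inputs, changed_sources) come from the worker code in this commit; the extension itself and its config-value registration are illustrative assumptions.

# Hypothetical consumer of the request-info file (illustration only, not in this commit).
import json


def setup(app):
    # Register the config value so the worker's --define is accepted by Sphinx.
    app.add_config_value("bazel_worker_request_info", "", rebuild="")
    app.connect("builder-inited", _load_request_info)
    return {"parallel_read_safe": True, "parallel_write_safe": True}


def _load_request_info(app):
    path = app.config.bazel_worker_request_info
    if not path:
        return  # Not running under the Bazel worker.
    with open(path) as fp:
        info = json.load(fp)
    # info["exec_root"]: directory the input paths are relative to
    # info["inputs"]: [{"path": ..., "digest": ...}, ...] from the work request
    # info["changed_sources"]: srcdir-relative paths whose digests changed
    app.env.bazel_request_info = info  # stash for later use (illustrative)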

sphinxdocs/private/sphinx.bzl

Lines changed: 8 additions & 10 deletions
@@ -275,24 +275,25 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix, use_persistent_
     args.add(format, format = "--builder=%s")
     run_args.append("--builder={}".format(format))

-    ##if ctx.attr._quiet_flag[BuildSettingInfo].value:
-    ##    # Not added to run_args because run_args is for debugging
-    ##    args.add("--quiet") # Suppress stdout informational text
+    if ctx.attr._quiet_flag[BuildSettingInfo].value:
+        # Not added to run_args because run_args is for debugging
+        args.add("--quiet") # Suppress stdout informational text

     # Build in parallel, if possible
     # Don't add to run_args: parallel building breaks interactive debugging
     args.add("--jobs=auto")

     if use_persistent_workers:
-        # Sphinx normally uses `.doctrees`, but we use underscore so it isn't
-        # hidden by default
+        # * Normally Sphinx puts doctrees in the output dir. We can't do that
+        #   because Bazel will clear the output directory every invocation.
+        # * Use a non-dot prefixed name so it shows up more visibly.
         args.add(paths.join(output_dir.path + "_doctrees"), format = "--doctree-dir=%s")

     else:
+        # These aren't added to run_args because we assume direct invocations
+        # will add them if necessary.
         args.add("--fresh-env") # Don't try to use cache files. Bazel can't make use of them.
-        run_args.append("--fresh-env")
         args.add("--write-all") # Write all files; don't try to detect "changed" files
-        run_args.append("--write-all")

     for opt in ctx.attr.extra_opts:
         expanded = ctx.expand_location(opt)
@@ -315,9 +316,6 @@ def _run_sphinx(ctx, format, source_path, inputs, output_prefix, use_persistent_

     execution_requirements = {}
     if use_persistent_workers:
-        args.add("-v")
-        args.add("-v")
-        args.add("-v")
         args.use_param_file("@%s", use_always = True)
         args.set_param_file_format("multiline")
         execution_requirements["supports-workers"] = "1"
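For context, the param-file and supports-workers settings above are what opt this action into Bazel's persistent worker protocol: instead of reading an @-prefixed params file, the builder receives newline-delimited JSON work requests on stdin and answers with JSON responses on stdout. A rough sketch of those shapes follows, showing only the fields sphinx_build.py actually reads or writes; the paths and digest values are hypothetical.

# Approximate worker protocol shapes (hypothetical values, illustration only).
request = {
    "requestId": 1,
    "arguments": [            # same argv that would otherwise go in the @param file
        "docs/_sources",      # srcdir (sphinx_args[0])
        "docs/_build",        # outdir (sphinx_args[1], swapped for a worker outdir)
        "--builder=html",
        "--doctree-dir=docs/_build_doctrees",
        "--jobs=auto",
    ],
    "inputs": [               # used for digest-based change detection
        {"path": "docs/_sources/index.md", "digest": "a1b2..."},
    ],
}

response = {
    "requestId": 1,
    "output": "",             # captured Sphinx stdout
    "exitCode": 0,
}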

sphinxdocs/private/sphinx_build.py

Lines changed: 146 additions & 105 deletions
@@ -1,5 +1,7 @@
 from pathlib import Path

+import shutil
+import contextlib
 import argparse
 import json
 import logging
@@ -9,46 +11,59 @@
 import time
 import traceback
 import typing
+import io
+import sphinx.application

 from sphinx.cmd.build import main


 WorkRequest = object
 WorkResponse = object

-
-parser = argparse.ArgumentParser(
-    fromfile_prefix_chars='@'
-)
-# parser.add_argument('srcdir')
-# parser.add_argument('outdir')
-parser.add_argument("--persistent_worker", action="store_true")
-##parser.add_argument("--doctree-dir")
-
 logger = logging.getLogger('sphinxdocs-build')

+_WORKER_SPHINX_EXT_MODULE_NAME = "bazel_worker_sphinx_ext"
+
 class Worker:

-    def __init__(self, instream: "typing.TextIO", outstream: "typing.TextIO"):
+    def __init__(self, instream: "typing.TextIO", outstream: "typing.TextIO", exec_root: str):
+        # NOTE: Sphinx performs its own logging re-configuration, so any
+        # logging config we do isn't respected by Sphinx. Controlling where
+        # stdout and stderr goes are the main mechanisms. Recall that
+        # Bazel send worker stderr to the worker log file.
+        # outputBase=$(bazel info output_base)
+        # find $outputBase/bazel-workers/ -type f -printf '%T@ %p\n' | sort -n | tail -1 | awk '{print $2}'
+        logging.basicConfig(level=logging.DEBUG)
+        logger.info("initializing worker")
+
+        # The directory that paths are relative to.
+        self._exec_root = exec_root
+        # Where requests are read from.
         self._instream = instream
+        # Where responses are written to.
         self._outstream = outstream
-        # Annoying. Sphinx resets its loging config as part of main()
-        # and the Sphinx() app setup/invocation. So any logging we try
-        # to setup here to get info out of sphinx is meaningless.
-        # -v -v -v will output more logging, but to stderr/stdout, and thus
-        # bazel's worker log file, due to sphinx's logging re-configuration.
-        # one-liner to get most recent worker log:
-        # find $workerLogDir -type f -printf '%T@ %p\n' | sort -n | tail -1 | awk '{print $2}'
-        logging.basicConfig(
-            ##filename='/tmp/sphinx-builder.log', encoding='utf-8',
-            level=logging.DEBUG
-        )
-        logger.info("starting worker")
-        self._current = {}
-        self._previous = {}
-        self._cache = {}
+
+        # dict[str srcdir, dict[str path, str digest]]
+        self._digests = {}
+
+        # Internal output directories the worker gives to Sphinx that need
+        # to be cleaned up upon exit.
+        # set[str path]
+        self._worker_outdirs = set()
+        self._extension = BazelWorkerExtension()
+
+        sys.modules[_WORKER_SPHINX_EXT_MODULE_NAME] = self._extension
+        sphinx.application.builtin_extensions += (_WORKER_SPHINX_EXT_MODULE_NAME,)
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self):
+        for worker_outdir in self._worker_outdirs:
+            shutil.rmtree(worker_outdir, ignore_errors=True)

     def run(self) -> None:
+        logger.info("Worker started")
         try:
             while True:
                 request = None
@@ -58,7 +73,6 @@ def run(self) -> None:
                     logger.info("Empty request: exiting")
                     break
                 response = self._process_request(request)
-                logger.info("response:%s", response)
                 if response:
                     self._send_response(response)
         except Exception:
@@ -84,101 +98,128 @@ def _get_next_request(self) -> "object | None":
             return None
         return json.loads(line)

-    @property
-    def inputs(self):
-        self._previous
-        self._current
-        return self._value
-
-    def _update_digest(self, request):
-        args, unknown = parser.parse_known_args(request["arguments"])
-        # Make room for the new build's data.
-        self._previous = self._current
-        # Rearrange the new data into a dict to make comparisons easier.
-        self._current = {}
-        for page in request["inputs"]:
-            path = page["path"]
-            self._current[path] = page["digest"]
-            logger.info("path mtime: %s", pathlib.Path(path).stat().st_mtime)
-        # Compare the content hashes to determine what pages have changed.
+    def _send_response(self, response: "WorkResponse") -> None:
+        self._outstream.write(json.dumps(response) + "\n")
+        self._outstream.flush()
+
+    def _prepare_sphinx(self, request):
+        sphinx_args = request["arguments"]
+        srcdir = sphinx_args[0]
+
+        incoming_digests = {}
+        current_digests = self._digests.setdefault(srcdir, {})
         changed_paths = []
-        for path in self._current:
-            if path not in self._previous:
-                changed_paths.append(path)
-                continue
-            if self._current[path] != self._previous[path]:
+        request_info = {
+            "exec_root": self._exec_root,
+            "inputs": request["inputs"]
+        }
+        for entry in request["inputs"]:
+            path = entry["path"]
+            digest = entry["digest"]
+
+            ##mtime = pathlib.Path(path).stat().st_mtime
+            ##logger.info("incoming path %s mtime: %s", path, mtime)
+            ### Sphinx appears to treat 0 mtime as always changed
+            ##os.utime(path, (100, 100))
+
+            # Make the path srcdir-relative so Sphinx understands it.
+            path = path.removeprefix(srcdir + "/")
+            incoming_digests[path] = digest
+
+            if path not in current_digests:
+                logger.info("path %s new", path)
                 changed_paths.append(path)
-                continue
-        for path in self._previous:
-            if path not in self._current:
+            elif current_digests[path] != digest:
+                logger.info("path %s changed", path)
                 changed_paths.append(path)
-                continue
-        # Normalize the paths into docnames
-        digest = []
-        for path in changed_paths:
-            logger.info("Changed: %s", path)
-            if not path.endswith(".rst"):
-                continue
-            srcdir = self.args[0]
-            docname = path.replace(srcdir + "/", "")
-            docname = docname.replace(".rst", "")
-            digest.append(docname)
-        args, unknown = parser.parse_known_args(self.args)
-        ### Save the digest.
-        ##doctree_dir = Path(args.doctree_dir)
-        ### On a fresh build, _restore_cache() does nothing, so this dir won't exist yet.
-        ##if not doctree_dir.is_dir():
-        ##    doctree_dir.mkdir(parents=True)
-        ##with open(doctree_dir / Path("digest.json"), "w") as f:
-        ##    json.dump(digest, f, indent=2)
-
-    def _restore_cache(self):
-        for filepath in self._cache:
-            data = self._cache[filepath]
-            parent = Path(os.path.dirname(filepath))
-            if not parent.is_dir():
-                parent.mkdir(parents=True)
-            with open(filepath, "wb") as f:
-                f.write(data)
-
-    def _update_cache(self):
-        args, unknown = parser.parse_known_args(self.args)
-        self._cache = {}
-        for root, _, files in os.walk(args.doctree_dir):
-            for filename in files:
-                filepath = Path(root) / Path(filename)
-                with open(filepath, "rb") as f:
-                    self._cache[str(filepath)] = f.read()

-    def _process_request(self, request: "WorkRequest") -> "WorkResponse | None":
-        logger.info("request:%s", json.dumps(request, sort_keys=True, indent=2))
-        if request.get("cancel"):
-            return None
-        self.args = request["arguments"]
-        ##self._restore_cache()
-        ##self._update_digest(request)
-        logger.info("main: %s", self.args)
+        self._digests[srcdir] = incoming_digests
+        self._extension.changed_paths = changed_paths
+        request_info["changed_sources"] = changed_paths
+
+        bazel_outdir = sphinx_args[1]
+        worker_outdir = bazel_outdir + ".worker-out.d"
+        self._worker_outdirs.add(worker_outdir)
+        sphinx_args[1] = worker_outdir
+
+        request_info_path = os.path.join(srcdir, "_bazel_worker_request_info.json")
+        with open(request_info_path, "w") as fp:
+            json.dump(request_info, fp)
+        sphinx_args.append(f"--define=bazel_worker_request_info={request_info_path}")
+
+        return worker_outdir, bazel_outdir, sphinx_args
+
+    @contextlib.contextmanager
+    def _redirect_streams(self):
+        out = io.StringIO()
         orig_stdout = sys.stdout
-        sys.stdout = sys.stderr
         try:
-            main(self.args)
+            sys.stdout = out
+            yield out
         finally:
             sys.stdout = orig_stdout
-            ##self._update_cache()
+
+    def _process_request(self, request: "WorkRequest") -> "WorkResponse | None":
+        logger.info("Request: %s", json.dumps(request, sort_keys=True, indent=2))
+        if request.get("cancel"):
+            return None
+
+        worker_outdir, bazel_outdir, sphinx_args = self._prepare_sphinx(request)
+
+        # Prevent anything from going to stdout because it breaks the worker
+        # protocol. We have limited control over where Sphinx sends output.
+        with self._redirect_streams() as stdout:
+            logger.info("main args: %s", sphinx_args)
+            exit_code = main(sphinx_args)
+
+        if exit_code:
+            raise Exception(
+                "Sphinx main() returned failure: " +
+                f" exit code: {exit_code}\n" +
+                "========== STDOUT START ==========\n" +
+                stdout.getvalue().rstrip("\n") + "\n" +
+                "========== STDOUT END ==========\n"
+            )
+
+        # Copying is unfortunately necessary because Bazel doesn't know to
+        # implicily bring along what the symlinks point to.
+        shutil.copytree(worker_outdir, bazel_outdir, dirs_exist_ok=True)
+
         response = {
             "requestId": request.get("requestId", 0),
+            "output": stdout.getvalue(),
             "exitCode": 0,
         }
         return response

-    def _send_response(self, response: "WorkResponse") -> None:
-        self._outstream.write(json.dumps(response) + "\n")
-        self._outstream.flush()
+
+
+# todo: make this parallel-safe
+class BazelWorkerExtension:
+    def __init__(self):
+        self.__name__ = _WORKER_SPHINX_EXT_MODULE_NAME
+        # set[str] of src-dir relative path names
+        self.changed_paths = set()
+
+    def setup(self, app):
+        app.connect('env-get-outdated', self._handle_env_get_outdated)
+        return {
+            "parallel_read_safe": True,
+            "parallel_write_safe": True
+        }
+
+    def _handle_env_get_outdated(self, app, env, added, changed, removed):
+        changed = {
+            # NOTE: path2doc returns None if it's not a doc path
+            env.path2doc(p) for p in self.changed_paths
+        }
+        logger.info("changed docs: %s", changed)
+        return changed


 if __name__ == "__main__":
-    args, unknown = parser.parse_known_args()
-    if args.persistent_worker:
-        Worker(sys.stdin, sys.stdout).run()
+    if '--persistent_worker' in sys.argv:
+        with Worker(sys.stdin, sys.stdout, os.getcwd()) as worker:
+            sys.exit(worker.run())
     else:
         sys.exit(main())
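As a usage note, the non-worker branch of __main__ behaves like plain sphinx-build: sphinx.cmd.build.main() parses argv itself, so the flags assembled in sphinx.bzl can be passed straight through when debugging outside Bazel. A minimal sketch with hypothetical directories:

# Direct (non-worker) invocation sketch; the directories are hypothetical.
import sys

from sphinx.cmd.build import main

sys.exit(main([
    "docs/_sources",   # srcdir
    "docs/_build",     # outdir
    "--builder=html",
    "--jobs=auto",
    "--fresh-env",     # no cache reuse outside the worker
    "--write-all",     # rebuild every page; no change detection
]))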

sphinxdocs/tests/sphinx_docs/conf.py

Lines changed: 0 additions & 13 deletions
@@ -13,16 +13,3 @@
 myst_enable_extensions = [
     "colon_fence",
 ]
-
-import logging
-logger = logging.getLogger('conf')
-
-def on_env_get_outdated(*args, **kwargs):
-    logger.info("env-get-outdated args: %s", args)
-    logger.info("env-get-outdated kwargs: %s", kwargs)
-    return []
-
-
-def setup(app):
-
-    app.connect('env-get-outdated', on_env_get_outdated)

sphinxdocs/tests/sphinx_docs/doc2.md

Lines changed: 0 additions & 1 deletion
@@ -2,4 +2,3 @@


 hello doc 3
-x

0 commit comments
