 from pathlib import Path
 
+import shutil
+import contextlib
 import argparse
 import json
 import logging
 import os
 import pathlib
 import sys
 import time
 import traceback
 import typing
+import io
+import sphinx.application
 
 from sphinx.cmd.build import main
 
 
 WorkRequest = object
 WorkResponse = object
 
-
-parser = argparse.ArgumentParser(
-    fromfile_prefix_chars='@'
-)
-# parser.add_argument('srcdir')
-# parser.add_argument('outdir')
-parser.add_argument("--persistent_worker", action="store_true")
-##parser.add_argument("--doctree-dir")
-
 logger = logging.getLogger('sphinxdocs-build')
 
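+# Module name under which the worker's in-process Sphinx extension is
+# registered so Sphinx can "import" it by name (see Worker.__init__).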
+_WORKER_SPHINX_EXT_MODULE_NAME = "bazel_worker_sphinx_ext"
+
 class Worker:
 
-    def __init__(self, instream: "typing.TextIO", outstream: "typing.TextIO"):
+    def __init__(self, instream: "typing.TextIO", outstream: "typing.TextIO", exec_root: str):
+        # NOTE: Sphinx performs its own logging re-configuration, so any
+        # logging config we do isn't respected by Sphinx. Controlling where
+        # stdout and stderr go is the main mechanism. Recall that Bazel
+        # sends worker stderr to the worker log file. To find the most
+        # recent worker log:
+        #   outputBase=$(bazel info output_base)
+        #   find $outputBase/bazel-workers/ -type f -printf '%T@ %p\n' | sort -n | tail -1 | awk '{print $2}'
+        logging.basicConfig(level=logging.DEBUG)
+        logger.info("initializing worker")
+
+        # The directory that paths are relative to.
+        self._exec_root = exec_root
+        # Where requests are read from.
         self._instream = instream
+        # Where responses are written to.
         self._outstream = outstream
-        # Annoying. Sphinx resets its loging config as part of main()
-        # and the Sphinx() app setup/invocation. So any logging we try
-        # to setup here to get info out of sphinx is meaningless.
-        # -v -v -v will output more logging, but to stderr/stdout, and thus
-        # bazel's worker log file, due to sphinx's logging re-configuration.
-        # one-liner to get most recent worker log:
-        # find $workerLogDir -type f -printf '%T@ %p\n' | sort -n | tail -1 | awk '{print $2}'
-        logging.basicConfig(
-            ##filename='/tmp/sphinx-builder.log', encoding='utf-8',
-            level=logging.DEBUG
-        )
-        logger.info("starting worker")
-        self._current = {}
-        self._previous = {}
-        self._cache = {}
+
+        # dict[str srcdir, dict[str path, str digest]]
+        self._digests = {}
+
+        # Internal output directories the worker gives to Sphinx that need
+        # to be cleaned up upon exit.
+        # set[str path]
+        self._worker_outdirs = set()
+        self._extension = BazelWorkerExtension()
+
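+        # Sphinx loads extensions by module name, so expose the extension
+        # object via sys.modules and add it to the set of extensions every
+        # Sphinx application loads automatically (builtin_extensions).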
+        sys.modules[_WORKER_SPHINX_EXT_MODULE_NAME] = self._extension
+        sphinx.application.builtin_extensions += (_WORKER_SPHINX_EXT_MODULE_NAME,)
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *args, **kwargs):
+        for worker_outdir in self._worker_outdirs:
+            shutil.rmtree(worker_outdir, ignore_errors=True)
 
     def run(self) -> None:
+        logger.info("Worker started")
         try:
             while True:
                 request = None
@@ -58,7 +73,6 @@ def run(self) -> None:
                         logger.info("Empty request: exiting")
                         break
                     response = self._process_request(request)
-                    logger.info("response:%s", response)
                     if response:
                         self._send_response(response)
                 except Exception:
@@ -84,101 +98,128 @@ def _get_next_request(self) -> "object | None":
             return None
         return json.loads(line)
 
-    @property
-    def inputs(self):
-        self._previous
-        self._current
-        return self._value
-
-    def _update_digest(self, request):
-        args, unknown = parser.parse_known_args(request["arguments"])
-        # Make room for the new build's data.
-        self._previous = self._current
-        # Rearrange the new data into a dict to make comparisons easier.
-        self._current = {}
-        for page in request["inputs"]:
-            path = page["path"]
-            self._current[path] = page["digest"]
-            logger.info("path mtime: %s", pathlib.Path(path).stat().st_mtime)
-        # Compare the content hashes to determine what pages have changed.
+    def _send_response(self, response: "WorkResponse") -> None:
+        self._outstream.write(json.dumps(response) + "\n")
+        self._outstream.flush()
+
+    def _prepare_sphinx(self, request):
+        sphinx_args = request["arguments"]
+        srcdir = sphinx_args[0]
+
+        incoming_digests = {}
+        current_digests = self._digests.setdefault(srcdir, {})
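+        # Compare digests against those from the previous request for this
+        # srcdir to work out which inputs actually changed.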
         changed_paths = []
-        for path in self._current:
-            if path not in self._previous:
-                changed_paths.append(path)
-                continue
-            if self._current[path] != self._previous[path]:
+        request_info = {
+            "exec_root": self._exec_root,
+            "inputs": request["inputs"]
+        }
+        for entry in request["inputs"]:
+            path = entry["path"]
+            digest = entry["digest"]
+
+            ##mtime = pathlib.Path(path).stat().st_mtime
+            ##logger.info("incoming path %s mtime: %s", path, mtime)
+            ### Sphinx appears to treat 0 mtime as always changed
+            ##os.utime(path, (100, 100))
+
+            # Make the path srcdir-relative so Sphinx understands it.
+            path = path.removeprefix(srcdir + "/")
+            incoming_digests[path] = digest
+
+            if path not in current_digests:
+                logger.info("path %s new", path)
                 changed_paths.append(path)
-                continue
-        for path in self._previous:
-            if path not in self._current:
+            elif current_digests[path] != digest:
+                logger.info("path %s changed", path)
                 changed_paths.append(path)
-                continue
-        # Normalize the paths into docnames
-        digest = []
-        for path in changed_paths:
-            logger.info("Changed: %s", path)
-            if not path.endswith(".rst"):
-                continue
-            srcdir = self.args[0]
-            docname = path.replace(srcdir + "/", "")
-            docname = docname.replace(".rst", "")
-            digest.append(docname)
-        args, unknown = parser.parse_known_args(self.args)
-        ### Save the digest.
-        ##doctree_dir = Path(args.doctree_dir)
-        ### On a fresh build, _restore_cache() does nothing, so this dir won't exist yet.
-        ##if not doctree_dir.is_dir():
-        ##    doctree_dir.mkdir(parents=True)
-        ##with open(doctree_dir / Path("digest.json"), "w") as f:
-        ##    json.dump(digest, f, indent=2)
-
-    def _restore_cache(self):
-        for filepath in self._cache:
-            data = self._cache[filepath]
-            parent = Path(os.path.dirname(filepath))
-            if not parent.is_dir():
-                parent.mkdir(parents=True)
-            with open(filepath, "wb") as f:
-                f.write(data)
-
-    def _update_cache(self):
-        args, unknown = parser.parse_known_args(self.args)
-        self._cache = {}
-        for root, _, files in os.walk(args.doctree_dir):
-            for filename in files:
-                filepath = Path(root) / Path(filename)
-                with open(filepath, "rb") as f:
-                    self._cache[str(filepath)] = f.read()
 
-    def _process_request(self, request: "WorkRequest") -> "WorkResponse | None":
-        logger.info("request:%s", json.dumps(request, sort_keys=True, indent=2))
-        if request.get("cancel"):
-            return None
-        self.args = request["arguments"]
-        ##self._restore_cache()
-        ##self._update_digest(request)
-        logger.info("main: %s", self.args)
+        self._digests[srcdir] = incoming_digests
+        self._extension.changed_paths = changed_paths
+        request_info["changed_sources"] = changed_paths
+
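+        # Give Sphinx a worker-private output dir, presumably so that its
+        # incremental build state survives across requests; _process_request
+        # copies the results into the Bazel-declared outdir afterwards.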
+        bazel_outdir = sphinx_args[1]
+        worker_outdir = bazel_outdir + ".worker-out.d"
+        self._worker_outdirs.add(worker_outdir)
+        sphinx_args[1] = worker_outdir
+
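+        # Record the request metadata where conf.py and extensions can find
+        # it; --define passes the file's path through as a config value.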
+        request_info_path = os.path.join(srcdir, "_bazel_worker_request_info.json")
+        with open(request_info_path, "w") as fp:
+            json.dump(request_info, fp)
+        sphinx_args.append(f"--define=bazel_worker_request_info={request_info_path}")
+
+        return worker_outdir, bazel_outdir, sphinx_args
+
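+    # Capture stdout during a build: the persistent-worker protocol reserves
+    # stdout for the JSON responses written by _send_response.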
+    @contextlib.contextmanager
+    def _redirect_streams(self):
+        out = io.StringIO()
         orig_stdout = sys.stdout
-        sys.stdout = sys.stderr
         try:
-            main(self.args)
+            sys.stdout = out
+            yield out
         finally:
             sys.stdout = orig_stdout
-        ##self._update_cache()
+
+    def _process_request(self, request: "WorkRequest") -> "WorkResponse | None":
+        logger.info("Request: %s", json.dumps(request, sort_keys=True, indent=2))
+        if request.get("cancel"):
+            return None
+
+        worker_outdir, bazel_outdir, sphinx_args = self._prepare_sphinx(request)
+
+        # Prevent anything from going to stdout because it breaks the worker
+        # protocol. We have limited control over where Sphinx sends output.
+        with self._redirect_streams() as stdout:
+            logger.info("main args: %s", sphinx_args)
+            exit_code = main(sphinx_args)
+
+        if exit_code:
+            raise Exception(
+                "Sphinx main() returned failure:\n" +
+                f"  exit code: {exit_code}\n" +
+                "========== STDOUT START ==========\n" +
+                stdout.getvalue().rstrip("\n") + "\n" +
+                "========== STDOUT END ==========\n"
+            )
+
+        # Copying is unfortunately necessary because Bazel doesn't know to
+        # implicitly bring along what the symlinks point to.
+        shutil.copytree(worker_outdir, bazel_outdir, dirs_exist_ok=True)
+
         response = {
             "requestId": request.get("requestId", 0),
+            "output": stdout.getvalue(),
             "exitCode": 0,
         }
         return response
 
-    def _send_response(self, response: "WorkResponse") -> None:
-        self._outstream.write(json.dumps(response) + "\n")
-        self._outstream.flush()
+
+
+# TODO: make this parallel-safe
+class BazelWorkerExtension:
+    def __init__(self):
+        self.__name__ = _WORKER_SPHINX_EXT_MODULE_NAME
+        # set[str] of srcdir-relative path names
+        self.changed_paths = set()
+
+    def setup(self, app):
+        app.connect('env-get-outdated', self._handle_env_get_outdated)
+        return {
+            "parallel_read_safe": True,
+            "parallel_write_safe": True
+        }
+
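+    # env-get-outdated is emitted while Sphinx decides which source files to
+    # re-read; docnames returned here are re-read in addition to the ones
+    # Sphinx detects on its own.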
+    def _handle_env_get_outdated(self, app, env, added, changed, removed):
+        changed = {
+            # NOTE: path2doc returns None if it's not a doc path
+            env.path2doc(p) for p in self.changed_paths
+        }
+        logger.info("changed docs: %s", changed)
+        return changed
 
 
 if __name__ == "__main__":
-    args, unknown = parser.parse_known_args()
-    if args.persistent_worker:
-        Worker(sys.stdin, sys.stdout).run()
+    if '--persistent_worker' in sys.argv:
+        with Worker(sys.stdin, sys.stdout, os.getcwd()) as worker:
+            sys.exit(worker.run())
     else:
         sys.exit(main())