Skip to content

Commit 5cbb5b1

Browse files
authored
fix(sphinxdocs): add retry logic when exit code 2 occurs (#3241)
Running Sphinx multiple times in the same process sometimes results in an error ("exit code 2"). Digging in, this is likely a bug in the sphinx_bzl plugin in how it merges data when parallel or incremental builds are performed. Until that's fixed, work around the problem by internally retrying the Sphinx build when exit code 2 occurs. This is basically what we're doing today and should reduce the number of flakes for the RTD builds. Along the way, improve the error reporting to make it easier to diagnose the underlying failure.
1 parent 277089e commit 5cbb5b1

File tree

1 file changed

+60
-14
lines changed

1 file changed

+60
-14
lines changed

sphinxdocs/private/sphinx_build.py

Lines changed: 60 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,13 @@
1414
WorkRequest = object
1515
WorkResponse = object
1616

17+
18+
class SphinxMainError(Exception):
    """Raised when Sphinx's ``main()`` returns a non-zero exit code.

    ``str(error)`` is the failure message only; the numeric exit code is
    carried separately on ``exit_code`` so a caller can report it without
    parsing the message.
    """

    def __init__(self, message: str, exit_code: int) -> None:
        """Store the failure message and the exit code.

        Args:
            message: Failure description; becomes ``str(self)``.
            exit_code: Exit code returned by Sphinx's ``main()``.
        """
        # Only the message goes into ``args`` so that ``str(self)`` stays the
        # plain message rather than the repr of a tuple.
        super().__init__(message)
        self.exit_code = exit_code

    def __reduce__(self):
        """Rebuild the exception with both constructor arguments.

        The default ``BaseException.__reduce__`` replays only ``self.args``
        (just the message), which would call ``__init__`` without
        ``exit_code`` and raise ``TypeError`` on copy or unpickle.
        """
        return (type(self), (str(self), self.exit_code))
22+
23+
1724
logger = logging.getLogger("sphinxdocs_build")
1825

1926
_WORKER_SPHINX_EXT_MODULE_NAME = "bazel_worker_sphinx_ext"
@@ -58,7 +65,7 @@ def __init__(
5865
def __enter__(self):
5966
return self
6067

61-
def __exit__(self):
68+
def __exit__(self, exc_type, exc_val, exc_tb):
6269
for worker_outdir in self._worker_outdirs:
6370
shutil.rmtree(worker_outdir, ignore_errors=True)
6471

@@ -75,6 +82,17 @@ def run(self) -> None:
7582
response = self._process_request(request)
7683
if response:
7784
self._send_response(response)
85+
except SphinxMainError as e:
86+
logger.error("Sphinx main returned failure: exit_code=%s request=%s",
87+
e.exit_code, request)
88+
request_id = 0 if not request else request.get("requestId", 0)
89+
self._send_response(
90+
{
91+
"exitCode": e.exit_code,
92+
"output": str(e),
93+
"requestId": request_id,
94+
}
95+
)
7896
except Exception:
7997
logger.exception("Unhandled error: request=%s", request)
8098
output = (
@@ -142,13 +160,10 @@ def _prepare_sphinx(self, request):
142160

143161
@contextlib.contextmanager
144162
def _redirect_streams(self):
145-
out = io.StringIO()
146-
orig_stdout = sys.stdout
147-
try:
148-
sys.stdout = out
149-
yield out
150-
finally:
151-
sys.stdout = orig_stdout
163+
stdout = io.StringIO()
164+
stderr = io.StringIO()
165+
with contextlib.redirect_stdout(stdout), contextlib.redirect_stderr(stderr):
166+
yield stdout, stderr
152167

153168
def _process_request(self, request: "WorkRequest") -> "WorkResponse | None":
154169
logger.info("Request: %s", json.dumps(request, sort_keys=True, indent=2))
@@ -159,19 +174,50 @@ def _process_request(self, request: "WorkRequest") -> "WorkResponse | None":
159174

160175
# Prevent anything from going to stdout because it breaks the worker
161176
# protocol. We have limited control over where Sphinx sends output.
162-
with self._redirect_streams() as stdout:
177+
with self._redirect_streams() as (stdout, stderr):
163178
logger.info("main args: %s", sphinx_args)
164179
exit_code = main(sphinx_args)
180+
# Running Sphinx multiple times in a process can give spurious
181+
# errors. An invocation after an error seems to work, though.
182+
if exit_code == 2:
183+
logger.warning("Sphinx main() returned exit_code=2, retrying...")
184+
# Reset streams to capture output of the retry cleanly
185+
stdout.seek(0)
186+
stdout.truncate(0)
187+
stderr.seek(0)
188+
stderr.truncate(0)
189+
exit_code = main(sphinx_args)
165190

166191
if exit_code:
167-
raise Exception(
192+
stdout_output = stdout.getvalue().strip()
193+
stderr_output = stderr.getvalue().strip()
194+
if stdout_output:
195+
stdout_output = (
196+
"========== STDOUT START ==========\n"
197+
+ stdout_output
198+
+ "\n"
199+
+ "========== STDOUT END ==========\n"
200+
)
201+
else:
202+
stdout_output = "========== STDOUT EMPTY ==========\n"
203+
if stderr_output:
204+
stderr_output = (
205+
"========== STDERR START ==========\n"
206+
+ stderr_output
207+
+ "\n"
208+
+ "========== STDERR END ==========\n"
209+
)
210+
else:
211+
stderr_output = "========== STDERR EMPTY ==========\n"
212+
213+
message = (
168214
"Sphinx main() returned failure: "
169215
+ f" exit code: {exit_code}\n"
170-
+ "========== STDOUT START ==========\n"
171-
+ stdout.getvalue().rstrip("\n")
172-
+ "\n"
173-
+ "========== STDOUT END ==========\n"
216+
+ stdout_output
217+
+ stderr_output
174218
)
219+
raise SphinxMainError(message, exit_code)
220+
175221

176222
# Copying is unfortunately necessary because Bazel doesn't know to
177223
# implicitly bring along what the symlinks point to.

0 commit comments

Comments
 (0)