Skip to content

Commit d4a4f36

Browse files
committed
Support jobserver client mode automatically.
Detect that the environment variable MAKEFLAGS specifies a jobserver pool to use, and automatically use it to control build parallelism when this is the case. NOTE: On Posix, the pipe-based protocol is not supported and will be detected. Ninja will print a warning on startup then ignore the content of MAKEFLAGS (there is a regression test for this). This is disabled if `--dry-run` or an explicit `-j<COUNT>` is passed on the command-line. Note that the `-l` option used to limit dispatch based on the overall load factor will still be in effect if used. + Use default member initialization for BuildConfig struct. + Add a new regression test suite that uses the misc/jobserver_pool.py script that was introduced in a previous commit, to verify that everything works properly.
1 parent ae09557 commit d4a4f36

File tree

12 files changed

+542
-31
lines changed

12 files changed

+542
-31
lines changed

.github/workflows/linux.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ jobs:
2727
run: |
2828
./ninja_test --gtest_color=yes
2929
../../misc/output_test.py
30+
../../misc/jobserver_test.py
3031
- name: Build release ninja
3132
run: CLICOLOR_FORCE=1 ninja -f build-Release.ninja
3233
working-directory: build
@@ -35,6 +36,7 @@ jobs:
3536
run: |
3637
./ninja_test --gtest_color=yes
3738
../../misc/output_test.py
39+
../../misc/jobserver_test.py
3840
3941
build:
4042
runs-on: [ubuntu-latest]

doc/manual.asciidoc

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,10 +187,46 @@ Ninja defaults to running commands in parallel anyway, so typically
187187
you don't need to pass `-j`.)
188188
189189
190+
GNU Jobserver support
191+
~~~~~~~~~~~~~~~~~~~~~
192+
193+
Since version 1.13, Ninja builds can follow the
194+
https://www.gnu.org/software/make/manual/html_node/Job-Slots.html[GNU Make jobserver]
195+
client protocol. This is useful when Ninja is invoked as part of a larger
196+
build system controlled by a top-level GNU Make instance, or any other
197+
jobserver pool implementation, as it allows better coordination between
198+
concurrent build tasks.
199+
200+
This feature is automatically enabled under the following conditions:
201+
202+
- Dry-run (i.e. `-n` or `--dry-run`) is not enabled.
203+
204+
- No explicit job count (e.g. `-j<COUNT>`) is passed on the command
205+
line.
206+
207+
- The `MAKEFLAGS` environment variable is defined and describes a valid
208+
jobserver mode using `--jobserver-auth=SEMAPHORE_NAME` on Windows, or
209+
`--jobserver-auth=fifo:PATH` on Posix.
210+
211+
In this case, Ninja will use the jobserver pool of job slots to control
212+
parallelism, instead of its default parallel implementation.
213+
214+
Note that load-average limitations (i.e. when using `-l<count>`)
215+
are still being enforced in this mode.
216+
217+
On Posix, Ninja supports only the `fifo` client mode, selected based on
218+
the content of `MAKEFLAGS`.
219+
220+
IMPORTANT: On Posix, only the FIFO-based version of the protocol, which is
221+
implemented by GNU Make 4.4 and higher, is supported. Ninja will detect
222+
when a pipe-based jobserver is being used (i.e. when `MAKEFLAGS` contains
223+
`--jobserver-auth=<read>,<write>`) and will print a warning, but will
224+
otherwise ignore it.
225+
190226
Environment variables
191227
~~~~~~~~~~~~~~~~~~~~~
192228
193-
Ninja supports one environment variable to control its behavior:
229+
Ninja supports two environment variables to control its behavior:
194230
`NINJA_STATUS`, the progress status printed before the rule being run.
195231
196232
Several placeholders are available:
@@ -215,6 +251,10 @@ The default progress status is `"[%f/%t] "` (note the trailing space
215251
to separate from the build rule). Another example of possible progress status
216252
could be `"[%u/%r/%f] "`.
217253
254+
If `MAKEFLAGS` is defined in the environment, it may alter how
255+
Ninja dispatches parallel build commands. See the GNU Jobserver support
256+
section for details.
257+
218258
Extra tools
219259
~~~~~~~~~~~
220260

misc/jobserver_test.py

Lines changed: 324 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,324 @@
1+
#!/usr/bin/env python3
2+
# Copyright 2024 Google Inc. All Rights Reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
from textwrap import dedent
17+
import os
18+
import platform
19+
import subprocess
20+
import tempfile
21+
import typing as T
22+
import shlex
23+
import sys
24+
import unittest
25+
26+
# Directory containing this script; the jobserver helper scripts are looked up
# next to it.
_SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__))
_JOBSERVER_POOL_SCRIPT = os.path.join(_SCRIPT_DIR, "jobserver_pool.py")
_JOBSERVER_TEST_HELPER_SCRIPT = os.path.join(_SCRIPT_DIR, "jobserver_test_helper.py")

_PLATFORM_IS_WINDOWS = platform.system() == "Windows"

# Set this to True to debug command invocations.
_DEBUG = False

# Environment used for spawned commands: a copy of the current environment
# without NINJA_STATUS and MAKEFLAGS, and with TERM forced to "dumb".
default_env = {
    name: value
    for name, value in os.environ.items()
    if name not in ("NINJA_STATUS", "MAKEFLAGS")
}
default_env["TERM"] = "dumb"

# The Ninja binary under test is resolved relative to the current directory.
NINJA_PATH = os.path.abspath("./ninja")
40+
41+
42+
class BuildDir:
    """Context manager providing a temporary directory with a `build.ninja` file.

    The build plan text passed to the constructor is dedented, then written to
    `build.ninja` inside a fresh temporary directory when the context is
    entered. The directory is removed when the context is exited.
    """

    def __init__(self, build_ninja: str):
        self.build_ninja = dedent(build_ninja)
        # Only set while the context is active.
        self.d: T.Optional[tempfile.TemporaryDirectory] = None

    def __enter__(self):
        self.d = tempfile.TemporaryDirectory()
        with open(os.path.join(self.d.name, "build.ninja"), "w") as f:
            f.write(self.build_ninja)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.d.cleanup()
        # Forget the removed directory so that `path` fails its assertion
        # instead of returning a location that no longer exists.
        self.d = None

    @property
    def path(self) -> str:
        """Path of the temporary build directory (context must be active)."""
        assert self.d
        return self.d.name

    def run(
        self,
        cmd_flags: T.Sequence[str] = (),
        env: T.Dict[str, str] = default_env,
    ) -> None:
        """Run a command, raise exception on error. Do not capture outputs."""
        ret = subprocess.run(cmd_flags, env=env)
        ret.check_returncode()

    def ninja_run(
        self,
        ninja_args: T.List[str],
        prefix_args: T.Optional[T.List[str]] = None,
        extra_env: T.Optional[T.Dict[str, str]] = None,
    ) -> "subprocess.CompletedProcess[str]":
        """Run Ninja without capturing its outputs, raise exception on error.

        Args:
          ninja_args: Arguments passed to Ninja after `-C <build dir>`.
          prefix_args: Optional command prefix placed before the Ninja binary
            (e.g. a jobserver pool launcher).
          extra_env: Optional variables added on top of the default environment.

        Returns:
          The completed process (its exit status has already been checked).
        """
        ret = self.ninja_spawn(
            ninja_args,
            prefix_args=prefix_args,
            extra_env=extra_env,
            capture_output=False,
        )
        ret.check_returncode()
        return ret

    def ninja_clean(self) -> None:
        """Invoke `ninja -t clean` in the build directory."""
        self.ninja_run(["-t", "clean"])

    def ninja_spawn(
        self,
        ninja_args: T.List[str],
        prefix_args: T.Optional[T.List[str]] = None,
        extra_env: T.Optional[T.Dict[str, str]] = None,
        capture_output: bool = True,
    ) -> "subprocess.CompletedProcess[str]":
        """Run Ninja in the build directory, without checking its exit status.

        Args:
          ninja_args: Arguments passed to Ninja after `-C <build dir>`.
          prefix_args: Optional command prefix placed before the Ninja binary.
          extra_env: Optional variables added on top of the default environment.
          capture_output: When True (the default), stdout and stderr are
            captured as text; otherwise they are inherited from this process.

        Returns:
          The completed process; callers must check `returncode` themselves.
        """
        # None stands for "empty": avoids sharing mutable default containers
        # between calls.
        cmd_args = list(prefix_args or []) + [NINJA_PATH, "-C", self.path] + ninja_args
        if _DEBUG:
            cmd_str = " ".join(shlex.quote(c) for c in cmd_args)
            print(f"CMD [{cmd_str}]", file=sys.stderr)
        return subprocess.run(
            cmd_args,
            text=True,
            stdout=subprocess.PIPE if capture_output else None,
            stderr=subprocess.PIPE if capture_output else None,
            env={**default_env, **(extra_env or {})},
        )
107+
108+
109+
def span_output_file(span_n: int) -> str:
    """Return the output file name of span task number |span_n|.

    The name is `out` followed by the number, zero-padded to at least two digits.
    """
    return f"out{span_n:02d}"
111+
112+
113+
def generate_build_plan(command_count: int) -> str:
    """Return the text of a Ninja build plan made of |command_count| parallel tasks.

    Each task calls the test helper script (with `--duration-ms=50`), which waits
    for 50ms then writes its own start and end time to its output file. A final
    phony `all` target depends on every task output.
    """
    outputs = [span_output_file(index) for index in range(command_count)]

    # Rule definition; the `command` binding must stay indented under `rule span`.
    header = (
        "\n"
        "rule span\n"
        f"    command = {sys.executable} -S {_JOBSERVER_TEST_HELPER_SCRIPT} --duration-ms=50 $out\n"
        "\n"
    )
    build_statements = "".join(f"build {output}: span\n" for output in outputs)
    all_target = "build all: phony " + " ".join(outputs) + "\n"
    return header + build_statements + all_target
132+
133+
134+
def compute_max_overlapped_spans(build_dir: str, command_count: int) -> int:
    """Compute the maximum number of span tasks that were running at the same time.

    This reads the |command_count| span output files from |build_dir|. Each file
    must hold exactly two lines: the task's start time, then its end time, both
    integers.

    Args:
      build_dir: Directory containing the span output files.
      command_count: Number of span tasks that were run.

    Returns:
      0 when |command_count| is less than 2 (no file is read in that case),
      otherwise the largest number of spans in progress at a single instant.
      Spans that merely touch (one ends exactly when another starts) do not
      count as overlapping.

    Raises:
      AssertionError: if an output file does not contain exactly two lines.
    """
    if command_count < 2:
        return 0

    # Read the output files.
    spans: T.List[T.Tuple[int, int]] = []
    for n in range(command_count):
        with open(os.path.join(build_dir, span_output_file(n)), "rb") as f:
            content = f.read().decode("utf-8")
        lines = content.splitlines()
        assert len(lines) == 2, f"Unexpected output file content: [{content}]"
        spans.append((int(lines[0]), int(lines[1])))

    # Peak concurrency is always reached at the instant some span starts, so for
    # each span it is enough to count the other spans in progress at its start.
    # Counting every span that overlaps a given span *anywhere* would over-report:
    # a span overlapping two consecutive, non-overlapping spans would yield 3
    # even though only 2 tasks ever ran together.
    max_overlaps = 1
    for n, (cur_start, _cur_end) in enumerate(spans):
        in_progress = 1 + sum(
            1
            for m, (other_start, other_end) in enumerate(spans)
            if m != n and other_start <= cur_start < other_end
        )
        max_overlaps = max(max_overlaps, in_progress)

    return max_overlaps
166+
167+
168+
class JobserverTest(unittest.TestCase):
    # Regression tests for Ninja's jobserver client mode. Each test builds a plan
    # of timed "span" tasks, then inspects the recorded start/end times to see
    # how many tasks Ninja actually ran in parallel.

    def test_no_jobserver_client(self):
        # Without a jobserver pool, parallelism is controlled by -j<N> only.
        task_count = 4
        build_plan = generate_build_plan(task_count)
        with BuildDir(build_plan) as b:
            b.run([NINJA_PATH, "-C", b.path, f"-j{task_count}", "all"])

            max_overlaps = compute_max_overlapped_spans(b.path, task_count)
            self.assertEqual(max_overlaps, task_count)

            b.ninja_clean()
            b.run([NINJA_PATH, "-C", b.path, "-j1", "all"])

            max_overlaps = compute_max_overlapped_spans(b.path, task_count)
            self.assertEqual(max_overlaps, 1)

    def _run_client_test(self, jobserver_args: T.List[str]) -> None:
        """Check that Ninja honors the job slots of a jobserver pool.

        Args:
          jobserver_args: Command prefix launching the jobserver pool; a
            `--jobs=<N>` argument is appended to it for each scenario.
        """
        task_count = 4
        build_plan = generate_build_plan(task_count)
        with BuildDir(build_plan) as b:
            # First, run the full tasks with {task_count} tokens, this should allow all
            # tasks to run in parallel.
            b.ninja_run(
                ninja_args=["all"],
                prefix_args=jobserver_args + [f"--jobs={task_count}"],
            )
            max_overlaps = compute_max_overlapped_spans(b.path, task_count)
            self.assertEqual(max_overlaps, task_count)

            # Second, use 2 tokens only, and verify that this was enforced by Ninja.
            b.ninja_clean()
            b.ninja_run(
                ["all"],
                prefix_args=jobserver_args + ["--jobs=2"],
            )
            max_overlaps = compute_max_overlapped_spans(b.path, task_count)
            self.assertEqual(max_overlaps, 2)

            # Third, verify that --jobs=1 serializes all tasks.
            b.ninja_clean()
            b.ninja_run(
                ["all"],
                prefix_args=jobserver_args + ["--jobs=1"],
            )
            max_overlaps = compute_max_overlapped_spans(b.path, task_count)
            self.assertEqual(max_overlaps, 1)

            # Finally, verify that -j1 overrides the pool.
            b.ninja_clean()
            b.ninja_run(
                ["-j1", "all"],
                prefix_args=jobserver_args + [f"--jobs={task_count}"],
            )
            max_overlaps = compute_max_overlapped_spans(b.path, task_count)
            self.assertEqual(max_overlaps, 1)

            # On Linux, use taskset to limit the number of available cores to 1
            # and verify that the jobserver overrides the default Ninja parallelism
            # and that {task_count} tasks are still spawned in parallel.
            if platform.system() == "Linux":
                # First, run without a jobserver, with a single CPU, Ninja will
                # use a parallelism of 2 in this case (GuessParallelism() in ninja.cc)
                b.ninja_clean()
                b.ninja_run(
                    ["all"],
                    prefix_args=["taskset", "-c", "0"],
                )
                max_overlaps = compute_max_overlapped_spans(b.path, task_count)
                self.assertEqual(max_overlaps, 2)

                # Now with a jobserver with {task_count} tasks.
                b.ninja_clean()
                b.ninja_run(
                    ["all"],
                    prefix_args=jobserver_args
                    + [f"--jobs={task_count}"]
                    + ["taskset", "-c", "0"],
                )
                max_overlaps = compute_max_overlapped_spans(b.path, task_count)
                self.assertEqual(max_overlaps, task_count)

    @unittest.skipIf(_PLATFORM_IS_WINDOWS, "These test methods do not work on Windows")
    def test_jobserver_client_with_posix_fifo(self):
        self._run_client_test([sys.executable, "-S", _JOBSERVER_POOL_SCRIPT])

    @unittest.skipIf(_PLATFORM_IS_WINDOWS, "These test methods do not work on Windows")
    def test_jobserver_client_with_posix_pipe(self):
        # Verify that setting up a --pipe server does not make Ninja exit with an error.
        # Instead, a warning is printed.
        pipe_warning = "Pipe-based protocol is not supported!"
        task_count = 4
        build_plan = generate_build_plan(task_count)
        with BuildDir(build_plan) as b:

            prefix_args = [
                sys.executable,
                "-S",
                _JOBSERVER_POOL_SCRIPT,
                "--pipe",
                f"--jobs={task_count}",
            ]

            def run_ninja_with_jobserver_pipe(args):
                ret = b.ninja_spawn(args, prefix_args=prefix_args)
                ret.check_returncode()
                return ret.stdout, ret.stderr

            output, error = run_ninja_with_jobserver_pipe(["all"])
            if _DEBUG:
                print(f"OUTPUT [{output}]\nERROR [{error}]\n", file=sys.stderr)
            # assertIn reports both strings on failure, unlike assertTrue(find() >= 0).
            self.assertIn(pipe_warning, error)

            max_overlaps = compute_max_overlapped_spans(b.path, task_count)
            self.assertEqual(max_overlaps, task_count)

            # Using an explicit -j<N> ignores the jobserver pool.
            b.ninja_clean()
            output, error = run_ninja_with_jobserver_pipe(["-j1", "all"])
            if _DEBUG:
                print(f"OUTPUT [{output}]\nERROR [{error}]\n", file=sys.stderr)
            self.assertNotIn(pipe_warning, error)

            max_overlaps = compute_max_overlapped_spans(b.path, task_count)
            self.assertEqual(max_overlaps, 1)

    def _test_MAKEFLAGS_value(
        self,
        ninja_args: T.Optional[T.List[str]] = None,
        prefix_args: T.Optional[T.List[str]] = None,
    ):
        """Check that commands run by Ninja see `--jobserver-auth=` in MAKEFLAGS.

        Args:
          ninja_args: Optional extra Ninja arguments, placed before `--quiet all`.
          prefix_args: Optional command prefix placed before the Ninja binary.
        """
        build_plan = r"""
rule print
    command = echo MAKEFLAGS="[$$MAKEFLAGS]"

build all: print
"""
        with BuildDir(build_plan) as b:
            # None stands for "no arguments"; always hand real lists to ninja_spawn.
            ret = b.ninja_spawn(
                list(ninja_args or []) + ["--quiet", "all"],
                prefix_args=list(prefix_args or []),
            )
            self.assertEqual(ret.returncode, 0)
            output = ret.stdout.strip()
            pos = output.find("MAKEFLAGS=[")
            self.assertNotEqual(pos, -1, "Could not find MAKEFLAGS in output!")
            makeflags, sep, _ = output[pos + len("MAKEFLAGS=[") :].partition("]")
            self.assertEqual(sep, "]", "Missing ] in output!: " + output)
            self.assertIn(
                "--jobserver-auth=",
                makeflags,
                f"Missing --jobserver-auth from MAKEFLAGS [{makeflags}]\nSTDOUT [{ret.stdout}]\nSTDERR [{ret.stderr}]",
            )

    def test_client_passes_MAKEFLAGS(self):
        self._test_MAKEFLAGS_value(
            prefix_args=[sys.executable, "-S", _JOBSERVER_POOL_SCRIPT]
        )
323+
if __name__ == "__main__":
324+
unittest.main()

0 commit comments

Comments
 (0)