Skip to content

Commit a1102a0

Browse files
authored
Merge pull request #392 from tiran/build-settings-jobs
Redesign parallel build configuration
2 parents fe980c2 + d488143 commit a1102a0

File tree

9 files changed

+159
-18
lines changed

9 files changed

+159
-18
lines changed

requirements-mypy.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
mypy
22
pytest
33
types-html5lib
4+
types-psutil
45
types-PyYAML
56
types-requests
67
types-toml

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ elfdeps>=0.2.0
33
html5lib
44
packaging
55
pkginfo
6+
psutil
67
pydantic
78
PyGithub
89
pyproject_hooks>=1.0.0,!=1.1.0

src/fromager/__main__.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#!/usr/bin/env python3
22

33
import logging
4-
import os
54
import pathlib
65

76
import click
@@ -119,7 +118,8 @@
119118
"-j",
120119
"--jobs",
121120
type=int,
122-
help="number of jobs available to run in parallel",
121+
default=None,
122+
help="maximum number of jobs to run in parallel",
123123
)
124124
@click.option(
125125
"--network-isolation/--no-network-isolation",
@@ -143,7 +143,7 @@ def main(
143143
wheel_server_url: str,
144144
cleanup: bool,
145145
variant: str,
146-
jobs: int,
146+
jobs: int | None,
147147
network_isolation: bool,
148148
) -> None:
149149
# Set the overall logger level to debug and allow the handlers to filter
@@ -190,6 +190,7 @@ def main(
190190
settings_dir=settings_dir,
191191
patches_dir=patches_dir,
192192
variant=variant,
193+
max_jobs=jobs,
193194
),
194195
constraints_file=constraints_file,
195196
patches_dir=patches_dir,
@@ -199,8 +200,8 @@ def main(
199200
wheel_server_url=wheel_server_url,
200201
cleanup=cleanup,
201202
variant=variant,
202-
jobs=jobs if jobs is None or jobs > 0 else os.cpu_count(),
203203
network_isolation=network_isolation,
204+
max_jobs=jobs,
204205
)
205206
wkctx.setup()
206207
ctx.obj = wkctx

src/fromager/context.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,15 +36,16 @@ def __init__(
3636
wheel_server_url: str,
3737
cleanup: bool = True,
3838
variant: str = "cpu",
39-
jobs: int | None = None,
4039
network_isolation: bool = False,
40+
max_jobs: int | None = None,
4141
):
4242
if active_settings is None:
4343
active_settings = packagesettings.Settings(
4444
settings=packagesettings.SettingsFile(),
4545
package_settings=[],
4646
patches_dir=patches_dir,
4747
variant=variant,
48+
max_jobs=max_jobs,
4849
)
4950
self.settings = active_settings
5051
self.input_constraints_file = constraints_file
@@ -64,7 +65,6 @@ def __init__(
6465
self.wheel_server_url = wheel_server_url
6566
self.cleanup = cleanup
6667
self.variant = variant
67-
self.jobs = jobs
6868
self.network_isolation = network_isolation
6969

7070
self._build_order_filename = self.work_dir / "build-order.json"

src/fromager/packagesettings.py

Lines changed: 88 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import typing
77
from collections.abc import Mapping
88

9+
import psutil
910
import pydantic
1011
import yaml
1112
from packaging.utils import BuildTag, NormalizedName, canonicalize_name
@@ -142,6 +143,27 @@ def validate_destination_filename(cls, v):
142143
return v
143144

144145

146+
class BuildOptions(pydantic.BaseModel):
147+
"""Build system options"""
148+
149+
model_config = MODEL_CONFIG
150+
151+
cpu_cores_per_job: int = Field(default=1, ge=1)
152+
"""Scale parallel jobs by available CPU cores
153+
154+
Examples:
155+
1: as many parallel jobs as CPU logical cores
156+
2: allocate 2 cores per job
157+
"""
158+
159+
memory_per_job_gb: float = Field(default=1.0, ge=0.1)
160+
"""Scale parallel jobs by available virtual memory (without swap)
161+
162+
Examples:
163+
0.5: assume each parallel job requires 512 MB virtual memory
164+
"""
165+
166+
145167
class VariantInfo(pydantic.BaseModel):
146168
"""Variant information for a package"""
147169

@@ -209,6 +231,9 @@ class PackageSettings(pydantic.BaseModel):
209231
resolve_source: ResolveSource = Field(default_factory=ResolveSource)
210232
"""Resolve distribution version"""
211233

234+
build_options: BuildOptions = Field(default_factory=BuildOptions)
235+
"""Build system options"""
236+
212237
variants: Mapping[Variant, VariantInfo] = Field(default_factory=dict)
213238
"""Variant configuration"""
214239

@@ -320,16 +345,30 @@ def _resolve_template(
320345
raise
321346

322347

348+
def get_cpu_count() -> int:
349+
"""CPU count from scheduler affinity"""
350+
if hasattr(os, "sched_getaffinity"):
351+
return len(os.sched_getaffinity(0))
352+
else:
353+
return os.cpu_count() or 1
354+
355+
356+
def get_available_memory_gib() -> float:
357+
"""available virtual memory in GiB"""
358+
return psutil.virtual_memory().available / (1024**3)
359+
360+
323361
class PackageBuildInfo:
324362
"""Package build information
325363
326364
Public API for PackageSettings with information from Settings [line truncated in page capture]
327365
"""
328366

329-
def __init__(self, ctx: "Settings", ps: PackageSettings) -> None:
330-
self._variant = typing.cast(Variant, ctx.variant)
331-
self._patches_dir = ctx.patches_dir
332-
self._variant_changelog = ctx.variant_changelog()
367+
def __init__(self, settings: "Settings", ps: PackageSettings) -> None:
368+
self._variant = typing.cast(Variant, settings.variant)
369+
self._patches_dir = settings.patches_dir
370+
self._variant_changelog = settings.variant_changelog()
371+
self._max_jobs: int | None = settings.max_jobs
333372
self._ps = ps
334373
self._plugin_module: types.ModuleType | None | typing.Literal[False] = False
335374
self._patches: PatchMap | None = None
@@ -497,6 +536,35 @@ def get_extra_environ(
497536

498537
return extra_environ
499538

539+
def parallel_jobs(self) -> int:
540+
"""How many parallel jobs?"""
541+
# adjust by CPU cores, at least 1
542+
cpu_cores_per_job = self._ps.build_options.cpu_cores_per_job
543+
cpu_count = get_cpu_count()
544+
max_num_job_cores = int(max(1, cpu_count // cpu_cores_per_job))
545+
logger.debug(
546+
f"{self.package}: {max_num_job_cores=}, {cpu_cores_per_job=}, {cpu_count=}"
547+
)
548+
549+
# adjust by memory consumption per job, at least 1
550+
memory_per_job_gb = self._ps.build_options.memory_per_job_gb
551+
free_memory = get_available_memory_gib()
552+
max_num_jobs_memory = int(max(1.0, free_memory // memory_per_job_gb))
553+
logger.debug(
554+
f"{self.package}: {max_num_jobs_memory=}, {memory_per_job_gb=}, {free_memory=:0.1f} GiB"
555+
)
556+
557+
# limit by smallest amount of CPU, memory, and --jobs parameter
558+
max_jobs = cpu_count if self._max_jobs is None else self._max_jobs
559+
parallel_builds = min(max_num_job_cores, max_num_jobs_memory, max_jobs)
560+
561+
logger.debug(
562+
f"{self.package}: parallel builds {parallel_builds=} "
563+
f"({free_memory=:0.1f} GiB, {cpu_count=}, {max_jobs=})"
564+
)
565+
566+
return parallel_builds
567+
500568
def serialize(self, **kwargs) -> dict[str, typing.Any]:
501569
return self._ps.serialize(**kwargs)
502570

@@ -556,13 +624,15 @@ def __init__(
556624
package_settings: typing.Iterable[PackageSettings],
557625
variant: Variant | str,
558626
patches_dir: pathlib.Path,
627+
max_jobs: int | None,
559628
) -> None:
560629
self._settings = settings
561630
self._package_settings: dict[Package, PackageSettings] = {
562631
p.name: p for p in package_settings
563632
}
564633
self._variant = typing.cast(Variant, variant)
565634
self._patches_dir = patches_dir
635+
self._max_jobs = max_jobs
566636
self._pbi_cache: dict[Package, PackageBuildInfo] = {}
567637

568638
@classmethod
@@ -573,6 +643,7 @@ def from_files(
573643
settings_dir: pathlib.Path,
574644
variant: Variant | str,
575645
patches_dir: pathlib.Path,
646+
max_jobs: int | None,
576647
) -> "Settings":
577648
"""Create Settings from settings.yaml and directory"""
578649
if settings_file.is_file():
@@ -591,6 +662,7 @@ def from_files(
591662
package_settings=package_settings,
592663
variant=variant,
593664
patches_dir=patches_dir,
665+
max_jobs=max_jobs,
594666
)
595667

596668
@property
@@ -612,10 +684,21 @@ def patches_dir(self) -> pathlib.Path:
612684

613685
@patches_dir.setter
614686
def patches_dir(self, path: pathlib.Path) -> None:
615-
"""Change patches_dr (for testing)"""
687+
"""Change patches_dir (for testing)"""
616688
self._pbi_cache.clear()
617689
self._patches_dir = path
618690

691+
@property
692+
def max_jobs(self) -> int | None:
693+
"""Get max parallel jobs"""
694+
return self._max_jobs
695+
696+
@max_jobs.setter
697+
def max_jobs(self, jobs: int | None) -> None:
698+
"""Change max jobs (for testing)"""
699+
self._pbi_cache.clear()
700+
self._max_jobs = jobs
701+
619702
def variant_changelog(self) -> list[str]:
620703
"""Get global changelog for current variant"""
621704
return list(self._settings.changelog.get(self.variant, []))

src/fromager/wheels.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -234,13 +234,13 @@ def build_wheel(
234234
# TODO: refactor?
235235
# Build Rust without network access
236236
extra_environ["CARGO_NET_OFFLINE"] = "true"
237-
# configure max jobs settings. should cover most of the cases, if not then the user can use ctx.jobs in their plugin
238-
if ctx.jobs:
239-
extra_environ["MAKEFLAGS"] = (
240-
f"{extra_environ.get('MAKEFLAGS', '')} -j{ctx.jobs}"
241-
)
242-
extra_environ["CMAKE_BUILD_PARALLEL_LEVEL"] = f"{ctx.jobs}"
243-
extra_environ["MAX_JOBS"] = f"{ctx.jobs}"
237+
238+
# configure max jobs settings, settings depend on package, available
239+
# CPU cores, and available virtual memory.
240+
jobs = pbi.parallel_jobs()
241+
extra_environ["MAKEFLAGS"] = f"{extra_environ.get('MAKEFLAGS', '')} -j{jobs}"
242+
extra_environ["CMAKE_BUILD_PARALLEL_LEVEL"] = str(jobs)
243+
extra_environ["MAX_JOBS"] = str(jobs)
244244

245245
# Start the timer
246246
start = datetime.now().replace(microsecond=0)

tests/conftest.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ def testdata_context(
4545
settings_dir=overrides / "settings",
4646
patches_dir=patches_dir,
4747
variant=variant,
48+
max_jobs=None,
4849
),
4950
constraints_file=None,
5051
patches_dir=overrides / "patches",

tests/test_packagesettings.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import pathlib
2+
from unittest.mock import Mock, patch
23

34
import pydantic
45
import pytest
@@ -21,6 +22,10 @@
2122

2223
FULL_EXPECTED = {
2324
"build_dir": pathlib.Path("python"),
25+
"build_options": {
26+
"cpu_cores_per_job": 4,
27+
"memory_per_job_gb": 4.0,
28+
},
2429
"changelog": {
2530
Version("1.0.1"): ["fixed bug"],
2631
Version("1.0.2"): ["more bugs", "rebuild"],
@@ -64,6 +69,10 @@
6469
EMPTY_EXPECTED = {
6570
"name": "test-empty-pkg",
6671
"build_dir": None,
72+
"build_options": {
73+
"cpu_cores_per_job": 1,
74+
"memory_per_job_gb": 1.0,
75+
},
6776
"changelog": {},
6877
"env": {},
6978
"download_source": {
@@ -302,3 +311,45 @@ def test_settings_list(testdata_context: context.WorkContext) -> None:
302311
assert testdata_context.settings.variant_changelog() == [
303312
"setuptools upgraded to 82.0.0"
304313
]
314+
315+
316+
@patch("fromager.packagesettings.get_cpu_count", return_value=8)
317+
@patch("fromager.packagesettings.get_available_memory_gib", return_value=7.1)
318+
def test_parallel_jobs(
319+
get_available_memory_gib: Mock,
320+
get_cpu_count: Mock,
321+
testdata_context: context.WorkContext,
322+
) -> None:
323+
assert testdata_context.settings.max_jobs is None
324+
325+
pbi = testdata_context.settings.package_build_info(TEST_EMPTY_PKG)
326+
assert pbi.parallel_jobs() == 7
327+
328+
get_cpu_count.return_value = 4
329+
assert pbi.parallel_jobs() == 4
330+
331+
get_available_memory_gib.return_value = 2.1
332+
assert pbi.parallel_jobs() == 2
333+
334+
get_available_memory_gib.return_value = 1.5
335+
assert pbi.parallel_jobs() == 1
336+
337+
testdata_context.settings.max_jobs = 2
338+
pbi = testdata_context.settings.package_build_info(TEST_EMPTY_PKG)
339+
get_available_memory_gib.return_value = 23
340+
assert pbi.parallel_jobs() == 2
341+
342+
# test-pkg needs more memory
343+
testdata_context.settings.max_jobs = 200
344+
pbi = testdata_context.settings.package_build_info(TEST_PKG)
345+
get_cpu_count.return_value = 16
346+
get_available_memory_gib.return_value = 20
347+
assert pbi.parallel_jobs() == 4
348+
349+
get_cpu_count.return_value = 32
350+
get_available_memory_gib.return_value = 25
351+
assert pbi.parallel_jobs() == 6
352+
353+
testdata_context.settings.max_jobs = 4
354+
pbi = testdata_context.settings.package_build_info(TEST_PKG)
355+
assert pbi.parallel_jobs() == 4

tests/testdata/context/overrides/settings/test_pkg.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
build_dir: python
2+
build_options:
3+
cpu_cores_per_job: 4
4+
memory_per_job_gb: 4
25
changelog:
36
"1.0.1":
47
- fixed bug

0 commit comments

Comments (0)