
Commit 344bf7c

[Misc] add installation time env vars (#4574)
1 parent ab50275 commit 344bf7c

File tree: setup.py · vllm/envs.py

2 files changed: +81 −18

2 files changed

+81
-18
lines changed

setup.py

Lines changed: 23 additions & 10 deletions
@@ -1,3 +1,4 @@
+import importlib.util
 import io
 import logging
 import os
@@ -13,10 +14,23 @@
 from setuptools.command.build_ext import build_ext
 from torch.utils.cpp_extension import CUDA_HOME
 
+
+def load_module_from_path(module_name, path):
+    spec = importlib.util.spec_from_file_location(module_name, path)
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)
+    return module
+
+
 ROOT_DIR = os.path.dirname(__file__)
 logger = logging.getLogger(__name__)
-# Target device of vLLM, supporting [cuda (by default), rocm, neuron, cpu]
-VLLM_TARGET_DEVICE = os.getenv("VLLM_TARGET_DEVICE", "cuda")
+
+# cannot import envs directly because it depends on vllm,
+# which is not installed yet
+envs = load_module_from_path('envs', os.path.join(ROOT_DIR, 'vllm', 'envs.py'))
+
+VLLM_TARGET_DEVICE = envs.VLLM_TARGET_DEVICE
 
 # vLLM only supports Linux platform
 assert sys.platform.startswith(
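
The bootstrap above solves a chicken-and-egg problem: the env-var definitions now live in vllm/envs.py, but setup.py runs before the vllm package is installed, so a plain `import vllm.envs` would fail. Loading the file directly through importlib sidesteps the package import. A self-contained sketch of the same pattern (the path "vllm/envs.py" matches the diff; the usage around it is illustrative):

import importlib.util
import sys


def load_module_from_path(module_name, path):
    # Build a module spec from a file path, not from sys.path lookup.
    spec = importlib.util.spec_from_file_location(module_name, path)
    module = importlib.util.module_from_spec(spec)
    sys.modules[module_name] = module  # register before exec so self-imports resolve
    spec.loader.exec_module(module)    # run the file's top-level code
    return module


envs = load_module_from_path('envs', 'vllm/envs.py')
print(envs.VLLM_TARGET_DEVICE)  # "cuda" unless VLLM_TARGET_DEVICE is set

This only works because envs.py keeps its own top-level imports to the standard library; if it imported anything from vllm, executing it before installation would fail the same way.
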
@@ -60,7 +74,7 @@ class cmake_build_ext(build_ext):
     def compute_num_jobs(self):
         # `num_jobs` is either the value of the MAX_JOBS environment variable
         # (if defined) or the number of CPUs available.
-        num_jobs = os.environ.get("MAX_JOBS", None)
+        num_jobs = envs.MAX_JOBS
         if num_jobs is not None:
             num_jobs = int(num_jobs)
             logger.info("Using MAX_JOBS=%d as the number of jobs.", num_jobs)
@@ -78,7 +92,7 @@ def compute_num_jobs(self):
         # environment variable (if defined) or 1.
         # when it is set, we reduce `num_jobs` to avoid
         # overloading the system.
-        nvcc_threads = os.getenv("NVCC_THREADS", None)
+        nvcc_threads = envs.NVCC_THREADS
         if nvcc_threads is not None:
             nvcc_threads = int(nvcc_threads)
             logger.info(
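
For reference, the interplay the comments describe: MAX_JOBS caps parallel compile jobs (defaulting to the CPU count), and when NVCC_THREADS is set the job count is scaled down so jobs × threads does not oversubscribe the machine. The exact reduction is cut off in this hunk; the sketch below is one plausible reading, not vLLM's verbatim code:

import os


def compute_num_jobs(max_jobs=None, nvcc_threads=None):
    # MAX_JOBS (if defined) wins; otherwise fall back to the CPU count.
    num_jobs = int(max_jobs) if max_jobs is not None else os.cpu_count()
    if nvcc_threads is not None:
        # Each compile job spawns several nvcc threads, so shrink the job
        # count to keep jobs * threads roughly at the CPU count.
        num_jobs = max(1, num_jobs // int(nvcc_threads))
    return num_jobs


print(compute_num_jobs("16", "4"))  # -> 4
print(compute_num_jobs())           # -> os.cpu_count()
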
@@ -104,7 +118,7 @@ def configure(self, ext: CMakeExtension) -> None:
         # Select the build type.
         # Note: optimization level + debug info are set by the build type
         default_cfg = "Debug" if self.debug else "RelWithDebInfo"
-        cfg = os.getenv("CMAKE_BUILD_TYPE", default_cfg)
+        cfg = envs.CMAKE_BUILD_TYPE or default_cfg
 
         # where .so files will be written, should be the same for all extensions
         # that use the same CMakeLists.txt.
@@ -118,7 +132,7 @@ def configure(self, ext: CMakeExtension) -> None:
             '-DVLLM_TARGET_DEVICE={}'.format(VLLM_TARGET_DEVICE),
         ]
 
-        verbose = bool(int(os.getenv('VERBOSE', '0')))
+        verbose = envs.VERBOSE
         if verbose:
             cmake_args += ['-DCMAKE_VERBOSE_MAKEFILE=ON']
 
@@ -205,16 +219,15 @@ def _is_neuron() -> bool:
         subprocess.run(["neuron-ls"], capture_output=True, check=True)
     except (FileNotFoundError, PermissionError, subprocess.CalledProcessError):
         torch_neuronx_installed = False
-    return torch_neuronx_installed or os.environ.get("VLLM_BUILD_WITH_NEURON",
-                                                     False)
+    return torch_neuronx_installed or envs.VLLM_BUILD_WITH_NEURON
 
 
 def _is_cpu() -> bool:
     return VLLM_TARGET_DEVICE == "cpu"
 
 
 def _install_punica() -> bool:
-    return bool(int(os.getenv("VLLM_INSTALL_PUNICA_KERNELS", "0")))
+    return envs.VLLM_INSTALL_PUNICA_KERNELS
 
 
 def get_hipcc_rocm_version():
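
One behavioral detail worth noting in the two replaced expressions: they parse booleans differently, and those semantics carry over into the envs.py lambdas added below. `os.environ.get("VLLM_BUILD_WITH_NEURON", False)` is truthy for any non-empty string, including "0", whereas `bool(int(os.getenv("VLLM_INSTALL_PUNICA_KERNELS", "0")))` treats "0" as off. A quick standalone demonstration:

import os

# Any non-empty string is truthy, so even "0" enables the Neuron path.
os.environ["VLLM_BUILD_WITH_NEURON"] = "0"
print(bool(os.environ.get("VLLM_BUILD_WITH_NEURON", False)))  # True

# Integer parsing treats "0" as off and "1" as on.
os.environ["VLLM_INSTALL_PUNICA_KERNELS"] = "0"
print(bool(int(os.getenv("VLLM_INSTALL_PUNICA_KERNELS", "0"))))  # False
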
@@ -377,7 +390,7 @@ def _read_requirements(filename: str) -> List[str]:
 package_data = {
     "vllm": ["py.typed", "model_executor/layers/fused_moe/configs/*.json"]
 }
-if os.environ.get("VLLM_USE_PRECOMPILED"):
+if envs.VLLM_USE_PRECOMPILED:
     ext_modules = []
     package_data["vllm"].append("*.so")
vllm/envs.py

Lines changed: 58 additions & 8 deletions
@@ -27,13 +27,71 @@
 VLLM_CPU_KVCACHE_SPACE: int = 0
 VLLM_USE_RAY_COMPILED_DAG: bool = False
 VLLM_WORKER_MULTIPROC_METHOD: str = "spawn"
+VLLM_TARGET_DEVICE: str = "cuda"
+MAX_JOBS: Optional[str] = None
+NVCC_THREADS: Optional[str] = None
+VLLM_BUILD_WITH_NEURON: bool = False
+VLLM_USE_PRECOMPILED: bool = False
+VLLM_INSTALL_PUNICA_KERNELS: bool = False
+CMAKE_BUILD_TYPE: Optional[str] = None
+VERBOSE: bool = False
 
 # The begin-* and end* here are used by the documentation generator
 # to extract the used env vars.
 
 # begin-env-vars-definition
 
 environment_variables: Dict[str, Callable[[], Any]] = {
+
+    # ================== Installation Time Env Vars ==================
+
+    # Target device of vLLM, supporting [cuda (by default), rocm, neuron, cpu]
+    "VLLM_TARGET_DEVICE":
+    lambda: os.getenv("VLLM_TARGET_DEVICE", "cuda"),
+
+    # Maximum number of compilation jobs to run in parallel.
+    # By default this is the number of CPUs
+    "MAX_JOBS":
+    lambda: os.getenv("MAX_JOBS", None),
+
+    # Number of threads to use for nvcc
+    # By default this is 1.
+    # If set, `MAX_JOBS` will be reduced to avoid oversubscribing the CPU.
+    "NVCC_THREADS":
+    lambda: os.getenv("NVCC_THREADS", None),
+
+    # If set, vllm will build with Neuron support
+    "VLLM_BUILD_WITH_NEURON":
+    lambda: bool(os.environ.get("VLLM_BUILD_WITH_NEURON", False)),
+
+    # If set, vllm will use precompiled binaries (*.so)
+    "VLLM_USE_PRECOMPILED":
+    lambda: bool(os.environ.get("VLLM_USE_PRECOMPILED")),
+
+    # If set, vllm will install Punica kernels
+    "VLLM_INSTALL_PUNICA_KERNELS":
+    lambda: bool(int(os.getenv("VLLM_INSTALL_PUNICA_KERNELS", "0"))),
+
+    # CMake build type
+    # If not set, defaults to "Debug" or "RelWithDebInfo"
+    # Available options: "Debug", "Release", "RelWithDebInfo"
+    "CMAKE_BUILD_TYPE":
+    lambda: os.getenv("CMAKE_BUILD_TYPE"),
+
+    # If set, vllm will print verbose logs during installation
+    "VERBOSE":
+    lambda: bool(int(os.getenv('VERBOSE', '0'))),
+
+    # Root directory for VLLM configuration files
+    # Note that this not only affects how vllm finds its configuration files
+    # during runtime, but also affects how vllm installs its configuration
+    # files during **installation**.
+    "VLLM_CONFIG_ROOT":
+    lambda: os.environ.get("VLLM_CONFIG_ROOT", None) or os.getenv(
+        "XDG_CONFIG_HOME", None) or os.path.expanduser("~/.config"),
+
+    # ================== Runtime Env Vars ==================
+
     # used in distributed environment to determine the master address
     'VLLM_HOST_IP':
     lambda: os.getenv('VLLM_HOST_IP', "") or os.getenv("HOST_IP", ""),
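
Each value in `environment_variables` is a zero-argument lambda, so a variable is re-read from the environment on every access rather than frozen at import time. vllm/envs.py exposes the dict entries as module attributes, which is what lets setup.py write `envs.MAX_JOBS`; a minimal sketch of that pattern using PEP 562's module-level `__getattr__`, not a verbatim copy of the file:

import os
from typing import Any, Callable, Dict

environment_variables: Dict[str, Callable[[], Any]] = {
    "MAX_JOBS": lambda: os.getenv("MAX_JOBS", None),
}


def __getattr__(name: str) -> Any:
    # Invoked for attribute lookups that miss the module's globals;
    # calling the lambda here re-reads the environment every time.
    if name in environment_variables:
        return environment_variables[name]()
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

The flush-left annotations added above (`MAX_JOBS: Optional[str] = None`, etc.) exist so type checkers see these dynamic attributes with proper types.
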
@@ -93,14 +151,6 @@
     "S3_ENDPOINT_URL":
     lambda: os.environ.get("S3_ENDPOINT_URL", None),
 
-    # Root directory for VLLM configuration files
-    # Note that this not only affects how vllm finds its configuration files
-    # during runtime, but also affects how vllm installs its configuration
-    # files during **installation**.
-    "VLLM_CONFIG_ROOT":
-    lambda: os.environ.get("VLLM_CONFIG_ROOT", None) or os.getenv(
-        "XDG_CONFIG_HOME", None) or os.path.expanduser("~/.config"),
-
     # Usage stats collection
     "VLLM_USAGE_STATS_SERVER":
     lambda: os.environ.get("VLLM_USAGE_STATS_SERVER", "https://stats.vllm.ai"),
