@@ -1,3 +1,4 @@
+import importlib.util
 import io
 import logging
 import os
@@ -13,10 +14,23 @@
 from setuptools.command.build_ext import build_ext
 from torch.utils.cpp_extension import CUDA_HOME
 
+
+def load_module_from_path(module_name, path):
+    spec = importlib.util.spec_from_file_location(module_name, path)
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)
+    return module
+
+
 ROOT_DIR = os.path.dirname(__file__)
 logger = logging.getLogger(__name__)
-# Target device of vLLM, supporting [cuda (by default), rocm, neuron, cpu]
-VLLM_TARGET_DEVICE = os.getenv("VLLM_TARGET_DEVICE", "cuda")
+
+# cannot import envs directly because it depends on vllm,
+# which is not installed yet
+envs = load_module_from_path('envs', os.path.join(ROOT_DIR, 'vllm', 'envs.py'))
+
+VLLM_TARGET_DEVICE = envs.VLLM_TARGET_DEVICE
 
 # vLLM only supports Linux platform
 assert sys.platform.startswith(
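As an aside, a minimal self-contained sketch of the importlib pattern used by load_module_from_path above; the module file config_probe.py and its MY_SETTING attribute are hypothetical examples, not part of the vLLM codebase.

import importlib.util
import os
import sys


def load_module_from_path(module_name, path):
    # Build a module spec from a file path, create an empty module from it,
    # register it in sys.modules, then execute the file's code in that module.
    spec = importlib.util.spec_from_file_location(module_name, path)
    module = importlib.util.module_from_spec(spec)
    sys.modules[module_name] = module
    spec.loader.exec_module(module)
    return module


# Hypothetical usage: read config_probe.py (which might define, e.g.,
# MY_SETTING = os.getenv("MY_SETTING", "default")) without installing or
# importing the package that ships it.
probe = load_module_from_path(
    "config_probe", os.path.join(os.path.dirname(__file__), "config_probe.py"))
print(probe.MY_SETTING)

This is why setup.py can consume vllm/envs.py here even though the vllm package itself is not installed yet at build time.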
@@ -60,7 +74,7 @@ class cmake_build_ext(build_ext):
     def compute_num_jobs(self):
         # `num_jobs` is either the value of the MAX_JOBS environment variable
         # (if defined) or the number of CPUs available.
-        num_jobs = os.environ.get("MAX_JOBS", None)
+        num_jobs = envs.MAX_JOBS
         if num_jobs is not None:
             num_jobs = int(num_jobs)
             logger.info("Using MAX_JOBS=%d as the number of jobs.", num_jobs)
@@ -78,7 +92,7 @@ def compute_num_jobs(self):
         # environment variable (if defined) or 1.
         # when it is set, we reduce `num_jobs` to avoid
         # overloading the system.
-        nvcc_threads = os.getenv("NVCC_THREADS", None)
+        nvcc_threads = envs.NVCC_THREADS
         if nvcc_threads is not None:
             nvcc_threads = int(nvcc_threads)
             logger.info(
@@ -104,7 +118,7 @@ def configure(self, ext: CMakeExtension) -> None:
         # Select the build type.
         # Note: optimization level + debug info are set by the build type
         default_cfg = "Debug" if self.debug else "RelWithDebInfo"
-        cfg = os.getenv("CMAKE_BUILD_TYPE", default_cfg)
+        cfg = envs.CMAKE_BUILD_TYPE or default_cfg
 
         # where .so files will be written, should be the same for all extensions
         # that use the same CMakeLists.txt.
@@ -118,7 +132,7 @@ def configure(self, ext: CMakeExtension) -> None:
             '-DVLLM_TARGET_DEVICE={}'.format(VLLM_TARGET_DEVICE),
         ]
 
-        verbose = bool(int(os.getenv('VERBOSE', '0')))
+        verbose = envs.VERBOSE
         if verbose:
             cmake_args += ['-DCMAKE_VERBOSE_MAKEFILE=ON']
 
@@ -205,16 +219,15 @@ def _is_neuron() -> bool:
         subprocess.run(["neuron-ls"], capture_output=True, check=True)
     except (FileNotFoundError, PermissionError, subprocess.CalledProcessError):
         torch_neuronx_installed = False
-    return torch_neuronx_installed or os.environ.get("VLLM_BUILD_WITH_NEURON",
-                                                      False)
+    return torch_neuronx_installed or envs.VLLM_BUILD_WITH_NEURON
 
 
 def _is_cpu() -> bool:
     return VLLM_TARGET_DEVICE == "cpu"
 
 
 def _install_punica() -> bool:
-    return bool(int(os.getenv("VLLM_INSTALL_PUNICA_KERNELS", "0")))
+    return envs.VLLM_INSTALL_PUNICA_KERNELS
 
 
 def get_hipcc_rocm_version():
@@ -377,7 +390,7 @@ def _read_requirements(filename: str) -> List[str]:
 package_data = {
     "vllm": ["py.typed", "model_executor/layers/fused_moe/configs/*.json"]
 }
-if os.environ.get("VLLM_USE_PRECOMPILED"):
+if envs.VLLM_USE_PRECOMPILED:
     ext_modules = []
     package_data["vllm"].append("*.so")
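For context, each envs attribute read in this diff replaces a direct os.getenv call whose default is visible in the removed lines. A simplified stand-in for vllm/envs.py that is consistent with those defaults is sketched below; this is an assumption for illustration, and the real module may structure these lookups differently (for example, lazily).

# assumption-level stand-in for vllm/envs.py; defaults mirror the os.getenv
# calls removed in this diff, but the real module may differ
import os

VLLM_TARGET_DEVICE = os.getenv("VLLM_TARGET_DEVICE", "cuda")
MAX_JOBS = os.getenv("MAX_JOBS", None)
NVCC_THREADS = os.getenv("NVCC_THREADS", None)
CMAKE_BUILD_TYPE = os.getenv("CMAKE_BUILD_TYPE", None)
VERBOSE = bool(int(os.getenv("VERBOSE", "0")))
VLLM_BUILD_WITH_NEURON = bool(os.environ.get("VLLM_BUILD_WITH_NEURON", False))
VLLM_INSTALL_PUNICA_KERNELS = bool(int(os.getenv("VLLM_INSTALL_PUNICA_KERNELS", "0")))
VLLM_USE_PRECOMPILED = bool(os.environ.get("VLLM_USE_PRECOMPILED", False))

Centralizing the defaults in one module lets setup.py and the installed package read build-time environment variables through a single definition instead of scattered os.getenv calls.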