We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 3de6e6a · commit 1dab9bc — Copy full SHA for 1dab9bc
vllm/executor/multiproc_gpu_executor.py
@@ -37,6 +37,11 @@ def _init_executor(self) -> None:
37
# Disable torch async compiling which won't work with daemonic processes
38
os.environ["TORCHINDUCTOR_COMPILE_THREADS"] = "1"
39
40
+ # Set OMP_NUM_THREADS to 1 if it is not set explicitly, avoids CPU
41
+ # contention amongst the shards
42
+ if "OMP_NUM_THREADS" not in os.environ:
43
+ os.environ["OMP_NUM_THREADS"] = "1"
44
+
45
assert world_size <= cuda_device_count_stateless(), (
46
"please set tensor_parallel_size to less than max local gpu count")
47
0 commit comments