Skip to content

Commit 188a459

Browse files
authored
[Misc] Do not override NCCL_CUMEM_ENABLE if set explicitly (vllm-project#19105)
Signed-off-by: 22quinn <[email protected]>
1 parent 1809308 commit 188a459

File tree

1 file changed

+12
-6
lines changed

1 file changed

+12
-6
lines changed

vllm/env_override.py

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -4,17 +4,23 @@
44

55
import torch
66

7+
from vllm.logger import init_logger
8+
9+
logger = init_logger(__name__)
10+
711
# set some common config/environment variables that should be set
812
# for all processes created by vllm and all processes
913
# that interact with vllm workers.
1014
# they are executed whenever `import vllm` is called.
1115

12-
if not os.path.exists('/dev/nvidia-caps-imex-channels'):
13-
# normally, we disable NCCL_CUMEM_ENABLE because it
14-
# will cost 1~2 GiB GPU memory with cudagraph+allreduce,
15-
# see https://github.com/NVIDIA/nccl/issues/1234
16-
# for more details.
17-
# However, NCCL requires NCCL_CUMEM_ENABLE to work with
16+
if 'NCCL_CUMEM_ENABLE' in os.environ:
17+
logger.warning(
18+
"NCCL_CUMEM_ENABLE is set to %s, skipping override. "
19+
"This may increase memory overhead with cudagraph+allreduce: "
20+
"https://github.com/NVIDIA/nccl/issues/1234",
21+
os.environ['NCCL_CUMEM_ENABLE'])
22+
elif not os.path.exists('/dev/nvidia-caps-imex-channels'):
23+
# NCCL requires NCCL_CUMEM_ENABLE to work with
1824
# multi-node NVLink, typically on GB200-NVL72 systems.
1925
# The ultimate way to detect multi-node NVLink is to use
2026
# NVML APIs, which are too expensive to call here.

0 commit comments

Comments
 (0)