File tree Expand file tree Collapse file tree 1 file changed +12
-6
lines changed
Expand file tree Collapse file tree 1 file changed +12
-6
lines changed Original file line number Diff line number Diff line change 44
55import torch
66
7+ from vllm.logger import init_logger
8+
9+ logger = init_logger(__name__)
10+
711# set some common config/environment variables that should be set
812# for all processes created by vllm and all processes
913# that interact with vllm workers.
1014# they are executed whenever `import vllm` is called.
1115
12- if not os.path.exists('/dev/nvidia-caps-imex-channels'):
13- # normally, we disable NCCL_CUMEM_ENABLE because it
14- # will cost 1~2 GiB GPU memory with cudagraph+allreduce,
15- # see https://github.com/NVIDIA/nccl/issues/1234
16- # for more details.
17- # However, NCCL requires NCCL_CUMEM_ENABLE to work with
16+ if 'NCCL_CUMEM_ENABLE' in os.environ:
17+     logger.warning(
18+         "NCCL_CUMEM_ENABLE is set to %s, skipping override. "
19+         "This may increase memory overhead with cudagraph+allreduce: "
20+         "https://github.com/NVIDIA/nccl/issues/1234",
21+         os.environ['NCCL_CUMEM_ENABLE'])
22+ elif not os.path.exists('/dev/nvidia-caps-imex-channels'):
23+     # NCCL requires NCCL_CUMEM_ENABLE to work with
1824 # multi-node NVLink, typically on GB200-NVL72 systems.
1925 # The ultimate way to detect multi-node NVLink is to use
2026 # NVML APIs, which are too expensive to call here.
You can’t perform that action at this time.
0 commit comments