Your current environment
services:
  vllm-ascend-head:
    image: quay.io/ascend/vllm-ascend:v0.11.0-dev
    container_name: vllm-ascend-head
    network_mode: host
    privileged: true
    restart: always
    shm_size: 500g
    environment:
      - HCCL_IF_IP=10.48.205.243
      - GLOO_SOCKET_IFNAME=bond4
      - TP_SOCKET_IFNAME=bond4
      - HCCL_SOCKET_IFNAME=bond4
      - RAY_EXPERIMENTAL_NOSET_ASCEND_RT_VISIBLE_DEVICES=1
      - ASCEND_RT_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
      - OMP_PROC_BIND=false
      - OMP_NUM_THREADS=100
      - VLLM_USE_V1=1
      - HCCL_BUFFSIZE=1024
    devices:
      - /dev/davinci0
      - /dev/davinci1
      - /dev/davinci2
      - /dev/davinci3
      - /dev/davinci4
      - /dev/davinci5
      - /dev/davinci6
      - /dev/davinci7
      - /dev/davinci_manager
      - /dev/devmm_svm
      - /dev/hisi_hdc
    volumes:
      - /usr/local/dcmi:/usr/local/dcmi
      - /usr/local/sbin/npu-smi:/usr/local/sbin/npu-smi
      - /usr/local/Ascend/driver/lib64:/usr/local/Ascend/driver/lib64
      - /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info
      - /etc/ascend_install.info:/etc/ascend_install.info
      - /data:/data
    command: >
      vllm serve /data/deepseek-V3.1-w8a8
      --host 0.0.0.0
      --port 1025
      --headless
      --data-parallel-size 4
      --data-parallel-size-local 2
      --data-parallel-start-rank 2
      --data-parallel-address 10.48.205.242
      --data-parallel-rpc-port 13389
      --tensor-parallel-size 4
      --seed 1024
      --quantization ascend
      --served-model-name DeepSeek-r1-32k_token
      --enable-expert-parallel
      --max-num-seqs 16
      --max-model-len 32768
      --max-num-batched-tokens 32768
      --trust-remote-code
      --no-enable-prefix-caching
      --gpu-memory-utilization 0.99
      --additional-config '{"ascend_scheduler_config":{"enabled":true},"torchair_graph_config":{"enabled":true}}'
The error occurs after the service has been in use for a period of time.
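
For context, this compose file runs the headless data-parallel worker; client traffic goes through the head node at 10.48.205.242. A minimal sketch of the kind of request that is served before the crash is shown below. The address and port are assumptions (the head node's actual --port may differ from the 1025 configured on this node); the served model name is taken from the compose file.

import requests

# Minimal request sketch against the OpenAI-compatible API exposed by the
# data-parallel head node. The address/port are assumptions based on the
# compose file above (10.48.205.242 is --data-parallel-address, 1025 is the
# --port configured on this node); adjust to the head node's actual serving port.
payload = {
    "model": "DeepSeek-r1-32k_token",  # matches --served-model-name
    "messages": [{"role": "user", "content": "Hello"}],
    "max_tokens": 64,
}
resp = requests.post(
    "http://10.48.205.242:1025/v1/chat/completions",
    json=payload,
    timeout=300,
)
print(resp.status_code, resp.json())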
🐛 Describe the bug
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671] WorkerProc hit an exception.
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671] Traceback (most recent call last):
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671] File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch_npu/dynamo/torchair/_utils/error_code.py", line 43, in wapper
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671] return func(*args, **kwargs)
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671] ^^^^^^^^^^^^^^^^^^^^^
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671] File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch_npu/dynamo/torchair/core/_backend.py", line 125, in compile
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671] return super(TorchNpuGraph, self).compile()
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671] RuntimeError: E19014: [PID: 1085] 2025-11-15-05:36:42.238.323 Value [input assist_info_for_combine shape] for Op [MoeDistributeCombineV2_116] is invalid. Reason: contains negative or zero dimension.
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671] Solution: Invalid operator information. Check the operator information in the error message.
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671] TraceBack (most recent call last):
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671] MoeDistributeCombineV2_116(MoeDistributeCombineV2) Verify failed.[FUNC:Verify][FILE:node_utils_ex.cc][LINE:165]
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671] Verify ing MoeDistributeCombineV2_116 failed.[FUNC:InferShapeAndType][FILE:infershape_pass.cc][LINE:131]
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671] Call InferShapeAndType for node:MoeDistributeCombineV2_116(MoeDistributeCombineV2) failed[FUNC:Infer][FILE:infershape_pass.cc][LINE:118]
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671] process pass InferShapePass on node:MoeDistributeCombineV2_116 failed, ret:4294967295[FUNC:RunPassesOnNode][FILE:base_pass.cc][LINE:565]
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671] [Call][PreRun] Failed, graph_id:2, session_id:2.[FUNC:CompileGraph][FILE:graph_manager.cc][LINE:4654]
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671] [Compile][Graph]Compile graph failed, error code:1343225857, session_id:2, graph_id:2, isEnableSliceSchedule:0.[FUNC:CompileGraph][FILE:ge_api.cc][LINE:1365]
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671]
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671]
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671] During handling of the above exception, another exception occurred:
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671]
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671] Traceback (most recent call last):
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671] File "/vllm-workspace/vllm/vllm/v1/executor/multiproc_executor.py", line 666, in worker_busy_loop
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671] output = func(*args, **kwargs)
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671] ^^^^^^^^^^^^^^^^^^^^^
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671] File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/worker_v1.py", line 375, in execute_dummy_batch
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671] self.model_runner._dummy_run(
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671] File "/usr/local/python3.11.13/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671] return func(*args, **kwargs)
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671] ^^^^^^^^^^^^^^^^^^^^^
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671] File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/model_runner_v1.py", line 2506, in _dummy_run
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671] hidden_states = self._generate_dummy_run_hidden_states(
(Worker_DP1_TP1_EP5 pid=1085) ERROR 11-15 05:36:42 [multiproc_executor.py:671]
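
For reference, the GE InferShapePass error above reports that the inferred shape of the assist_info_for_combine input to MoeDistributeCombineV2 contained a zero or negative dimension at graph-compile time. Conceptually the rejected condition amounts to the following check (plain Python, purely illustrative; not the actual CANN verifier code):

def shape_is_valid(shape):
    # The verifier rejects any operator input whose shape contains a
    # non-positive dimension, which is what the error message reports.
    return all(dim > 0 for dim in shape)

print(shape_is_valid((16, 128)))   # True
print(shape_is_valid((0, 128)))    # False: "contains negative or zero dimension"
print(shape_is_valid((-1, 128)))   # False: same failure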