-
Notifications
You must be signed in to change notification settings - Fork 839
Open
Labels
Description
Your current environment
Details
### 环境信息
910B3 8*4 32 card
### 镜像
quay.io/ascend/vllm-ascend:glm5
### 配置信息
节点1信息
# Node 1 of 4: data-parallel start rank 0, launched without --headless.
# local_ip and nic_name are read from `ifconfig` output; nic_name is the
# network interface name that corresponds to local_ip on the current node.
nic_name="eth6"
local_ip="xxx.90"
# node0_ip must be identical to the local_ip configured on node0 (master node).
node0_ip="xxx.90"

# Communication settings are bound to this node's IP address and interface.
export HCCL_OP_EXPANSION_MODE="AIV"
export HCCL_IF_IP="$local_ip"
export GLOO_SOCKET_IFNAME="$nic_name"
export TP_SOCKET_IFNAME="$nic_name"
export HCCL_SOCKET_IFNAME="$nic_name"
export OMP_PROC_BIND=false
export OMP_NUM_THREADS=10
export VLLM_USE_V1=1
export HCCL_BUFFSIZE=200
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True

# No comments or blank lines may appear inside the continued command below.
vllm serve /opt/weight/GLM-5-Artget/GLM-5/ \
  --host 0.0.0.0 \
  --port 16888 \
  --data-parallel-size 4 \
  --data-parallel-size-local 1 \
  --data-parallel-start-rank 0 \
  --data-parallel-address "$node0_ip" \
  --data-parallel-rpc-port 13389 \
  --tensor-parallel-size 8 \
  --seed 1024 \
  --served-model-name glm-5 \
  --enable-expert-parallel \
  --max-num-seqs 100 \
  --max-model-len 131072 \
  --max-num-batched-tokens 4096 \
  --trust-remote-code \
  --no-enable-prefix-caching \
  --gpu-memory-utilization 0.95 \
  --compilation-config '{"cudagraph_mode": "FULL_DECODE_ONLY"}' \
  --additional-config '{"multistream_overlap_shared_expert":true}' \
  --speculative-config '{"num_speculative_tokens": 3, "method": "deepseek_mtp"}'
节点2信息
# Node 2 of 4: data-parallel start rank 1, launched with --headless.
# local_ip and nic_name are read from `ifconfig` output; nic_name is the
# network interface name that corresponds to local_ip on the current node.
nic_name="eth6"
local_ip="xxx.93"
# node0_ip must be identical to the local_ip configured on node0 (master node).
node0_ip="xxx.90"

# Communication settings are bound to this node's IP address and interface.
export HCCL_OP_EXPANSION_MODE="AIV"
export HCCL_IF_IP="$local_ip"
export GLOO_SOCKET_IFNAME="$nic_name"
export TP_SOCKET_IFNAME="$nic_name"
export HCCL_SOCKET_IFNAME="$nic_name"
export OMP_PROC_BIND=false
export OMP_NUM_THREADS=10
export VLLM_USE_V1=1
export HCCL_BUFFSIZE=200
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True

# No comments or blank lines may appear inside the continued command below.
vllm serve /opt/weight/GLM-5-Artget/GLM-5/ \
  --host 0.0.0.0 \
  --port 16888 \
  --headless \
  --data-parallel-size 4 \
  --data-parallel-size-local 1 \
  --data-parallel-start-rank 1 \
  --data-parallel-address "$node0_ip" \
  --data-parallel-rpc-port 13389 \
  --tensor-parallel-size 8 \
  --seed 1024 \
  --served-model-name glm-5 \
  --enable-expert-parallel \
  --max-num-seqs 100 \
  --max-model-len 131072 \
  --max-num-batched-tokens 4096 \
  --trust-remote-code \
  --no-enable-prefix-caching \
  --gpu-memory-utilization 0.95 \
  --compilation-config '{"cudagraph_mode": "FULL_DECODE_ONLY"}' \
  --additional-config '{"multistream_overlap_shared_expert":true}' \
  --speculative-config '{"num_speculative_tokens": 3, "method": "deepseek_mtp"}'
节点3信息
# Node 3 of 4: data-parallel start rank 2, launched with --headless.
# local_ip and nic_name are read from `ifconfig` output; nic_name is the
# network interface name that corresponds to local_ip on the current node.
nic_name="eth6"
local_ip="xxx.62"
# node0_ip must be identical to the local_ip configured on node0 (master node).
node0_ip="xxx.90"

# Communication settings are bound to this node's IP address and interface.
export HCCL_OP_EXPANSION_MODE="AIV"
export HCCL_IF_IP="$local_ip"
export GLOO_SOCKET_IFNAME="$nic_name"
export TP_SOCKET_IFNAME="$nic_name"
export HCCL_SOCKET_IFNAME="$nic_name"
export OMP_PROC_BIND=false
export OMP_NUM_THREADS=10
export VLLM_USE_V1=1
export HCCL_BUFFSIZE=200
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True

# No comments or blank lines may appear inside the continued command below.
vllm serve /opt/weight/GLM-5-Artget/GLM-5/ \
  --host 0.0.0.0 \
  --port 16888 \
  --headless \
  --data-parallel-size 4 \
  --data-parallel-size-local 1 \
  --data-parallel-start-rank 2 \
  --data-parallel-address "$node0_ip" \
  --data-parallel-rpc-port 13389 \
  --tensor-parallel-size 8 \
  --seed 1024 \
  --served-model-name glm-5 \
  --enable-expert-parallel \
  --max-num-seqs 100 \
  --max-model-len 131072 \
  --max-num-batched-tokens 4096 \
  --trust-remote-code \
  --no-enable-prefix-caching \
  --gpu-memory-utilization 0.95 \
  --compilation-config '{"cudagraph_mode": "FULL_DECODE_ONLY"}' \
  --additional-config '{"multistream_overlap_shared_expert":true}' \
  --speculative-config '{"num_speculative_tokens": 3, "method": "deepseek_mtp"}'
节点4信息
# Node 4 of 4: data-parallel start rank 3, launched with --headless.
# local_ip and nic_name are read from `ifconfig` output; nic_name is the
# network interface name that corresponds to local_ip on the current node.
nic_name="eth6"
local_ip="XXX.67"
# node0_ip must be identical to the local_ip configured on node0 (master node).
node0_ip="xxx.90"

# Communication settings are bound to this node's IP address and interface.
export HCCL_OP_EXPANSION_MODE="AIV"
export HCCL_IF_IP="$local_ip"
export GLOO_SOCKET_IFNAME="$nic_name"
export TP_SOCKET_IFNAME="$nic_name"
export HCCL_SOCKET_IFNAME="$nic_name"
export OMP_PROC_BIND=false
export OMP_NUM_THREADS=10
export VLLM_USE_V1=1
export HCCL_BUFFSIZE=200
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True

# No comments or blank lines may appear inside the continued command below.
vllm serve /opt/weight/GLM-5-Artget/GLM-5/ \
  --host 0.0.0.0 \
  --port 16888 \
  --headless \
  --data-parallel-size 4 \
  --data-parallel-size-local 1 \
  --data-parallel-start-rank 3 \
  --data-parallel-address "$node0_ip" \
  --data-parallel-rpc-port 13389 \
  --tensor-parallel-size 8 \
  --seed 1024 \
  --served-model-name glm-5 \
  --enable-expert-parallel \
  --max-num-seqs 100 \
  --max-model-len 131072 \
  --max-num-batched-tokens 4096 \
  --trust-remote-code \
  --no-enable-prefix-caching \
  --gpu-memory-utilization 0.95 \
  --compilation-config '{"cudagraph_mode": "FULL_DECODE_ONLY"}' \
  --additional-config '{"multistream_overlap_shared_expert":true}' \
  --speculative-config '{"num_speculative_tokens": 3, "method": "deepseek_mtp"}'
🐛 Describe the bug
错误信息
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783] WorkerProc failed to start.
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783] Traceback (most recent call last):
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783] File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/executor/multiproc_executor.py", line 754, in worker_main
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783] worker = WorkerProc(*args, **kwargs)
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783] ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783] File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/executor/multiproc_executor.py", line 580, in __init__
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783] self.worker.load_model()
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783] File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/worker.py", line 404, in load_model
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783] self.model_runner.load_model()
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783] File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/model_runner_v1.py", line 2317, in load_model
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783] self.drafter.load_model(self.model)
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783] File "/vllm-workspace/vllm-ascend/vllm_ascend/spec_decode/eagle_proposer.py", line 155, in load_model
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783] self.model = get_model(
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783] ^^^^^^^^^^
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783] File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/model_loader/__init__.py", line 135, in get_model
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783] return loader.load_model(
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783] ^^^^^^^^^^^^^^^^^^
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783] File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/model_loader/base_loader.py", line 62, in load_model
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783] self.load_weights(model, model_config)
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783] File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/model_loader/default_loader.py", line 303, in load_weights
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783] raise ValueError(
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783] ValueError: Following weights were not initialized from checkpoint: {'model.embed_tokens.weight', 'model.layers.78.shared_head.head.weight'}
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783] WorkerProc failed to start.
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783] Traceback (most recent call last):
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783] File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/executor/multiproc_executor.py", line 754, in worker_main
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783] worker = WorkerProc(*args, **kwargs)
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783] ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783] File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/executor/multiproc_executor.py", line 580, in __init__
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783] self.worker.load_model()
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783] File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/worker.py", line 404, in load_model
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783] self.model_runner.load_model()
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783] File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/model_runner_v1.py", line 2317, in load_model
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783] self.drafter.load_model(self.model)
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783] File "/vllm-workspace/vllm-ascend/vllm_ascend/spec_decode/eagle_proposer.py", line 155, in load_model
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783] self.model = get_model(
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783] ^^^^^^^^^^
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783] File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/model_loader/__init__.py", line 135, in get_model
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783] return loader.load_model(
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783] ^^^^^^^^^^^^^^^^^^
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783] File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/model_loader/base_loader.py", line 62, in load_model
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783] self.load_weights(model, model_config)
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783] File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/model_loader/default_loader.py", line 303, in load_weights
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783] raise ValueError(
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783] ValueError: Following weights were not initialized from checkpoint: {'model.layers.78.shared_head.head.weight', 'model.embed_tokens.weight'}
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783] WorkerProc failed to start.
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783] Traceback (most recent call last):
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783] File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/executor/multiproc_executor.py", line 754, in worker_main
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783] worker = WorkerProc(*args, **kwargs)
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783] ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783] File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/executor/multiproc_executor.py", line 580, in __init__
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783] self.worker.load_model()
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783] File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/worker.py", line 404, in load_model
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783] self.model_runner.load_model()
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783] File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/model_runner_v1.py", line 2317, in load_model
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783] self.drafter.load_model(self.model)
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783] File "/vllm-workspace/vllm-ascend/vllm_ascend/spec_decode/eagle_proposer.py", line 155, in load_model
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783] self.model = get_model(
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783] ^^^^^^^^^^
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783] File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/model_loader/__init__.py", line 135, in get_model
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783] return loader.load_model(
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783] ^^^^^^^^^^^^^^^^^^
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783] File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/model_loader/base_loader.py", line 62, in load_model
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783] self.load_weights(model, model_config)
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783] File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/model_loader/default_loader.py", line 303, in load_weights
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783] raise ValueError(
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783] ValueError: Following weights were not initialized from checkpoint: {'model.embed_tokens.weight', 'model.layers.78.shared_head.head.weight'}
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783] WorkerProc failed to start.
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783] Traceback (most recent call last):
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783] File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/executor/multiproc_executor.py", line 754, in worker_main
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783] worker = WorkerProc(*args, **kwargs)
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783] ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783] File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/executor/multiproc_executor.py", line 580, in __init__
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783] self.worker.load_model()
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783] File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/worker.py", line 404, in load_model
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783] self.model_runner.load_model()
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783] File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/model_runner_v1.py", line 2317, in load_model
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783] self.drafter.load_model(self.model)
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783] File "/vllm-workspace/vllm-ascend/vllm_ascend/spec_decode/eagle_proposer.py", line 155, in load_model
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783] self.model = get_model(
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783] ^^^^^^^^^^
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783] File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/model_loader/__init__.py", line 135, in get_model
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783] return loader.load_model(
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783] ^^^^^^^^^^^^^^^^^^
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783] File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/model_loader/base_loader.py", line 62, in load_model
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783] self.load_weights(model, model_config)
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783] File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/model_loader/default_loader.py", line 303, in load_weights
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783] raise ValueError(
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783] ValueError: Following weights were not initialized from checkpoint: {'model.layers.78.shared_head.head.weight', 'model.embed_tokens.weight'}
(Worker_DP2_TP2_EP18 pid=442) ERROR 02-13 10:40:10 [multiproc_executor.py:783] WorkerProc failed to start.
(Worker_DP2_TP2_EP18 pid=442) ERROR 02-13 10:40:10 [multiproc_executor.py:783] Traceback (most recent call last):
(Worker_DP2_TP2_EP18 pid=442) ERROR 02-13 10:40:10 [multiproc_executor.py:783] File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/executor/multiproc_executor.py", line 754, in worker_main
(Worker_DP2_TP2_EP18 pid=442) ERROR 02-13 10:40:10 [multiproc_executor.py:783] worker = WorkerProc(*args, **kwargs)
(Worker_DP2_TP2_EP18 pid=442) ERROR 02-13 10:40:10 [multiproc_executor.py:783] ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_DP2_TP2_EP18 pid=442) ERROR 02-13 10:40:10 [multiproc_executor.py:783] File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/executor/multiproc_executor.py", line 580, in __init__
(Worker_DP2_TP2_EP18 pid=442) ERROR 02-13 10:40:10 [multiproc_executor.py:783] self.worker.load_model()
(Worker_DP2_TP2_EP18 pid=442) ERROR 02-13 10:40:10 [multiproc_executor.py:783] File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/worker.py", line 404, in load_model
(Worker_DP2_TP2_EP18 pid=442) ERROR 02-13 10:40:10 [multiproc_executor.py:783] self.model_runner.load_model()
(Worker_DP2_TP2_EP18 pid=442) ERROR 02-13 10:40:10 [multiproc_executor.py:783] File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/model_runner_v1.py", line 2317, in load_model
(Worker_DP2_TP2_EP18 pid=442) ERROR 02-13 10:40:10 [multiproc_executor.py:783] self.drafter.load_model(self.model)
(Worker_D
Reactions are currently unavailable