
[Bug]: GLM-5 Load weight error: Following weights were not initialized from checkpoint: {'model.layers.78.shared_head.head.weight', 'model.embed_tokens.weight'} #6754

@YiYang-Eon

Description


Your current environment

Environment: 910B3, 4 nodes × 8 cards (32 cards in total)
Image: quay.io/ascend/vllm-ascend:glm5

Configuration

Node 1 (master node)

# Obtained via ifconfig:
# nic_name is the network interface corresponding to local_ip on the current node.
nic_name="eth6"
local_ip="xxx.90"

# The value of node0_ip must be consistent with the value of local_ip set in node0 (master node)
node0_ip="xxx.90"

export HCCL_OP_EXPANSION_MODE="AIV"

export HCCL_IF_IP=$local_ip
export GLOO_SOCKET_IFNAME=$nic_name
export TP_SOCKET_IFNAME=$nic_name
export HCCL_SOCKET_IFNAME=$nic_name
export OMP_PROC_BIND=false
export OMP_NUM_THREADS=10
export VLLM_USE_V1=1
export HCCL_BUFFSIZE=200
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True

vllm serve /opt/weight/GLM-5-Artget/GLM-5/  \
--host 0.0.0.0 \
--port 16888 \
--data-parallel-size 4 \
--data-parallel-size-local 1 \
--data-parallel-start-rank 0 \
--data-parallel-address $node0_ip \
--data-parallel-rpc-port 13389 \
--tensor-parallel-size 8 \
--seed 1024 \
--served-model-name glm-5 \
--enable-expert-parallel \
--max-num-seqs 100 \
--max-model-len 131072 \
--max-num-batched-tokens 4096 \
--trust-remote-code \
--no-enable-prefix-caching \
--gpu-memory-utilization 0.95 \
--compilation-config '{"cudagraph_mode": "FULL_DECODE_ONLY"}' \
--additional-config '{"multistream_overlap_shared_expert":true}' \
--speculative-config '{"num_speculative_tokens": 3, "method": "deepseek_mtp"}'

Node 2

# Obtained via ifconfig:
# nic_name is the network interface corresponding to local_ip on the current node.
nic_name="eth6"
local_ip="xxx.93"

# The value of node0_ip must be consistent with the value of local_ip set in node0 (master node)
node0_ip="xxx.90"

export HCCL_OP_EXPANSION_MODE="AIV"

export HCCL_IF_IP=$local_ip
export GLOO_SOCKET_IFNAME=$nic_name
export TP_SOCKET_IFNAME=$nic_name
export HCCL_SOCKET_IFNAME=$nic_name
export OMP_PROC_BIND=false
export OMP_NUM_THREADS=10
export VLLM_USE_V1=1
export HCCL_BUFFSIZE=200
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True

vllm serve /opt/weight/GLM-5-Artget/GLM-5/  \
--host 0.0.0.0 \
--port 16888 \
--headless \
--data-parallel-size 4 \
--data-parallel-size-local 1 \
--data-parallel-start-rank 1 \
--data-parallel-address $node0_ip \
--data-parallel-rpc-port 13389 \
--tensor-parallel-size 8 \
--seed 1024 \
--served-model-name glm-5 \
--enable-expert-parallel \
--max-num-seqs 100  \
--max-model-len 131072 \
--max-num-batched-tokens 4096 \
--trust-remote-code \
--no-enable-prefix-caching \
--gpu-memory-utilization 0.95 \
--compilation-config '{"cudagraph_mode": "FULL_DECODE_ONLY"}' \
--additional-config '{"multistream_overlap_shared_expert":true}' \
--speculative-config '{"num_speculative_tokens": 3, "method": "deepseek_mtp"}'

Node 3

# Obtained via ifconfig:
# nic_name is the network interface corresponding to local_ip on the current node.
nic_name="eth6"
local_ip="xxx.62"

# The value of node0_ip must be consistent with the value of local_ip set in node0 (master node)
node0_ip="xxx.90"

export HCCL_OP_EXPANSION_MODE="AIV"

export HCCL_IF_IP=$local_ip
export GLOO_SOCKET_IFNAME=$nic_name
export TP_SOCKET_IFNAME=$nic_name
export HCCL_SOCKET_IFNAME=$nic_name
export OMP_PROC_BIND=false
export OMP_NUM_THREADS=10
export VLLM_USE_V1=1
export HCCL_BUFFSIZE=200
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True

vllm serve /opt/weight/GLM-5-Artget/GLM-5/  \
--host 0.0.0.0 \
--port 16888 \
--headless \
--data-parallel-size 4 \
--data-parallel-size-local 1 \
--data-parallel-start-rank 2 \
--data-parallel-address $node0_ip \
--data-parallel-rpc-port 13389 \
--tensor-parallel-size 8 \
--seed 1024 \
--served-model-name glm-5 \
--enable-expert-parallel \
--max-num-seqs 100  \
--max-model-len 131072 \
--max-num-batched-tokens 4096 \
--trust-remote-code \
--no-enable-prefix-caching \
--gpu-memory-utilization 0.95 \
--compilation-config '{"cudagraph_mode": "FULL_DECODE_ONLY"}' \
--additional-config '{"multistream_overlap_shared_expert":true}' \
--speculative-config '{"num_speculative_tokens": 3, "method": "deepseek_mtp"}'

Node 4

# Obtained via ifconfig:
# nic_name is the network interface corresponding to local_ip on the current node.
nic_name="eth6"
local_ip="XXX.67"

# The value of node0_ip must be consistent with the value of local_ip set in node0 (master node)
node0_ip="xxx.90"

export HCCL_OP_EXPANSION_MODE="AIV"

export HCCL_IF_IP=$local_ip
export GLOO_SOCKET_IFNAME=$nic_name
export TP_SOCKET_IFNAME=$nic_name
export HCCL_SOCKET_IFNAME=$nic_name
export OMP_PROC_BIND=false
export OMP_NUM_THREADS=10
export VLLM_USE_V1=1
export HCCL_BUFFSIZE=200
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True

vllm serve /opt/weight/GLM-5-Artget/GLM-5/  \
--host 0.0.0.0 \
--port 16888 \
--headless \
--data-parallel-size 4 \
--data-parallel-size-local 1 \
--data-parallel-start-rank 3 \
--data-parallel-address $node0_ip \
--data-parallel-rpc-port 13389 \
--tensor-parallel-size 8 \
--seed 1024 \
--served-model-name glm-5 \
--enable-expert-parallel \
--max-num-seqs 100  \
--max-model-len 131072 \
--max-num-batched-tokens 4096 \
--trust-remote-code \
--no-enable-prefix-caching \
--gpu-memory-utilization 0.95 \
--compilation-config '{"cudagraph_mode": "FULL_DECODE_ONLY"}' \
--additional-config '{"multistream_overlap_shared_expert":true}' \
--speculative-config '{"num_speculative_tokens": 3, "method": "deepseek_mtp"}'
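
For reference, the parallel layout implied by these flags spans the whole 32-card cluster: 4 data-parallel ranks × 8-way tensor parallelism, with each node hosting exactly one DP rank (start ranks 0 through 3). A quick sanity check of that arithmetic, as a standalone Python sketch (the variable names mirror the CLI flags and are illustrative only, not vLLM internals):

# Sanity check of the parallel layout described by the four launch commands above.
data_parallel_size = 4        # --data-parallel-size
tensor_parallel_size = 8      # --tensor-parallel-size
data_parallel_size_local = 1  # --data-parallel-size-local (one DP rank per node)
num_nodes = 4
cards_per_node = 8

world_size = data_parallel_size * tensor_parallel_size
assert world_size == num_nodes * cards_per_node == 32

# One DP rank per node, so the per-node start ranks 0..3 tile the DP dimension.
assert [0, 1, 2, 3] == list(range(0, data_parallel_size, data_parallel_size_local))
print(f"world size = {world_size} ranks across {num_nodes} nodes")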


🐛 Describe the bug

Error log

(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783] WorkerProc failed to start.
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783] Traceback (most recent call last):
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783]   File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/executor/multiproc_executor.py", line 754, in worker_main
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783]     worker = WorkerProc(*args, **kwargs)
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783]              ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783]   File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/executor/multiproc_executor.py", line 580, in __init__
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783]     self.worker.load_model()
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783]   File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/worker.py", line 404, in load_model
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783]     self.model_runner.load_model()
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783]   File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/model_runner_v1.py", line 2317, in load_model
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783]     self.drafter.load_model(self.model)
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783]   File "/vllm-workspace/vllm-ascend/vllm_ascend/spec_decode/eagle_proposer.py", line 155, in load_model
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783]     self.model = get_model(
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783]                  ^^^^^^^^^^
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783]   File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/model_loader/__init__.py", line 135, in get_model
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783]     return loader.load_model(
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783]            ^^^^^^^^^^^^^^^^^^
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783]   File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/model_loader/base_loader.py", line 62, in load_model
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783]     self.load_weights(model, model_config)
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783]   File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/model_loader/default_loader.py", line 303, in load_weights
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783]     raise ValueError(
(Worker_DP2_TP6_EP22 pid=446) ERROR 02-13 10:40:07 [multiproc_executor.py:783] ValueError: Following weights were not initialized from checkpoint: {'model.embed_tokens.weight', 'model.layers.78.shared_head.head.weight'}
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783] WorkerProc failed to start.
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783] Traceback (most recent call last):
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783]   File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/executor/multiproc_executor.py", line 754, in worker_main
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783]     worker = WorkerProc(*args, **kwargs)
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783]              ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783]   File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/executor/multiproc_executor.py", line 580, in __init__
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783]     self.worker.load_model()
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783]   File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/worker.py", line 404, in load_model
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783]     self.model_runner.load_model()
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783]   File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/model_runner_v1.py", line 2317, in load_model
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783]     self.drafter.load_model(self.model)
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783]   File "/vllm-workspace/vllm-ascend/vllm_ascend/spec_decode/eagle_proposer.py", line 155, in load_model
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783]     self.model = get_model(
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783]                  ^^^^^^^^^^
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783]   File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/model_loader/__init__.py", line 135, in get_model
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783]     return loader.load_model(
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783]            ^^^^^^^^^^^^^^^^^^
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783]   File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/model_loader/base_loader.py", line 62, in load_model
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783]     self.load_weights(model, model_config)
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783]   File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/model_loader/default_loader.py", line 303, in load_weights
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783]     raise ValueError(
(Worker_DP2_TP4_EP20 pid=444) ERROR 02-13 10:40:08 [multiproc_executor.py:783] ValueError: Following weights were not initialized from checkpoint: {'model.layers.78.shared_head.head.weight', 'model.embed_tokens.weight'}
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783] WorkerProc failed to start.
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783] Traceback (most recent call last):
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783]   File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/executor/multiproc_executor.py", line 754, in worker_main
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783]     worker = WorkerProc(*args, **kwargs)
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783]              ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783]   File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/executor/multiproc_executor.py", line 580, in __init__
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783]     self.worker.load_model()
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783]   File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/worker.py", line 404, in load_model
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783]     self.model_runner.load_model()
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783]   File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/model_runner_v1.py", line 2317, in load_model
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783]     self.drafter.load_model(self.model)
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783]   File "/vllm-workspace/vllm-ascend/vllm_ascend/spec_decode/eagle_proposer.py", line 155, in load_model
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783]     self.model = get_model(
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783]                  ^^^^^^^^^^
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783]   File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/model_loader/__init__.py", line 135, in get_model
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783]     return loader.load_model(
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783]            ^^^^^^^^^^^^^^^^^^
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783]   File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/model_loader/base_loader.py", line 62, in load_model
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783]     self.load_weights(model, model_config)
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783]   File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/model_loader/default_loader.py", line 303, in load_weights
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783]     raise ValueError(
(Worker_DP2_TP5_EP21 pid=445) ERROR 02-13 10:40:09 [multiproc_executor.py:783] ValueError: Following weights were not initialized from checkpoint: {'model.embed_tokens.weight', 'model.layers.78.shared_head.head.weight'}
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783] WorkerProc failed to start.
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783] Traceback (most recent call last):
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783]   File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/executor/multiproc_executor.py", line 754, in worker_main
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783]     worker = WorkerProc(*args, **kwargs)
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783]              ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783]   File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/executor/multiproc_executor.py", line 580, in __init__
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783]     self.worker.load_model()
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783]   File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/worker.py", line 404, in load_model
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783]     self.model_runner.load_model()
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783]   File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/model_runner_v1.py", line 2317, in load_model
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783]     self.drafter.load_model(self.model)
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783]   File "/vllm-workspace/vllm-ascend/vllm_ascend/spec_decode/eagle_proposer.py", line 155, in load_model
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783]     self.model = get_model(
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783]                  ^^^^^^^^^^
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783]   File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/model_loader/__init__.py", line 135, in get_model
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783]     return loader.load_model(
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783]            ^^^^^^^^^^^^^^^^^^
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783]   File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/model_loader/base_loader.py", line 62, in load_model
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783]     self.load_weights(model, model_config)
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783]   File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/model_executor/model_loader/default_loader.py", line 303, in load_weights
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783]     raise ValueError(
(Worker_DP2_TP3_EP19 pid=443) ERROR 02-13 10:40:09 [multiproc_executor.py:783] ValueError: Following weights were not initialized from checkpoint: {'model.layers.78.shared_head.head.weight', 'model.embed_tokens.weight'}
(Worker_DP2_TP2_EP18 pid=442) ERROR 02-13 10:40:10 [multiproc_executor.py:783] WorkerProc failed to start.
(Worker_DP2_TP2_EP18 pid=442) ERROR 02-13 10:40:10 [multiproc_executor.py:783] Traceback (most recent call last):
(Worker_DP2_TP2_EP18 pid=442) ERROR 02-13 10:40:10 [multiproc_executor.py:783]   File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/executor/multiproc_executor.py", line 754, in worker_main
(Worker_DP2_TP2_EP18 pid=442) ERROR 02-13 10:40:10 [multiproc_executor.py:783]     worker = WorkerProc(*args, **kwargs)
(Worker_DP2_TP2_EP18 pid=442) ERROR 02-13 10:40:10 [multiproc_executor.py:783]              ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(Worker_DP2_TP2_EP18 pid=442) ERROR 02-13 10:40:10 [multiproc_executor.py:783]   File "/usr/local/python3.11.14/lib/python3.11/site-packages/vllm/v1/executor/multiproc_executor.py", line 580, in __init__
(Worker_DP2_TP2_EP18 pid=442) ERROR 02-13 10:40:10 [multiproc_executor.py:783]     self.worker.load_model()
(Worker_DP2_TP2_EP18 pid=442) ERROR 02-13 10:40:10 [multiproc_executor.py:783]   File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/worker.py", line 404, in load_model
(Worker_DP2_TP2_EP18 pid=442) ERROR 02-13 10:40:10 [multiproc_executor.py:783]     self.model_runner.load_model()
(Worker_DP2_TP2_EP18 pid=442) ERROR 02-13 10:40:10 [multiproc_executor.py:783]   File "/vllm-workspace/vllm-ascend/vllm_ascend/worker/model_runner_v1.py", line 2317, in load_model
(Worker_DP2_TP2_EP18 pid=442) ERROR 02-13 10:40:10 [multiproc_executor.py:783]     self.drafter.load_model(self.model)
(Worker_D
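
The traceback passes through vllm_ascend/spec_decode/eagle_proposer.py, i.e. the failure happens while the deepseek_mtp drafter configured via --speculative-config loads its weights, and the two tensors it cannot find are model.embed_tokens.weight and model.layers.78.shared_head.head.weight. One way to check whether those tensors exist in the checkpoint at all is to scan the safetensors index; below is a minimal sketch, assuming the weight directory ships a standard model.safetensors.index.json (the path is the one used in the launch commands):

import json
from pathlib import Path

# Look up the two weight names reported as uninitialized in the checkpoint's
# safetensors index (standard Hugging Face sharded-checkpoint layout assumed).
ckpt_dir = Path("/opt/weight/GLM-5-Artget/GLM-5")
weight_map = json.loads(
    (ckpt_dir / "model.safetensors.index.json").read_text()
)["weight_map"]  # tensor name -> shard file

for name in [
    "model.embed_tokens.weight",
    "model.layers.78.shared_head.head.weight",
]:
    shard = weight_map.get(name)
    print(f"{name}: {'found in ' + shard if shard else 'NOT listed in the index'}")

If both names are listed, the problem is more likely in how the MTP drafter maps them on vllm-ascend; if they are absent, the drafter is expecting tensor names the checkpoint does not provide.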

Labels: bug (Something isn't working), glm5