
Commit 6fff24f

[Bugfix] Qwen3.5 kv-scale weight remapping (vllm-project#34719)
Signed-off-by: Linda-Stadter <57756729+Linda-Stadter@users.noreply.github.com>
1 parent 23210a9 commit 6fff24f

1 file changed: vllm/model_executor/models/qwen3_5.py (7 additions, 0 deletions)
@@ -57,6 +57,7 @@
 )
 from vllm.model_executor.model_loader.weight_utils import (
     default_weight_loader,
+    maybe_remap_kv_scale_name,
 )
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.sequence import IntermediateTensors
@@ -397,6 +398,12 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
             if name.startswith("mtp."):
                 continue
 
+            # Remapping the name of FP8 kv-scale.
+            if name.endswith("scale"):
+                name = maybe_remap_kv_scale_name(name, params_dict)
+                if name is None:
+                    continue
+
             for param_name, weight_name, shard_id in stacked_params_mapping:
                 if "experts.gate_up_proj" in name or "experts.down_proj" in name:
                     is_fused_expert = True
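
For context: maybe_remap_kv_scale_name translates kv-cache scale names as stored in an FP8 checkpoint into the names the model actually registers, and returns None when no matching parameter exists so the loader can skip that weight, which is what the added `continue` handles. Below is a minimal standalone sketch of that pattern; `_remap_kv_scale` and the sample parameter names are hypothetical stand-ins for illustration, not vLLM's real implementation.

from typing import Optional

def _remap_kv_scale(name: str, params_dict: dict) -> Optional[str]:
    """Hypothetical stand-in for vLLM's maybe_remap_kv_scale_name."""
    for suffix in (".k_scale", ".v_scale"):
        if name.endswith(suffix):
            # FP8 checkpoints may attach the scale to the projection
            # layer, while the model registers it on the attention
            # module; try that remapping.
            candidate = name.replace(".self_attn" + suffix,
                                     ".self_attn.attn" + suffix)
            if candidate in params_dict:
                return candidate
            return None  # no matching parameter: skip this weight
    return name  # not a kv-scale name: leave unchanged

params_dict = {"layers.0.self_attn.attn.k_scale": 1.0}

for name in ("layers.0.self_attn.k_scale",   # remaps successfully
             "layers.0.self_attn.v_scale"):   # no target -> skipped
    remapped = _remap_kv_scale(name, params_dict)
    if remapped is None:
        continue  # mirrors the `continue` added in load_weights
    print(name, "->", remapped)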
