Skip to content

Commit fe07405

Browse files
cyang49 and dumb0002
authored and committed
[Model] Granite-4 support loading quantized checkpoint (vllm-project#22925)
Signed-off-by: Chih-Chieh-Yang <[email protected]>
1 parent 88d430b commit fe07405

File tree

1 file changed: +6 -2 lines changed

1 file changed: +6 -2 lines changed

vllm/model_executor/models/granitemoehybrid.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -471,7 +471,10 @@ def _load_expert(n, p, name, shard_id, expert_id):
471 471   # Mapping different experts' layout:
472 472   # from HF (input_linear, output_linear, router)
473 473   # to vLLM (experts_w13({e}.w1, {e}.w2), experts_w3({e}.w3), gate)
474     - if n.endswith('.block_sparse_moe.input_linear.weight'):
    474 + # The renaming and parameter loading logic is the same for weight
    475 + # and weight_scale tensors so we can reuse them without issues.
    476 + if (n.endswith('.block_sparse_moe.input_linear.weight') or
    477 +         n.endswith('.block_sparse_moe.input_linear.weight_scale')):
475 478       for e in range(p.size(0)):
476 479           w1_name = n.replace(
477 480               '.block_sparse_moe.input_linear.weight',
@@ -490,7 +493,8 @@ def _load_expert(n, p, name, shard_id, expert_id):
490 493               w3_name,
491 494               shard_id='w3',
492 495               expert_id=e)
493     - elif n.endswith('.block_sparse_moe.output_linear.weight'):
    496 + elif (n.endswith('.block_sparse_moe.output_linear.weight') or
    497 +         n.endswith('.block_sparse_moe.output_linear.weight_scale')):
494 498       for e in range(p.size(0)):
495 499           w2_name = n.replace(
496 500               '.block_sparse_moe.output_linear.weight',

0 commit comments

Comments
 (0)