Commit a27920d

fix bmm experts export issue with nvfp4 scales
Signed-off-by: Zhiyu Cheng <[email protected]>
1 parent 061f2e5 commit a27920d

File tree: 1 file changed, +4 -4 lines changed

modelopt/torch/export/unified_export_hf.py

Lines changed: 4 additions & 4 deletions
@@ -299,11 +299,11 @@ def _export_quantized_weight(
         expert_type in type(sub_module).__name__
         for expert_type in ["Llama4TextExperts", "GptOssExperts"]
     )
-
+
     if is_expert_weight:
         # Transpose from (num_experts, in_dim, out_dim) to (num_experts, out_dim, in_dim)
         transposed_weight = weight.transpose(-2, -1).contiguous()
-
+
         # Compute scaling factor from transposed weight
         weight_scale = NVFP4QTensor.get_weights_scaling_factor(
             transposed_weight,
@@ -319,10 +319,10 @@ def _export_quantized_weight(
             weight_scale_2,
             block_size,
         )
-
+
         # Transpose quantized weight back to original format (num_experts, in_dim, out_dim)
         quantized_weight = quantized_weight.transpose(-2, -1).contiguous()
-
+
         # Transpose scaling factor back to match original weight dimensions
         weight_scale = weight_scale.transpose(-2, -1).contiguous()
     else:
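
For context, the hunk above handles batched (BMM) expert weights of shape (num_experts, in_dim, out_dim): the weight is transposed so blocks run along in_dim, the NVFP4 block scales are computed, and then both the quantized weight and the scales are transposed back. The sketch below is illustrative only, not the ModelOpt implementation: the scale/quantize step is a placeholder standing in for NVFP4QTensor.get_weights_scaling_factor and the real FP4 packing, and the function name quantize_expert_weight_sketch is invented for the example.

import torch


def quantize_expert_weight_sketch(weight, block_size=16):
    """Illustrative only. weight has shape (num_experts, in_dim, out_dim)."""
    num_experts, in_dim, out_dim = weight.shape
    assert in_dim % block_size == 0, "in_dim must be a multiple of block_size"

    # Transpose so the block (reduction) dimension is last: (E, out_dim, in_dim).
    transposed = weight.transpose(-2, -1).contiguous()

    # Placeholder per-block scale: max |w| in each block over the largest
    # representable FP4 (E2M1) magnitude, 6.0.
    blocks = transposed.reshape(num_experts, out_dim, in_dim // block_size, block_size)
    scale = blocks.abs().amax(dim=-1).clamp(min=1e-8) / 6.0

    # Placeholder "quantization": scale each block (real code packs to FP4).
    quantized = (blocks / scale.unsqueeze(-1)).reshape(num_experts, out_dim, in_dim)

    # Transpose both results back to the original (E, in_dim, out_dim) layout,
    # mirroring the two transposes in the hunk above.
    quantized = quantized.transpose(-2, -1).contiguous()
    scale = scale.transpose(-2, -1).contiguous()
    return quantized, scale

Calling it on torch.randn(4, 128, 64) returns a quantized tensor in the original (4, 128, 64) layout plus scales of shape (4, 8, 64), i.e. one scale per 16-element block along in_dim, transposed back to match the weight's dimension order.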
