
Commit 3426af6

Deepseek fix for moe fp8 (#624)
Co-authored-by: baishihao <[email protected]>
1 parent d268517 commit 3426af6

File tree

2 files changed: +2 additions, −2 deletions

lightllm/common/basemodel/layer_weights/meta_weights/fused_moe_weight.py

Lines changed: 1 addition & 1 deletion
@@ -33,7 +33,7 @@ def __init__(
         self.lock = threading.Lock()
 
     def set_quant_method(self, quant_method):
-        if isinstance(self.quant_method, vLLMFP8w8a8QuantizationMethod):
+        if isinstance(quant_method, vLLMFP8w8a8QuantizationMethod):
             self.quant_method = quant_method
             if self.quant_method is not None:
                 self.quant_method.is_moe = True
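For context, the original guard inspected the already-stored self.quant_method rather than the method being passed in, so a vLLM FP8 w8a8 method handed to set_quant_method would never be registered unless one had somehow been stored before. Below is a minimal stand-alone sketch of the corrected control flow; FakeFP8Method and FusedMoeWeightSketch are hypothetical stand-ins for illustration only, not classes from lightllm or vllm.

class FakeFP8Method:
    """Hypothetical stand-in for vLLMFP8w8a8QuantizationMethod."""
    is_moe = False

class FusedMoeWeightSketch:
    """Hypothetical stand-in for the fused MoE weight class; only the fixed method is shown."""
    def __init__(self):
        self.quant_method = None  # nothing registered yet

    def set_quant_method(self, quant_method):
        # Before the fix the guard checked self.quant_method (still None at this
        # point in the sketch), so the isinstance test could never pass and the
        # FP8 method was silently dropped. Checking the incoming argument
        # registers it as intended.
        if isinstance(quant_method, FakeFP8Method):
            self.quant_method = quant_method
            if self.quant_method is not None:
                self.quant_method.is_moe = True

w = FusedMoeWeightSketch()
w.set_quant_method(FakeFP8Method())
assert w.quant_method is not None and w.quant_method.is_moe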

lightllm/common/quantization/vllm_quant.py

Lines changed: 1 addition & 1 deletion
@@ -71,7 +71,7 @@ def quantize_moe(self, weight):
         weight_scales = []
         qweights = torch.empty_like(weight, dtype=torch.float8_e4m3fn).cuda()
         for i in range(num_experts):
-            qweight, weight_scale = ops.scaled_fp8_quant(weight[0].cuda(), scale=None, use_per_token_if_dynamic=False)
+            qweight, weight_scale = ops.scaled_fp8_quant(weight[i].cuda(), scale=None, use_per_token_if_dynamic=False)
             qweights[i] = qweight
             weight_scales.append(weight_scale)
         weight_scale = torch.cat(weight_scales, dim=0).reshape(-1)
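The loop is meant to quantize each expert's weight slice separately, but it always quantized weight[0], so every expert ended up with expert 0's FP8 weights and scale. Below is a minimal, self-contained sketch of per-expert per-tensor dynamic FP8 quantization in plain PyTorch; the real code calls vllm's ops.scaled_fp8_quant, and quantize_expert / quantize_moe_sketch here are hypothetical helpers written only to illustrate why indexing by i matters.

import torch

FP8_MAX = torch.finfo(torch.float8_e4m3fn).max  # largest finite e4m3 value

def quantize_expert(w: torch.Tensor):
    """Per-tensor dynamic FP8 quantization: choose a scale so max |w| maps to FP8_MAX."""
    scale = w.abs().max().clamp(min=1e-12) / FP8_MAX
    qw = (w / scale).clamp(-FP8_MAX, FP8_MAX).to(torch.float8_e4m3fn)
    return qw, scale.reshape(1)

def quantize_moe_sketch(weight: torch.Tensor):
    """weight has shape (num_experts, ...); each expert gets its own scale."""
    num_experts = weight.shape[0]
    qweights = torch.empty_like(weight, dtype=torch.float8_e4m3fn)
    weight_scales = []
    for i in range(num_experts):
        # The bug quantized weight[0] on every iteration, so all experts received
        # expert 0's quantized weights and scale; indexing by i is the fix.
        qweight, weight_scale = quantize_expert(weight[i])
        qweights[i] = qweight
        weight_scales.append(weight_scale)
    return qweights, torch.cat(weight_scales, dim=0).reshape(-1)

qweights, scales = quantize_moe_sketch(torch.randn(8, 16, 32))  # 8 experts
assert qweights.dtype == torch.float8_e4m3fn and scales.shape == (8,)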
