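fix: apply the 1e-6 epsilon to the quantization divisor instead of the FP8 max, so an all-zero block (amax == 0) does not divide by zero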

sufubao · gemini-code-assist[bot] · web-flow · commit 94aecac1939f · 2025-07-02T11:24:14.000+08:00
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
diff --git a/lightllm/common/quantization/triton_quant/fp8/fp8w8a8_block_quant_kernel.py b/lightllm/common/quantization/triton_quant/fp8/fp8w8a8_block_quant_kernel.py
@@ -19,9 +19,8 @@ def weight_quant_kernel(x_ptr, s_ptr, y_ptr, M, N, BLOCK_SIZE: tl.constexpr):
     amax = tl.max(tl.abs(x))
 
     max_fp8e4m3_val = 448.0 
-    scale = amax / (max_fp8e4m3_val + 1e-6) 
-
-    y = (x / scale).to(y_ptr.dtype.element_ty)
+    scale = amax / max_fp8e4m3_val
+    y = (x / (scale + 1e-6)).to(y_ptr.dtype.element_ty)
 
     tl.store(y_ptr + offs, y, mask=mask)
     tl.store(s_ptr + pid_m * n_blocks + pid_n, scale)