Skip to content

Commit 4d17741

Browse files
author
wangzaijun
committed
5090 disable tma
1 parent ed7c31e commit 4d17741

File tree

1 file changed

+3
-1
lines changed

1 file changed

+3
-1
lines changed

lightllm/common/quantization/triton_quant/fp8/fp8w8a8_scaled_mm_per_token_kernel.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from typing import Any, Dict, List, Optional, Tuple
99
from triton import Config
1010
from lightllm.common.triton_utils.autotuner import autotune
11-
from lightllm.utils.device_utils import triton_support_tensor_descriptor
11+
from lightllm.utils.device_utils import triton_support_tensor_descriptor, is_5090_gpu
1212

1313

1414
class Fp8ScaledMMKernelConfig(KernelConfigs):
@@ -250,6 +250,8 @@ def fp8_scaled_mm_per_token(
250250

251251
# use tma
252252
support_tma = triton_support_tensor_descriptor()
253+
# On the 5090, enabling TMA does not perform well for small shapes.
254+
support_tma = support_tma and (not is_5090_gpu())
253255
if support_tma:
254256
stride = A.stride(-2)
255257
if (stride * A.dtype.itemsize) % 16 != 0:

0 commit comments

Comments
 (0)