
Commit 60e5d01

add-4090-fa (#1153)

1 parent: e2eb4c4

File tree: 2 files changed (+7 lines, -2 lines)


lightllm/models/vit/triton_kernel/flashattention_nopad.py

Lines changed: 2 additions & 2 deletions
@@ -5,7 +5,7 @@
 import time
 import torch.nn.functional as F
 from typing import Optional, Tuple
-from lightllm.utils.device_utils import is_hopper
+from lightllm.utils.device_utils import is_hopper, is_4090


 if triton.__version__ >= "2.1.0":

@@ -217,7 +217,7 @@ def flash_attention_fwd(q, k, v, o, cu_seqlens, max_seqlen):
     then use the sgl_kernel interface; otherwise use the Triton version.
     """
     global _flash_attn_v3_available
-    if _flash_attn_v3_available and is_hopper():
+    if _flash_attn_v3_available and (is_hopper() or is_4090()):
         try:
             flash_attention_v3_fwd(q, k, v, o, cu_seqlens, max_seqlen)
         except Exception as e:
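To make the changed condition explicit, here is a minimal sketch of which backend it selects after this commit: the sgl_kernel flash-attention v3 path becomes eligible on Hopper GPUs or an RTX 4090, with the Triton kernel used otherwise (and as the fallback when the v3 call raises). The helper name pick_attention_backend is illustrative only and not part of the codebase; it assumes lightllm is importable on a CUDA machine.

# Sketch only: mirrors the condition changed in this commit, not the file's exact body.
from lightllm.utils.device_utils import is_hopper, is_4090

def pick_attention_backend(flash_attn_v3_available: bool) -> str:
    # After this commit, the sgl_kernel FA3 path is eligible on Hopper or RTX 4090.
    if flash_attn_v3_available and (is_hopper() or is_4090()):
        return "flash_attention_v3 (sgl_kernel)"
    return "triton flash_attention"

print(pick_attention_backend(flash_attn_v3_available=True))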

lightllm/utils/device_utils.py

Lines changed: 5 additions & 0 deletions
@@ -24,6 +24,11 @@ def is_hopper():
     )


+@lru_cache(maxsize=None)
+def is_4090():
+    return "4090" in torch.cuda.get_device_name(0) or "RTX 4090" in torch.cuda.get_device_name(0)
+
+
 @lru_cache(maxsize=None)
 def get_device_sm_count():
     import triton
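As a usage sketch (assuming a CUDA build of PyTorch and a visible GPU), the new helper can be exercised as below. Because of lru_cache, the device name is queried at most once per process; note also that the "RTX 4090" check is already covered by the "4090" substring check.

import torch
from lightllm.utils.device_utils import is_4090, is_hopper

if torch.cuda.is_available():
    # e.g. "NVIDIA GeForce RTX 4090" -> is_4090() returns True
    print(torch.cuda.get_device_name(0), "->", "4090" if is_4090() else "other")
    print("hopper:", is_hopper())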
