1 parent b85f89c commit ad7f5c6
specforge/modeling/draft/llama3_eagle.py
@@ -6,7 +6,6 @@
 import torch.distributed as dist
 import torch.nn as nn
 import torch.nn.functional as F
-from flash_attn import flash_attn_func
 from torch.nn.attention.flex_attention import create_block_mask, flex_attention
 from transformers.activations import ACT2FN
 from transformers.cache_utils import Cache
@@ -26,9 +25,10 @@

 try:
     from flash_attn import flash_attn_func
-except:
+except ImportError:
     warnings.warn(
-        "flash_attn is not found, please install flash_attn if you want to use the flash attention backend"
+        "flash_attn is not found, falling back to flex_attention. "
+        "Please install flash_attn if you want to use the flash attention backend."
     )
     flash_attn_func = None
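The guarded import above follows a common optional-dependency pattern: try to import flash_attn, and if it is missing, warn once and set flash_attn_func to None so callers can dispatch to flex_attention instead. Below is a minimal sketch of that pattern; the attention helper and its tensor layout are illustrative assumptions, not the actual dispatch code in llama3_eagle.py.

# Minimal sketch of the optional-import fallback (illustrative only; not the
# repository's actual attention dispatch). Assumes q, k, v are given in
# (batch, num_heads, seq_len, head_dim) layout.
import warnings

import torch
from torch.nn.attention.flex_attention import flex_attention

try:
    from flash_attn import flash_attn_func
except ImportError:
    warnings.warn(
        "flash_attn is not found, falling back to flex_attention. "
        "Please install flash_attn if you want to use the flash attention backend."
    )
    flash_attn_func = None


def attention(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor) -> torch.Tensor:
    if flash_attn_func is not None:
        # flash_attn_func expects (batch, seq_len, num_heads, head_dim) and
        # CUDA tensors in fp16/bf16, so transpose in and out of its layout.
        out = flash_attn_func(q.transpose(1, 2), k.transpose(1, 2), v.transpose(1, 2))
        return out.transpose(1, 2)
    # flex_attention consumes (batch, num_heads, seq_len, head_dim) directly.
    return flex_attention(q, k, v)

This keeps the fallback decision at a single point: code that needs attention only checks whether flash_attn_func is None, rather than re-attempting the import.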