1 parent 0e1f6fe · commit 197112b
native_sparse_attention_pytorch/transformer.py

@@ -214,12 +214,15 @@ def sample(
 
         cache = None
 
-        for _ in tqdm(range(sample_num_times)):
+        for ind in tqdm(range(sample_num_times)):
+            is_first = ind == 0
 
             logits, next_cache = self.forward(
                 out,
                 cache = cache,
-                return_cache = True
+                return_cache = True,
+                disable_flex = not is_first,
+                disable_triton_kernel = not is_first
             )
 
             if use_cache_kv:
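
With this change, sample() passes disable_flex = not is_first and disable_triton_kernel = not is_first into forward(), so the flex-attention and Triton sparse-attention kernels only run on the first iteration, while later cached single-token decode steps fall back to the plain attention path. Below is a minimal usage sketch of how sampling would exercise these flags; the Transformer constructor arguments and the sample() call signature shown here are assumptions for illustration, not taken from the commit.

    # Hypothetical usage sketch: sampling after this change.
    # Constructor arguments and sample() signature are assumed; see transformer.py for the real API.

    import torch
    from native_sparse_attention_pytorch.transformer import Transformer

    model = Transformer(
        num_tokens = 256,   # assumed vocabulary-size argument
        dim = 512,          # assumed model-dimension argument
        depth = 2           # assumed depth argument
    )

    prompt = torch.randint(0, 256, (1, 1))

    # Inside sample(), the first iteration (is_first == True) calls forward() with the
    # flex / Triton sparse-attention kernels enabled; every subsequent single-token
    # step passes disable_flex = True and disable_triton_kernel = True.
    sampled = model.sample(prompt, 64)   # assumed (prompt, seq_len) signature
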
pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "native-sparse-attention-pytorch"
-version = "0.0.61"
+version = "0.0.62"
 description = "Native Sparse Attention"
 authors = [
     { name = "Phil Wang", email = "[email protected]" }