1 parent 0e1f6fe · commit 197112b
native_sparse_attention_pytorch/transformer.py

@@ -214,12 +214,15 @@ def sample(
 
         cache = None
 
-        for _ in tqdm(range(sample_num_times)):
+        for ind in tqdm(range(sample_num_times)):
+            is_first = ind == 0
 
             logits, next_cache = self.forward(
                 out,
                 cache = cache,
-                return_cache = True
+                return_cache = True,
+                disable_flex = not is_first,
+                disable_triton_kernel = not is_first
             )
 
             if use_cache_kv:
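
With this change, sample() passes disable_flex = not is_first and disable_triton_kernel = not is_first into forward(), so the flex-attention and Triton sparse-attention kernels only run on the first iteration, while later cached single-token decode steps fall back to the plain attention path. Below is a minimal usage sketch of how sampling would exercise these flags; the Transformer constructor arguments and the sample() call signature shown here are assumptions for illustration, not taken from the commit.

    # Hypothetical usage sketch: sampling after this change.
    # Constructor arguments and sample() signature are assumed; see transformer.py for the real API.

    import torch
    from native_sparse_attention_pytorch.transformer import Transformer

    model = Transformer(
        num_tokens = 256,   # assumed vocabulary-size argument
        dim = 512,          # assumed model-dimension argument
        depth = 2           # assumed depth argument
    )

    prompt = torch.randint(0, 256, (1, 1))

    # Inside sample(), the first iteration (is_first == True) calls forward() with the
    # flex / Triton sparse-attention kernels enabled; every subsequent single-token
    # step passes disable_flex = True and disable_triton_kernel = True.
    sampled = model.sample(prompt, 64)   # assumed (prompt, seq_len) signature
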
pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "native-sparse-attention-pytorch"
-version = "0.0.61"
+version = "0.0.62"
 description = "Native Sparse Attention"
 authors = [
     { name = "Phil Wang", email = "[email protected]" }