@@ -102,7 +102,7 @@ def reference_lightning_attention(q, k, v, ed, block_size, kv_history, seq_len):
102102 return [output , kvsums ]
103103
104104
105- def execute_lightning_attention_prefill_case (self , batch_size , head_num , max_seq_len , head_dim , block_size ,
105+ def execute_lightning_attention_prefill_case (batch_size , head_num , max_seq_len , head_dim , block_size ,
106106 has_kv_history = False , actual_seq_len = None , dtype = torch .float16 ,
107107 slope_rate = None ):
108108
@@ -157,7 +157,7 @@ def execute_lightning_attention_prefill_case(self, batch_size, head_num, max_seq
157157
158158
159159@torch .inference_mode ()
160- def test_lightning_attention_prefill_pad (self ):
160+ def test_lightning_attention_prefill_pad ():
161161 batch_size = 1
162162 head_num = 4
163163 max_seq_len = 8192
@@ -169,7 +169,7 @@ def test_lightning_attention_prefill_pad(self):
169169 torch .npu .reset_peak_memory_stats ()
170170
171171@torch .inference_mode ()
172- def test_lightning_attention_prefill_unpad_1 (self ):
172+ def test_lightning_attention_prefill_unpad_1 ():
173173 batch_size = 1
174174 head_num = 8
175175 max_seq_len = 16
@@ -181,7 +181,7 @@ def test_lightning_attention_prefill_unpad_1(self):
181181 gc .collect ()
182182 torch .npu .empty_cache ()
183183 torch .npu .reset_peak_memory_stats ()
184- def test_lightning_attention_prefill_unpad_2 (self ):
184+ def test_lightning_attention_prefill_unpad_2 ():
185185 batch_size = 4
186186 head_num = 8
187187 max_seq_len = 2048
@@ -196,7 +196,7 @@ def test_lightning_attention_prefill_unpad_2(self):
196196 torch .npu .reset_peak_memory_stats ()
197197
198198@torch .inference_mode ()
199- def test_lightning_attention_prefill_unpad_3 (self ):
199+ def test_lightning_attention_prefill_unpad_3 ():
200200 batch_size = 3
201201 head_num = 8
202202 max_seq_len = 384
@@ -210,7 +210,7 @@ def test_lightning_attention_prefill_unpad_3(self):
210210 torch .npu .reset_peak_memory_stats ()
211211
212212@torch .inference_mode ()
213- def test_lightning_attention_prefill_unpad_4 (self ):
213+ def test_lightning_attention_prefill_unpad_4 ():
214214 batch_size = 1
215215 head_num = 4
216216 max_seq_len = 256
@@ -225,7 +225,7 @@ def test_lightning_attention_prefill_unpad_4(self):
225225 torch .npu .reset_peak_memory_stats ()
226226
227227@torch .inference_mode ()
228- def test_lightning_attention_prefill_with_kv_history (self ):
228+ def test_lightning_attention_prefill_with_kv_history ():
229229 batch_size = 4
230230 head_num = 8
231231 max_seq_len = 1024
@@ -240,7 +240,7 @@ def test_lightning_attention_prefill_with_kv_history(self):
240240 torch .npu .reset_peak_memory_stats ()
241241
242242@torch .inference_mode ()
243- def test_lightning_attention_prefill_fp32 (self ):
243+ def test_lightning_attention_prefill_fp32 ():
244244 batch_size = 1
245245 head_num = 16
246246 max_seq_len = 256
0 commit comments