File tree: 1 file changed, +4 -12 lines changed
vllm/model_executor/layers/fused_moe: 1 file changed, +4 -12 lines changed
Original file line number | Diff line number | Diff line change
@@ -189,11 +189,7 @@ def fused_moe_kernel_gptq_awq(
189
189
mask = token_mask [:, None ] &
190
190
(offs_k [None , :] < K - k * BLOCK_SIZE_K ),
191
191
other = 0.0 )
192
- b = tl .load (
193
- b_ptrs ,
194
- cache_modifier = ".cg" ,
195
- eviction_policy = "evict_last" ,
196
- )
192
+ b = tl .load (b_ptrs )
197
193
if use_int4_w4a16 :
198
194
b = (b >> b_shifter ) & 0xF
199
195
@@ -395,13 +391,9 @@ def fused_moe_kernel(
395
391
mask = token_mask [:, None ] &
396
392
(offs_k [None , :] < K - k * BLOCK_SIZE_K ),
397
393
other = 0.0 )
398
- b = tl .load (
399
- b_ptrs ,
400
- mask = offs_k [:, None ] < K - k * BLOCK_SIZE_K ,
401
- other = 0.0 ,
402
- cache_modifier = ".cg" ,
403
- eviction_policy = "evict_last" ,
404
- )
394
+ b = tl .load (b_ptrs ,
395
+ mask = offs_k [:, None ] < K - k * BLOCK_SIZE_K ,
396
+ other = 0.0 )
405
397
# We accumulate along the K dimension.
406
398
if use_int8_w8a16 :
407
399
accumulator = tl .dot (a , b .to (compute_type ), acc = accumulator )
You can’t perform that action at this time.
0 commit comments