1 file changed: +12 -5 lines changed
Original file line number | Diff line number | Diff line change
@@ -283,11 +283,18 @@ def use_trtllm_attention(
283
283
284
284
if force_use_trtllm is None :
285
285
# Environment variable not set - use auto-detection
286
- use_trtllm = (
287
- num_tokens <= 256 and max_seq_len <= 131072 and kv_cache_dtype == "auto"
288
- )
289
- if use_trtllm :
290
- logger .warning_once ("Using TRTLLM attention (auto-detected)." )
286
+ if is_prefill :
287
+ # Prefill auto-detection
288
+ use_trtllm = max_seq_len <= 131072 and kv_cache_dtype == "auto"
289
+ if use_trtllm :
290
+ logger .warning_once ("Using TRTLLM prefill attention (auto-detected)." )
291
+ else :
292
+ # Decode auto-detection
293
+ use_trtllm = (
294
+ num_tokens <= 256 and max_seq_len <= 131072 and kv_cache_dtype == "auto"
295
+ )
296
+ if use_trtllm :
297
+ logger .warning_once ("Using TRTLLM decode attention (auto-detected)." )
291
298
return use_trtllm
292
299
293
300
# Environment variable is set to 1 - respect it
You can’t perform that action at this time.
0 commit comments