1 file changed: +3 -5 lines changed
@@ -17,9 +17,7 @@
     is_all_cross_attn_metadata_set, is_all_encoder_attn_metadata_set)
 from vllm.attention.ops.paged_attn import (PagedAttention,
                                            PagedAttentionMetadata)
-from vllm.logger import init_logger
-
-logger = init_logger(__name__)
+from vllm.utils import print_warning_once
 
 
 class XFormersBackend(AttentionBackend):
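The first hunk swaps a module-level logger for vllm.utils.print_warning_once, a helper that logs a given message at most once per process. A minimal sketch of that deduplication pattern, assuming a plain stdlib logger (an illustration of the idea, not vLLM's exact implementation):

import logging
from functools import lru_cache

logger = logging.getLogger(__name__)

@lru_cache(maxsize=None)
def print_warning_once(msg: str) -> None:
    # lru_cache memoizes on the message string, so repeated calls
    # with the same message hit the cache and log only once.
    logger.warning(msg)

Deduplicating matters here because the backend's __init__ runs once per attention layer, so an un-cached warning would typically repeat for every layer in the model.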
@@ -386,8 +384,8 @@ def __init__(
             raise ValueError(
                 "XFormers does not support block-sparse attention.")
         if logits_soft_cap is not None:
-            raise ValueError(
-                "XFormers does not support attention logits soft capping.")
+            print_warning_once("XFormers does not support logits soft cap. "
+                               "Outputs may be slightly off.")
         self.num_heads = num_heads
         self.head_size = head_size
         self.scale = float(scale)
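The second hunk downgrades the logits_soft_cap check from a hard error to a one-time warning, so the cap is ignored rather than rejecting the model outright; that is why the new message notes that outputs may be slightly off. For reference, logits soft capping smoothly bounds attention scores before the softmax, roughly cap * tanh(scores / cap); a sketch of the transform that the XFormers kernel cannot apply (illustrative only, not kernel code):

import torch

def soft_cap(scores: torch.Tensor, cap: float) -> torch.Tensor:
    # Smoothly squash raw attention scores into (-cap, cap);
    # skipping this step perturbs the attention logits slightly.
    return cap * torch.tanh(scores / cap)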