1 file changed, 5 insertions(+), 1 deletion(-)

@@ -9,7 +9,6 @@
 import vllm.envs as envs
 from vllm.logger import init_logger
 from vllm.utils import DEFAULT_MAX_NUM_BATCHED_TOKENS
-from vllm.v1.attention.backends.utils import set_kv_cache_layout
 
 from .interface import DeviceCapability, Platform, PlatformEnum, _Backend
 
@@ -164,11 +163,16 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
         vllm_config.scheduler_config.max_num_batched_tokens = max(
             vllm_config.scheduler_config.max_model_len,
             DEFAULT_MAX_NUM_BATCHED_TOKENS)
+        from vllm.v1.attention.backends.utils import set_kv_cache_layout
 
         set_kv_cache_layout("NHD")
         logger.info("Setting VLLM_KV_CACHE_LAYOUT to 'NHD' for XPU; "
                     "only NHD layout is supported by XPU attention kernels.")
 
+    @classmethod
+    def support_hybrid_kv_cache(cls) -> bool:
+        return True
+
     @classmethod
     def is_pin_memory_available(cls):
         return True
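In short, the diff defers the `set_kv_cache_layout` import into `check_and_update_config` and adds a `support_hybrid_kv_cache` override that returns `True` for this platform. Below is a minimal, self-contained sketch of the resulting behavior; the class name and the stubbed `set_kv_cache_layout` are illustrative stand-ins, not vLLM's actual code.

```python
def set_kv_cache_layout(layout: str) -> None:
    # Stand-in for vllm.v1.attention.backends.utils.set_kv_cache_layout.
    print(f"KV cache layout set to {layout!r}")


class DummyXPUPlatform:
    @classmethod
    def check_and_update_config(cls, vllm_config) -> None:
        # In the real code, set_kv_cache_layout is now imported here, at call
        # time, rather than at module import time, and the layout is forced
        # to "NHD" for XPU.
        set_kv_cache_layout("NHD")

    @classmethod
    def support_hybrid_kv_cache(cls) -> bool:
        # New override added by this diff: the platform now reports hybrid
        # KV cache support.
        return True


if __name__ == "__main__":
    DummyXPUPlatform.check_and_update_config(vllm_config=None)
    print(DummyXPUPlatform.support_hybrid_kv_cache())  # True
```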