@@ -112,9 +112,7 @@ class EngineArgs:
112
112
pipeline_parallel_size : int = 1
113
113
tensor_parallel_size : int = 1
114
114
max_parallel_loading_workers : Optional [int ] = None
115
- # NOTE(kzawora): default block size for Gaudi should be 128
116
- # smaller sizes still work, but very inefficiently
117
- block_size : int = 16 if not current_platform .is_hpu () else 128
115
+ block_size : Optional [int ] = None
118
116
enable_prefix_caching : Optional [bool ] = None
119
117
disable_sliding_window : bool = False
120
118
use_v2_block_manager : bool = True
@@ -1036,9 +1034,7 @@ def create_engine_config(self,
1036
1034
self .enable_prefix_caching = False
1037
1035
1038
1036
cache_config = CacheConfig (
1039
- # neuron needs block_size = max_model_len
1040
- block_size = self .block_size if self .device != "neuron" else
1041
- (self .max_model_len if self .max_model_len is not None else 0 ),
1037
+ block_size = self .block_size ,
1042
1038
gpu_memory_utilization = self .gpu_memory_utilization ,
1043
1039
swap_space = self .swap_space ,
1044
1040
cache_dtype = self .kv_cache_dtype ,
0 commit comments