1 file changed: +2 -1 lines changed
@@ -143,6 +143,8 @@ def __init__(
143
143
# the backends)
144
144
if envs .VLLM_USE_V1 :
145
145
self .use_irope = extra_impl_args .pop ("use_irope" , False )
146
+ else :
147
+ self .use_irope = extra_impl_args .get ("use_irope" , False )
146
148
147
149
quant_method = quant_config .get_quant_method (
148
150
self , prefix = prefix ) if quant_config else None
@@ -177,7 +179,6 @@ def __init__(
177
179
kv_sharing_target_layer_name , ** extra_impl_args )
178
180
self .backend = backend_name_to_enum (attn_backend .get_name ())
179
181
self .dtype = dtype
180
- self .use_irope = extra_impl_args .get ("use_irope" , False )
181
182
182
183
# For cuda-alike (CUDA and ROCM) and cpu platforms, we control how
183
184
# torch.compile works by registering the attention as one giant
You can’t perform that action at this time.
0 commit comments