1 file changed: +4 −3 lines changed

@@ -185,10 +185,11 @@ def _verify_cuda_graph(self) -> None:
             self.max_context_len_to_capture = self.max_model_len
         self.max_context_len_to_capture = min(self.max_context_len_to_capture,
                                               self.max_model_len)
-        if self.quantization == "gptq" and not self.enforce_eager:
+        if (self.quantization in ["gptq", "squeezellm"]
+                and not self.enforce_eager):
             # Related issue: https://github.com/vllm-project/vllm/issues/2147
-            logger.warning("GPTQ does not support CUDA graph yet. Disabling "
-                           "CUDA graph.")
+            logger.warning(f"{self.quantization} does not support CUDA graph "
+                           "yet. Disabling CUDA graph.")
             self.enforce_eager = True
 
     def verify_with_parallel_config(
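For context, here is a minimal runnable sketch of the patched check. The class below is a hypothetical, simplified stand-in for the real config class (only the fields touched by _verify_cuda_graph are modeled, and the None guard on max_context_len_to_capture is assumed from the surrounding context lines):

import logging

logging.basicConfig()
logger = logging.getLogger(__name__)

class ModelConfig:
    # Hypothetical, simplified stand-in for the real config class.
    def __init__(self, quantization=None, enforce_eager=False,
                 max_model_len=2048, max_context_len_to_capture=None):
        self.quantization = quantization
        self.enforce_eager = enforce_eager
        self.max_model_len = max_model_len
        self.max_context_len_to_capture = max_context_len_to_capture
        self._verify_cuda_graph()

    def _verify_cuda_graph(self) -> None:
        # Assumed guard: fall back to max_model_len when unset.
        if self.max_context_len_to_capture is None:
            self.max_context_len_to_capture = self.max_model_len
        self.max_context_len_to_capture = min(self.max_context_len_to_capture,
                                              self.max_model_len)
        # The patched condition: SqueezeLLM now joins GPTQ in forcing
        # eager mode, since neither supports CUDA graph capture yet.
        if (self.quantization in ["gptq", "squeezellm"]
                and not self.enforce_eager):
            # Related issue: https://github.com/vllm-project/vllm/issues/2147
            logger.warning(f"{self.quantization} does not support CUDA graph "
                           "yet. Disabling CUDA graph.")
            self.enforce_eager = True

# A SqueezeLLM config now falls back to eager execution automatically.
cfg = ModelConfig(quantization="squeezellm")
assert cfg.enforce_eager is True

The membership test on ["gptq", "squeezellm"] also makes the check easy to extend as other quantization backends are found to be incompatible with CUDA graph capture.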