1 file changed: +4 −3 lines changed
@@ -185,10 +185,11 @@ def _verify_cuda_graph(self) -> None:
             self.max_context_len_to_capture = self.max_model_len
         self.max_context_len_to_capture = min(self.max_context_len_to_capture,
                                               self.max_model_len)
-        if self.quantization == "gptq" and not self.enforce_eager:
+        if (self.quantization in ["gptq", "squeezellm"]
+                and not self.enforce_eager):
             # Related issue: https://github.com/vllm-project/vllm/issues/2147
-            logger.warning("GPTQ does not support CUDA graph yet. Disabling "
-                           "CUDA graph.")
+            logger.warning(f"{self.quantization} does not support CUDA graph "
+                           "yet. Disabling CUDA graph.")
             self.enforce_eager = True

     def verify_with_parallel_config(
You can’t perform that action at this time.
0 commit comments