2 files changed: +2 −2 lines changed
lines changed Original file line number Diff line number Diff line change @@ -219,13 +219,13 @@ def get_torch_arch_list() -> Set[str]:
219
219
"csrc/activation_kernels.cu" ,
220
220
"csrc/layernorm_kernels.cu" ,
221
221
"csrc/quantization/squeezellm/quant_cuda_kernel.cu" ,
222
- "csrc/quantization/gptq/q_gemm.cu" ,
223
222
"csrc/cuda_utils_kernels.cu" ,
224
223
"csrc/pybind.cpp" ,
225
224
]
226
225
227
226
if _is_cuda ():
228
227
vllm_extension_sources .append ("csrc/quantization/awq/gemm_kernels.cu" )
228
+ vllm_extension_sources .append ("csrc/quantization/gptq/q_gemm.cu" )
229
229
230
230
vllm_extension = CUDAExtension (
231
231
name = "vllm._C" ,
Columns: original file line number | diff line number | diff line change
@@ -143,7 +143,7 @@ def _verify_tokenizer_mode(self) -> None:
143
143
144
144
def _verify_quantization (self ) -> None :
145
145
supported_quantization = ["awq" , "gptq" , "squeezellm" ]
146
- rocm_not_supported_quantization = ["awq" ]
146
+ rocm_not_supported_quantization = ["awq" , "gptq" ]
147
147
if self .quantization is not None :
148
148
self .quantization = self .quantization .lower ()
149
149
You can’t perform that action at this time.
0 commit comments