
Commit 2acd76f

[ROCm] Temporarily remove GPTQ ROCm support (#2138)
1 parent b81a6a6 commit 2acd76f

File tree

2 files changed: +2 -2 lines changed

setup.py

Lines changed: 1 addition & 1 deletion
@@ -219,13 +219,13 @@ def get_torch_arch_list() -> Set[str]:
     "csrc/activation_kernels.cu",
     "csrc/layernorm_kernels.cu",
     "csrc/quantization/squeezellm/quant_cuda_kernel.cu",
-    "csrc/quantization/gptq/q_gemm.cu",
     "csrc/cuda_utils_kernels.cu",
     "csrc/pybind.cpp",
 ]

 if _is_cuda():
     vllm_extension_sources.append("csrc/quantization/awq/gemm_kernels.cu")
+    vllm_extension_sources.append("csrc/quantization/gptq/q_gemm.cu")

 vllm_extension = CUDAExtension(
     name="vllm._C",
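
For context, a minimal self-contained sketch of the platform gating this diff relies on. The bodies of _is_cuda and _is_hip below are assumptions modeled on the helpers in vLLM's setup.py, not copied from it, and the source list is abridged:

import torch

def _is_hip() -> bool:
    # Assumption: ROCm builds of PyTorch report a HIP version;
    # CUDA builds report None here.
    return torch.version.hip is not None

def _is_cuda() -> bool:
    return torch.version.cuda is not None and not _is_hip()

# Kernels compiled on every platform (abridged).
vllm_extension_sources = [
    "csrc/quantization/squeezellm/quant_cuda_kernel.cu",
    "csrc/cuda_utils_kernels.cu",
    "csrc/pybind.cpp",
]

# After this commit, the GPTQ kernel joins AWQ behind the CUDA-only
# gate, so ROCm builds no longer try to compile q_gemm.cu.
if _is_cuda():
    vllm_extension_sources.append("csrc/quantization/awq/gemm_kernels.cu")
    vllm_extension_sources.append("csrc/quantization/gptq/q_gemm.cu")
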

vllm/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ def _verify_tokenizer_mode(self) -> None:

     def _verify_quantization(self) -> None:
         supported_quantization = ["awq", "gptq", "squeezellm"]
-        rocm_not_supported_quantization = ["awq"]
+        rocm_not_supported_quantization = ["awq", "gptq"]
         if self.quantization is not None:
             self.quantization = self.quantization.lower()
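
The diff shows only the deny-list growing; for readability, a hedged sketch of how such a check plausibly rejects GPTQ on ROCm follows. The free-standing verify_quantization and is_hip below are illustrative assumptions, not vLLM's exact code:

from typing import Optional

import torch

def is_hip() -> bool:
    # Assumption: True on ROCm builds of PyTorch.
    return torch.version.hip is not None

def verify_quantization(quantization: Optional[str]) -> Optional[str]:
    # Illustrative stand-in for a _verify_quantization-style method.
    supported_quantization = ["awq", "gptq", "squeezellm"]
    # After this commit, GPTQ joins AWQ on the ROCm deny-list.
    rocm_not_supported_quantization = ["awq", "gptq"]
    if quantization is None:
        return None
    quantization = quantization.lower()
    if quantization not in supported_quantization:
        raise ValueError(
            f"Unknown quantization method: {quantization}. "
            f"Must be one of {supported_quantization}.")
    if is_hip() and quantization in rocm_not_supported_quantization:
        raise ValueError(
            f"{quantization} quantization is currently not supported on ROCm.")
    return quantization

On a ROCm build, verify_quantization("gptq") would now raise, mirroring the stricter check this commit introduces.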
