Skip to content

Commit 4fc722e

Browse files
mgoin and yewentao256 authored
[Kernel/Quant] Remove AQLM (#22943)
Signed-off-by: mgoin <[email protected]> Co-authored-by: Wentao Ye <[email protected]>
1 parent 3253ae7 commit 4fc722e

File tree

16 files changed

+0
-1534
lines changed

16 files changed

+0
-1534
lines changed

.buildkite/scripts/hardware_ci/run-amd-test.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,6 @@ fi
121121
if [[ $commands == *" kernels/quantization"* ]]; then
122122
commands="${commands} \
123123
--ignore=kernels/quantization/test_int8_quant.py \
124-
--ignore=kernels/quantization/test_aqlm.py \
125124
--ignore=kernels/quantization/test_machete_mm.py \
126125
--ignore=kernels/quantization/test_block_fp8.py \
127126
--ignore=kernels/quantization/test_block_int8.py \

CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,6 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
286286
FetchContent_MakeAvailable(cutlass)
287287

288288
list(APPEND VLLM_EXT_SRC
289-
"csrc/quantization/aqlm/gemm_kernels.cu"
290289
"csrc/quantization/awq/gemm_kernels.cu"
291290
"csrc/permute_cols.cu"
292291
"csrc/quantization/cutlass_w8a8/scaled_mm_entry.cu"

benchmarks/kernels/benchmark_aqlm.py

Lines changed: 0 additions & 345 deletions
This file was deleted.

csrc/ops.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -154,15 +154,6 @@ void cutlass_mla_decode(torch::Tensor const& out, torch::Tensor const& q_nope,
154154
torch::Tensor get_cuda_view_from_cpu_tensor(torch::Tensor& cpu_tensor);
155155

156156
#ifndef USE_ROCM
157-
torch::Tensor aqlm_gemm(const torch::Tensor& input, const torch::Tensor& codes,
158-
const torch::Tensor& codebooks,
159-
const torch::Tensor& scales,
160-
const std::vector<int64_t>& codebook_partition_sizes,
161-
const std::optional<torch::Tensor>& bias);
162-
163-
torch::Tensor aqlm_dequant(
164-
const torch::Tensor& codes, const torch::Tensor& codebooks,
165-
const std::vector<int64_t>& codebook_partition_sizes);
166157

167158
torch::Tensor awq_gemm(torch::Tensor _in_feats, torch::Tensor _kernel,
168159
torch::Tensor _scaling_factors, torch::Tensor _zeros,

0 commit comments

Comments (0)