
Commit 55d63b1
[Bugfix] Don't build machete on cuda <12.0 (vllm-project#7757)
Parent: 4f419c0

2 files changed: +47, -27 lines


CMakeLists.txt

Lines changed: 35 additions & 27 deletions
@@ -10,6 +10,9 @@ message(STATUS "Target device: ${VLLM_TARGET_DEVICE}")
 
 include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake)
 
+# Suppress potential warnings about unused manually-specified variables
+set(ignoreMe "${VLLM_PYTHON_PATH}")
+
 #
 # Supported python versions. These versions will be searched in order, the
 # first match will be selected. These should be kept in sync with setup.py.
@@ -228,43 +231,48 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
   endif()
 
   #
-  # For the Machete kernels we automatically generate sources for various
-  # preselected input type pairs and schedules.
-  # Generate sources:
-  execute_process(
-    COMMAND ${CMAKE_COMMAND} -E env
-    PYTHONPATH=${CMAKE_CURRENT_SOURCE_DIR}/csrc/cutlass_extensions/:${CUTLASS_DIR}/python/:${VLLM_PYTHON_PATH}:$PYTHONPATH
-    ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/csrc/quantization/machete/generate.py
-    RESULT_VARIABLE machete_generation_result
-    OUTPUT_VARIABLE machete_generation_output
-    OUTPUT_FILE ${CMAKE_CURRENT_BINARY_DIR}/machete_generation.log
-    ERROR_FILE ${CMAKE_CURRENT_BINARY_DIR}/machete_generation.log
-  )
+  # Machete kernels
 
-  if (NOT machete_generation_result EQUAL 0)
-    message(FATAL_ERROR "Machete generation failed."
-            " Result: \"${machete_generation_result}\""
-            "\nCheck the log for details: "
-            "${CMAKE_CURRENT_BINARY_DIR}/machete_generation.log")
-  else()
-    message(STATUS "Machete generation completed successfully.")
-  endif()
+  # The machete kernels only work on Hopper and require CUDA 12.0 or later.
+  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.0)
+    #
+    # For the Machete kernels we automatically generate sources for various
+    # preselected input type pairs and schedules.
+    # Generate sources:
+    execute_process(
+      COMMAND ${CMAKE_COMMAND} -E env
+      PYTHONPATH=${CMAKE_CURRENT_SOURCE_DIR}/csrc/cutlass_extensions/:${CUTLASS_DIR}/python/:${VLLM_PYTHON_PATH}:$PYTHONPATH
+      ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/csrc/quantization/machete/generate.py
+      RESULT_VARIABLE machete_generation_result
+      OUTPUT_VARIABLE machete_generation_output
+      OUTPUT_FILE ${CMAKE_CURRENT_BINARY_DIR}/machete_generation.log
+      ERROR_FILE ${CMAKE_CURRENT_BINARY_DIR}/machete_generation.log
+    )
+
+    if (NOT machete_generation_result EQUAL 0)
+      message(FATAL_ERROR "Machete generation failed."
+              " Result: \"${machete_generation_result}\""
+              "\nCheck the log for details: "
+              "${CMAKE_CURRENT_BINARY_DIR}/machete_generation.log")
+    else()
+      message(STATUS "Machete generation completed successfully.")
+    endif()
 
-  # Add machete generated sources
-  file(GLOB MACHETE_GEN_SOURCES "csrc/quantization/machete/generated/*.cu")
-  list(APPEND VLLM_EXT_SRC ${MACHETE_GEN_SOURCES})
-  message(STATUS "Machete generated sources: ${MACHETE_GEN_SOURCES}")
+    # Add machete generated sources
+    file(GLOB MACHETE_GEN_SOURCES "csrc/quantization/machete/generated/*.cu")
+    list(APPEND VLLM_EXT_SRC ${MACHETE_GEN_SOURCES})
+    message(STATUS "Machete generated sources: ${MACHETE_GEN_SOURCES}")
 
-  # See comment above for scaled_mm_c3x (same if condition)
-  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.0)
     set_source_files_properties(
       ${MACHETE_GEN_SOURCES}
       PROPERTIES
       COMPILE_FLAGS
       "-gencode arch=compute_90a,code=sm_90a")
   endif()
 
-  # Add pytorch binding
+  # Add pytorch binding for machete (added even on CUDA < 12.0 so that we can
+  # raise an error to the user if this was built with an incompatible
+  # CUDA version)
   list(APPEND VLLM_EXT_SRC
     csrc/quantization/machete/machete_pytorch.cu)
 endif()
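The comment on the pytorch-binding block above is the core of the fix: machete_pytorch.cu is always appended to VLLM_EXT_SRC for CUDA builds, while the generated kernels are only compiled behind the CMAKE_CUDA_COMPILER_VERSION check, so the custom op still registers and only fails when it is actually called. A minimal standalone C++ sketch of that pattern (the function name and return value are hypothetical, and a plain exception stands in for TORCH_CHECK):

#include <cstdio>
#include <stdexcept>
#include <string>
#include <vector>

// Hypothetical stand-in for a machete binding: the symbol is always compiled,
// but the real kernel path only exists when nvcc 12.0+ built this file.
std::vector<std::string> machete_supported_schedules_sketch() {
#if defined(__CUDACC_VER_MAJOR__) && __CUDACC_VER_MAJOR__ >= 12
  // The real binding would dispatch into the generated Hopper kernels here.
  return {"placeholder_schedule"};
#else
  // Built with an older toolkit (or a host-only compiler): keep the symbol so
  // registration succeeds, then fail loudly the moment anyone calls it.
  throw std::runtime_error("Machete requires CUDA 12.0 or later");
#endif
}

int main() {
  try {
    for (auto const& s : machete_supported_schedules_sketch())
      std::printf("schedule: %s\n", s.c_str());
  } catch (std::exception const& e) {
    std::printf("error: %s\n", e.what());  // the path a CUDA < 12 build takes
  }
  return 0;
}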

csrc/quantization/machete/machete_pytorch.cu

Lines changed: 12 additions & 0 deletions
@@ -37,9 +37,13 @@ static auto scalar_type_dispatch(ScalarType const& type, Fn fn) {
 //
 
 std::vector<std::string> supported_schedules(ScalarTypeTorchPtr const& btype) {
+#if defined(__CUDACC_VER_MAJOR__) && __CUDACC_VER_MAJOR__ >= 12
   return scalar_type_dispatch(*btype, [&](auto BType) {
     return GemmDispatcher<half_t, decltype(BType)>::supported_schedules();
   });
+#else
+  TORCH_CHECK(false, "Machete requires CUDA 12.0 or later");
+#endif
 }
 
 torch::Tensor gemm(torch::Tensor const& A, torch::Tensor const& B,
@@ -50,6 +54,7 @@ torch::Tensor gemm(torch::Tensor const& A, torch::Tensor const& B,
                    c10::optional<torch::Tensor> const& C,
                    c10::optional<double> alpha, c10::optional<double> beta,
                    c10::optional<std::string> schedule) {
+#if defined(__CUDACC_VER_MAJOR__) && __CUDACC_VER_MAJOR__ >= 12
   auto args = PyTorchArguments{.A = A,
                                .B = B,
                                .scales = scales,
@@ -67,13 +72,20 @@ torch::Tensor gemm(torch::Tensor const& A, torch::Tensor const& B,
       return GemmDispatcher<ComputeType, decltype(BType)>::dispatch(args);
     });
   });
+#else
+  TORCH_CHECK(false, "Machete requires CUDA 12.0 or later");
+#endif
 }
 
 torch::Tensor prepack_B(torch::Tensor const& B,
                         ScalarTypeTorchPtr const& btype) {
+#if defined(__CUDACC_VER_MAJOR__) && __CUDACC_VER_MAJOR__ >= 12
   return scalar_type_dispatch(*btype, [&](auto BType) {
     return PrepackBDispatcher<half_t, decltype(BType), half_t>::dispatch(B);
   });
+#else
+  TORCH_CHECK(false, "Machete requires CUDA 12.0 or later");
+#endif
 }
 
 };  // namespace machete
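The guards above key off __CUDACC_VER_MAJOR__, which nvcc defines to its own major version and which is absent when a plain host compiler processes the file, so both an old nvcc and a non-CUDA build fall through to the TORCH_CHECK stubs. A small probe, not part of the commit, that reports which branch a given compiler would take (__CUDACC_VER_MINOR__ is likewise defined only by nvcc):

#include <cstdio>

int main() {
#if defined(__CUDACC_VER_MAJOR__) && __CUDACC_VER_MAJOR__ >= 12
  // Compiled by nvcc 12.x or newer: the real Machete paths would be built.
  std::printf("nvcc %d.%d: Machete kernels enabled\n",
              __CUDACC_VER_MAJOR__, __CUDACC_VER_MINOR__);
#else
  // Host compiler or nvcc older than 12: only the error-raising stubs remain.
  std::printf("Machete stubs only (no CUDA 12+ compiler)\n");
#endif
  return 0;
}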
