@@ -10,6 +10,9 @@ message(STATUS "Target device: ${VLLM_TARGET_DEVICE}")
10
10
11
11
include (${CMAKE_CURRENT_LIST_DIR} /cmake/utils.cmake)
12
12
13
+ # Suppress potential warnings about unused manually-specified variables
14
+ set (ignoreMe "${VLLM_PYTHON_PATH} " )
15
+
13
16
#
14
17
# Supported python versions. These versions will be searched in order, the
15
18
# first match will be selected. These should be kept in sync with setup.py.
@@ -228,43 +231,48 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
228
231
endif ()
229
232
230
233
#
231
- # For the Machete kernels we automatically generate sources for various
232
- # preselected input type pairs and schedules.
233
- # Generate sources:
234
- execute_process (
235
- COMMAND ${CMAKE_COMMAND} -E env
236
- PYTHONPATH=${CMAKE_CURRENT_SOURCE_DIR} /csrc/cutlass_extensions/:${CUTLASS_DIR} /python/:${VLLM_PYTHON_PATH} :$PYTHONPATH
237
- ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR} /csrc/quantization/machete/generate.py
238
- RESULT_VARIABLE machete_generation_result
239
- OUTPUT_VARIABLE machete_generation_output
240
- OUTPUT_FILE ${CMAKE_CURRENT_BINARY_DIR} /machete_generation.log
241
- ERROR_FILE ${CMAKE_CURRENT_BINARY_DIR} /machete_generation.log
242
- )
234
+ # Machete kernels
243
235
244
- if (NOT machete_generation_result EQUAL 0)
245
- message (FATAL_ERROR "Machete generation failed."
246
- " Result: \" ${machete_generation_result} \" "
247
- "\n Check the log for details: "
248
- "${CMAKE_CURRENT_BINARY_DIR} /machete_generation.log" )
249
- else ()
250
- message (STATUS "Machete generation completed successfully." )
251
- endif ()
236
+ # The Machete kernels only work on Hopper and require CUDA 12.0 or later.
237
+ if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.0)
238
+ #
239
+ # For the Machete kernels we automatically generate sources for various
240
+ # preselected input type pairs and schedules.
241
+ # Generate sources:
242
+ execute_process (
243
+ COMMAND ${CMAKE_COMMAND} -E env
244
+ PYTHONPATH=${CMAKE_CURRENT_SOURCE_DIR} /csrc/cutlass_extensions/:${CUTLASS_DIR} /python/:${VLLM_PYTHON_PATH} :$PYTHONPATH
245
+ ${Python_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR} /csrc/quantization/machete/generate.py
246
+ RESULT_VARIABLE machete_generation_result
247
+ OUTPUT_VARIABLE machete_generation_output
248
+ OUTPUT_FILE ${CMAKE_CURRENT_BINARY_DIR} /machete_generation.log
249
+ ERROR_FILE ${CMAKE_CURRENT_BINARY_DIR} /machete_generation.log
250
+ )
251
+
252
+ if (NOT machete_generation_result EQUAL 0)
253
+ message (FATAL_ERROR "Machete generation failed."
254
+ " Result: \" ${machete_generation_result} \" "
255
+ "\n Check the log for details: "
256
+ "${CMAKE_CURRENT_BINARY_DIR} /machete_generation.log" )
257
+ else ()
258
+ message (STATUS "Machete generation completed successfully." )
259
+ endif ()
252
260
253
- # Add machete generated sources
254
- file (GLOB MACHETE_GEN_SOURCES "csrc/quantization/machete/generated/*.cu" )
255
- list (APPEND VLLM_EXT_SRC ${MACHETE_GEN_SOURCES} )
256
- message (STATUS "Machete generated sources: ${MACHETE_GEN_SOURCES} " )
261
+ # Add machete generated sources
262
+ file (GLOB MACHETE_GEN_SOURCES "csrc/quantization/machete/generated/*.cu" )
263
+ list (APPEND VLLM_EXT_SRC ${MACHETE_GEN_SOURCES} )
264
+ message (STATUS "Machete generated sources: ${MACHETE_GEN_SOURCES} " )
257
265
258
- # See comment above for scaled_mm_c3x (same if condition)
259
- if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.0)
260
266
set_source_files_properties (
261
267
${MACHETE_GEN_SOURCES}
262
268
PROPERTIES
263
269
COMPILE_FLAGS
264
270
"-gencode arch=compute_90a,code=sm_90a" )
265
271
endif ()
266
272
267
- # Add pytorch binding
273
+ # Add pytorch binding for machete (add even on CUDA < 12.0 so that we can
274
+ # raise an error telling the user that this was built with an incompatible
275
+ # CUDA version)
268
276
list (APPEND VLLM_EXT_SRC
269
277
csrc/quantization/machete/machete_pytorch.cu)
270
278
endif ()
0 commit comments