Skip to content

Commit ffc3f9a

Browse files
authored
feat: use aggressive compress-mode for fatbin (#484)
1 parent 4260eac commit ffc3f9a

File tree

1 file changed

+12
-5
lines changed

1 file changed

+12
-5
lines changed

CMakeLists.txt

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ else()
201201
FetchContent_MakeAvailable(libtorch)
202202

203203
find_package(Torch REQUIRED PATHS ${libtorch_SOURCE_DIR} NO_DEFAULT_PATH)
204-
message(STATUS "Downloading and using libtorch 2.7.1 for cuda ${CUDA_VERSION} at ${libtorch_SOURCE_DIR}")
204+
message(STATUS "Downloading and using libtorch 2.7.1 for cuda ${CUDAToolkit_VERSION} at ${libtorch_SOURCE_DIR}")
205205
endif()
206206

207207
# carry over torch flags to the rest of the project
@@ -214,14 +214,21 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DC10_USE_GLOG")
214214
message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
215215
message(STATUS "CMAKE_CXX_FLAGS_DEBUG: ${CMAKE_CXX_FLAGS_DEBUG}")
216216

217-
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -O3)
218217
# The following definitions must be undefined since half-precision operation is required.
219-
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}
218+
list(APPEND CUDA_NVCC_FLAGS
220219
-U__CUDA_NO_HALF_OPERATORS__
221220
-U__CUDA_NO_HALF_CONVERSIONS__
222221
-U__CUDA_NO_HALF2_OPERATORS__
223-
-U__CUDA_NO_BFLOAT16_CONVERSIONS__)
224-
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --use_fast_math -Xfatbin -compress-all)
222+
-U__CUDA_NO_BFLOAT16_CONVERSIONS__
223+
-O3
224+
--use_fast_math
225+
-Xfatbin
226+
-compress-all)
227+
228+
# Enable aggressive fatbin compression for CUDA 12.8 or later.
229+
if(${CUDAToolkit_VERSION} VERSION_GREATER_EQUAL 12.8)
230+
list(APPEND CUDA_NVCC_FLAGS -compress-mode=size)
231+
endif()
225232
message(STATUS "CUDA_NVCC_FLAGS: ${CUDA_NVCC_FLAGS}")
226233

227234
# enable testing in this directory so we can do a top-level `make test`.

0 commit comments

Comments
 (0)