@@ -75,6 +75,59 @@ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
75
75
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} " )
76
76
set (CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wall" )
77
77
78
######################################################################################
# A function for automatic detection of GPUs installed (if autodetection is enabled)
# Usage:
#   detect_installed_gpus(out_variable)
# Result:
#   out_variable is set (in the caller's scope) to the detected compute
#   capability as a bare two-digit string (e.g. "75"), or to
#   ${paddle_known_gpu_archs} when detection fails.
function(detect_installed_gpus out_variable)
  if(NOT CUDA_gpu_detect_output)
    set(cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu)

    # Generate a tiny CUDA probe that prints every visible device's compute
    # capability as "major.minor " pairs on stdout, or exits non-zero when no
    # device is usable.
    file(WRITE ${cufile} ""
      "#include \"stdio.h\"\n"
      "#include \"cuda.h\"\n"
      "#include \"cuda_runtime.h\"\n"
      "int main() {\n"
      "  int count = 0;\n"
      "  if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n"
      "  if (count == 0) return -1;\n"
      "  for (int device = 0; device < count; ++device) {\n"
      "    cudaDeviceProp prop;\n"
      "    if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n"
      "      printf(\"%d.%d \", prop.major, prop.minor);\n"
      "  }\n"
      "  return 0;\n"
      "}\n")

    # Compile and run the probe in one step; its stdout lands in nvcc_out.
    execute_process(COMMAND "${CUDA_NVCC_EXECUTABLE}"
                    "--run" "${cufile}"
                    WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
                    RESULT_VARIABLE nvcc_res OUTPUT_VARIABLE nvcc_out
                    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

    if(nvcc_res EQUAL 0)
      # Only use the last item of nvcc_out (the last device's compute
      # capability): strip the dot ("7.5" -> "75"), split into a list of
      # digit groups, and keep the final entry.
      string(REGEX REPLACE "\\." "" nvcc_out "${nvcc_out}")
      string(REGEX MATCHALL "[0-9()]+" nvcc_out "${nvcc_out}")
      list(GET nvcc_out -1 nvcc_out)
      # Memoize in the cache so re-configures skip the nvcc probe.
      set(CUDA_gpu_detect_output ${nvcc_out} CACHE INTERNAL
          "Returned GPU architectures from detect_installed_gpus tool" FORCE)
    endif()
  endif()

  if(NOT CUDA_gpu_detect_output)
    message(STATUS "Automatic GPU detection failed. Building for all known architectures.")
    # NOTE(review): paddle_known_gpu_archs is expected to be defined elsewhere
    # in this file — confirm it is set before this function can be called.
    set(${out_variable} ${paddle_known_gpu_archs} PARENT_SCOPE)
  else()
    set(${out_variable} ${CUDA_gpu_detect_output} PARENT_SCOPE)
  endif()
endfunction()
124
+
125
# Honor a user-supplied -DSM=<arch>; only run GPU auto-detection when absent.
if(NOT SM)
  # TODO(guosheng): Remove it if `GetCUDAComputeCapability` is exposed by paddle.
  # Currently, if `CUDA_gpu_detect_output` is not defined, use the detected arch.
  detect_installed_gpus(SM)
endif()
130
+
78
131
if (SM STREQUAL 80 OR
79
132
SM STREQUAL 86 OR
80
133
SM STREQUAL 70 OR
@@ -217,64 +270,19 @@ set(FT_PATCH_COMMAND
217
270
&& ${MUTE_COMMAND}
218
271
)
219
272
220
- ######################################################################################
221
- # A function for automatic detection of GPUs installed (if autodetection is enabled)
222
- # Usage:
223
- # detect_installed_gpus(out_variable)
224
- function (detect_installed_gpus out_variable)
225
- if (NOT CUDA_gpu_detect_output)
226
- set (cufile ${PROJECT_BINARY_DIR} /detect_cuda_archs.cu)
227
-
228
- file (WRITE ${cufile} ""
229
- "#include \" stdio.h\"\n "
230
- "#include \" cuda.h\"\n "
231
- "#include \" cuda_runtime.h\"\n "
232
- "int main() {\n "
233
- " int count = 0;\n "
234
- " if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n "
235
- " if (count == 0) return -1;\n "
236
- " for (int device = 0; device < count; ++device) {\n "
237
- " cudaDeviceProp prop;\n "
238
- " if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n "
239
- " printf(\" %d.%d \" , prop.major, prop.minor);\n "
240
- " }\n "
241
- " return 0;\n "
242
- "}\n " )
243
-
244
- execute_process (COMMAND "${CUDA_NVCC_EXECUTABLE} "
245
- "--run" "${cufile} "
246
- WORKING_DIRECTORY "${PROJECT_BINARY_DIR} /CMakeFiles/"
247
- RESULT_VARIABLE nvcc_res OUTPUT_VARIABLE nvcc_out
248
- ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
249
-
250
- if (nvcc_res EQUAL 0)
251
- # Only use last item of nvcc_out (the last device's compute capability).
252
- string (REGEX REPLACE "\\ ." "" nvcc_out "${nvcc_out} " )
253
- string (REGEX MATCHALL "[0-9()]+" nvcc_out "${nvcc_out} " )
254
- list (GET nvcc_out -1 nvcc_out)
255
- set (CUDA_gpu_detect_output ${nvcc_out} CACHE INTERNAL "Returned GPU architetures from detect_installed_gpus tool" FORCE)
256
- endif ()
257
- endif ()
258
-
259
- if (NOT CUDA_gpu_detect_output)
260
- message (STATUS "Automatic GPU detection failed. Building for all known architectures." )
261
- set (${out_variable} ${paddle_known_gpu_archs} PARENT_SCOPE)
262
- else ()
263
- set (${out_variable} ${CUDA_gpu_detect_output} PARENT_SCOPE)
264
- endif ()
265
- endfunction ()
266
-
267
- # TODO(guosheng): Remove it if `GetCUDAComputeCapability` is exposed by paddle.
268
- # Currently, if `CUDA_gpu_detect_output` is not defined, use the detected arch.
269
- detect_installed_gpus(SM)
273
# TODO(guosheng): Using UPDATE_COMMAND (instead of PATCH_COMMAND) makes every
# cmake re-run apply the latest patches when the developer changes FT patch
# code, at the cost of rebuilding all patched files rather than only the
# changed ones — is there a better way to do this? Or maybe hide this
# mechanism for simplicity.
# Reset the checkout to a pristine v3.1 tree, then re-apply every patch.
set(FT_UPDATE_COMMAND git checkout v3.1 && git checkout . && ${FT_PATCH_COMMAND})
270
278
271
279
ExternalProject_Add(
272
280
extern_${THIRD_PARTY_NAME}
273
281
GIT_REPOSITORY https://github.com/NVIDIA/FasterTransformer.git
274
282
GIT_TAG v3.1
275
283
PREFIX ${THIRD_PATH}
276
284
SOURCE_DIR ${THIRD_PATH} /source /${THIRD_PARTY_NAME}
277
- PATCH_COMMAND ${FT_PATCH_COMMAND}
285
+ UPDATE_COMMAND ${FT_UPDATE_COMMAND} # PATCH_COMMAND ${FT_PATCH_COMMAND}
278
286
BINARY_DIR ${THIRD_PATH} /build /${THIRD_PARTY_NAME}
279
287
INSTALL_COMMAND ""
280
288
CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release -DSM=${SM} -DBUILD_PD=ON -DPY_CMD=${PY_CMD} -DON_INFER=${ON_INFER} -DPADDLE_LIB=${PADDLE_LIB} -DWITH_MKL=${WITH_MKL} -DWITH_STATIC_LIB=${WITH_STATIC_LIB}
0 commit comments