@@ -29,8 +29,6 @@ file(GLOB kernel_primitive_h "primitive/*.h")
29
29
file (
30
30
GLOB kernel_cu
31
31
RELATIVE "${CMAKE_CURRENT_SOURCE_DIR} "
32
- "gpu/*.cu"
33
- "gpu/*.cu.cc"
34
32
"gpudnn/*.cu"
35
33
"kps/*.cu"
36
34
"legacy/kps/*.cu"
@@ -40,18 +38,24 @@ file(
40
38
"strings/gpu/*.cu"
41
39
"fusion/gpu/*.cu" )
42
40
41
+ file (
42
+ GLOB kernel_gpu
43
+ RELATIVE "${CMAKE_CURRENT_SOURCE_DIR} "
44
+ "gpu/*.cu" "gpu/*.cu.cc" )
45
+
43
46
if (APPLE OR WIN32 )
44
47
list (REMOVE_ITEM kernel_cu "fusion/gpu/fusion_group_kernel.cu" )
45
48
list (REMOVE_ITEM kernel_cu "sparse/gpu/conv_kernel_igemm.cu" )
46
49
endif ()
47
50
48
51
if (NOT WITH_DGC)
49
- list (REMOVE_ITEM kernel_cu "gpu/dgc_kernel.cu" )
52
+ list (REMOVE_ITEM kernel_gpu "gpu/dgc_kernel.cu" )
50
53
endif ()
51
54
52
55
if (DEFINED REDUCE_INFERENCE_LIB_SIZE)
53
- list (FILTER kernel_cu EXCLUDE REGEX ".*_grad_kernel\\ .cc$" )
54
56
list (FILTER kernel_cu EXCLUDE REGEX ".*_grad_kernel\\ .cu$" )
57
+ list (FILTER kernel_gpu EXCLUDE REGEX ".*_grad_kernel\\ .cc$" )
58
+ list (FILTER kernel_gpu EXCLUDE REGEX ".*_grad_kernel\\ .cu$" )
55
59
endif ()
56
60
57
61
if (WITH_CUTLASS)
@@ -216,6 +220,15 @@ if(WITH_ROCM)
216
220
list (
217
221
REMOVE_ITEM
218
222
kernel_cu
223
+ "gpudnn/mha_cudnn_frontend.cu"
224
+ "fusion/gpu/blha_get_max_len.cu"
225
+ "fusion/gpu/block_multi_head_attention_kernel.cu"
226
+ "fusion/gpu/fused_bn_add_activation_grad_kernel.cu"
227
+ "fusion/gpu/fused_bn_add_activation_kernel.cu"
228
+ "fusion/gpu/fusion_transpose_flatten_concat_kernel.cu" )
229
+ list (
230
+ REMOVE_ITEM
231
+ kernel_gpu
219
232
"gpu/affine_grid_grad_kernel.cu"
220
233
"gpu/apply_per_channel_scale_kernel.cu"
221
234
"gpu/cholesky_solve_kernel.cu"
@@ -228,13 +241,7 @@ if(WITH_ROCM)
228
241
"gpu/put_along_axis_grad_kernel.cu"
229
242
"gpu/put_along_axis_kernel.cu"
230
243
"gpu/qr_kernel.cu"
231
- "gpu/svd_kernel.cu"
232
- "gpudnn/mha_cudnn_frontend.cu"
233
- "fusion/gpu/blha_get_max_len.cu"
234
- "fusion/gpu/block_multi_head_attention_kernel.cu"
235
- "fusion/gpu/fused_bn_add_activation_grad_kernel.cu"
236
- "fusion/gpu/fused_bn_add_activation_kernel.cu"
237
- "fusion/gpu/fusion_transpose_flatten_concat_kernel.cu" )
244
+ "gpu/svd_kernel.cu" )
238
245
endif ()
239
246
240
247
set (cc_search_pattern
@@ -291,6 +298,8 @@ file(
291
298
if (WITH_GPU OR WITH_ROCM)
292
299
collect_srcs(kernels_srcs SRCS ${kernel_cu} )
293
300
kernel_declare("${kernel_cu} " )
301
+ collect_srcs(kernels_gpu_srcs SRCS ${kernel_gpu} )
302
+ kernel_declare("${kernel_gpu} " )
294
303
endif ()
295
304
296
305
if (WITH_XPU)
0 commit comments