Skip to content

Commit 8f9bfad

Browse files
authored
perf(compile): speed up reduce_op compile by splitting files (#14294)
test=develop
1 parent 792bf0b commit 8f9bfad

12 files changed

+148
-48
lines changed

cmake/external/mkldnn.cmake

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
3737
SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLDNN_INSTALL_DIR}/lib")
3838

3939
INCLUDE_DIRECTORIES(${MKLDNN_INC_DIR}) # For MKLDNN code to include internal headers.
40-
INCLUDE_DIRECTORIES(${THIRD_PARTY_PATH}/install) # For Paddle code to include mkldnn.h
4140

4241
IF(${CBLAS_PROVIDER} STREQUAL "MKLML")
4342
SET(MKLDNN_DEPENDS ${MKLML_PROJECT})

paddle/fluid/operators/CMakeLists.txt

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ list(REMOVE_DUPLICATES GENERAL_OPS)
55
set(DEPS_OPS "")
66
set(pybind_file ${PADDLE_BINARY_DIR}/paddle/fluid/pybind/pybind.h)
77
file(WRITE ${pybind_file} "// Generated by the paddle/fluid/operator/CMakeLists.txt. DO NOT EDIT!\n\n")
8+
9+
set(PART_CUDA_KERNEL_FILES)
810
function(op_library TARGET)
911
# op_library is a function to create op library. The interface is same as
1012
# cc_library. But it handle split GPU/CPU code and link some common library
@@ -37,6 +39,12 @@ function(op_library TARGET)
3739
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu)
3840
list(APPEND cu_srcs ${TARGET}.cu)
3941
endif()
42+
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu)
43+
set(PART_CUDA_KERNEL_FILES ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu
44+
${PART_CUDA_KERNEL_FILES} PARENT_SCOPE)
45+
list(APPEND cu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu)
46+
endif()
47+
4048
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.hip.cu)
4149
list(APPEND hip_cu_srcs ${TARGET}.hip.cu)
4250
endif()
@@ -327,6 +335,8 @@ foreach(src ${GENERAL_OPS})
327335
endforeach()
328336

329337
file(APPEND ${pybind_file} "USE_OP(less_than);\nUSE_OP(logical_and);\nUSE_NO_KERNEL_OP(read_from_array);\n")
338+
339+
330340
if (NOT WIN32)
331341
add_subdirectory(reader)
332342
endif(NOT WIN32)
@@ -353,3 +363,14 @@ if(NOT WIN32)
353363
nv_test(nccl_op_test SRCS nccl_op_test.cu.cc DEPS nccl_op gpu_info device_context)
354364
endif()
355365
nv_test(dropout_op_test SRCS dropout_op_test.cc DEPS dropout_op tensor)
366+
367+
if(WITH_GPU)
  # Each <op>.part.cu holds a CUDA-only grad-kernel registration that was
  # split out of its parent .cu to speed up nvcc compilation.  Because the
  # kernel is registered only in the .part.cu, pybind must reference the CUDA
  # device kernel explicitly: scrape the op name (first macro argument) out of
  # the REGISTER_OP_CUDA_KERNEL(...) invocation and emit the USE declaration.
  foreach(CUDA_KERNEL_FILE ${PART_CUDA_KERNEL_FILES})
    file(READ ${CUDA_KERNEL_FILE} TARGET_CONTENT)
    # Quote the input: file content contains ';' (C++ statements), which an
    # unquoted expansion would split into multiple arguments and corrupt.
    string(REGEX MATCH "REGISTER_OP_CUDA_KERNEL\\(\\n?([^,]+),.*" MATCHED "${TARGET_CONTENT}")
    if(MATCHED)
      # Quote the capture so an empty/whitespace match cannot drop the
      # argument and turn into a configure-time error.
      string(STRIP "${CMAKE_MATCH_1}" MATCHED)
      file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${MATCHED}, CUDA);\n")
    endif()
  endforeach()
endif()

paddle/fluid/operators/reduce_max_op.cu

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,3 @@ REGISTER_OP_CUDA_KERNEL(reduce_max,
2323
int, ops::MaxFunctor>,
2424
ops::ReduceKernel<paddle::platform::CUDADeviceContext,
2525
int64_t, ops::MaxFunctor>);
26-
REGISTER_OP_CUDA_KERNEL(
27-
reduce_max_grad, ops::ReduceGradKernel<paddle::platform::CUDADeviceContext,
28-
float, ops::MaxOrMinGradFunctor>,
29-
ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, double,
30-
ops::MaxOrMinGradFunctor>,
31-
ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int,
32-
ops::MaxOrMinGradFunctor>,
33-
ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int64_t,
34-
ops::MaxOrMinGradFunctor>);
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// .part.cu: reduce_max_grad registration split out of reduce_max_op.cu to
// speed up nvcc compilation (picked up via PART_CUDA_KERNEL_FILES in
// operators/CMakeLists.txt).
#include "paddle/fluid/operators/reduce_min_max_op.h"

REGISTER_OP_CUDA_KERNEL(
    reduce_max_grad, ops::ReduceGradKernel<paddle::platform::CUDADeviceContext,
                                           float, ops::MaxOrMinGradFunctor>,
    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, double,
                          ops::MaxOrMinGradFunctor>,
    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int,
                          ops::MaxOrMinGradFunctor>,
    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int64_t,
                          ops::MaxOrMinGradFunctor>);

paddle/fluid/operators/reduce_mean_op.cu

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -69,13 +69,3 @@ REGISTER_OP_CUDA_KERNEL(reduce_mean, ops::ReduceMeanKernel<float>,
6969
ops::ReduceMeanKernel<double>,
7070
ops::ReduceMeanKernel<int>,
7171
ops::ReduceMeanKernel<int64_t>);
72-
73-
REGISTER_OP_CUDA_KERNEL(
74-
reduce_mean_grad, ops::ReduceGradKernel<paddle::platform::CUDADeviceContext,
75-
float, ops::MeanGradFunctor>,
76-
ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, double,
77-
ops::MeanGradFunctor>,
78-
ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int,
79-
ops::MeanGradFunctor>,
80-
ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int64_t,
81-
ops::MeanGradFunctor>);
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// .part used to speed up nvcc compile: reduce_mean_grad registration split
// out of reduce_mean_op.cu (picked up via PART_CUDA_KERNEL_FILES in
// operators/CMakeLists.txt).
#include "paddle/fluid/operators/reduce_mean_op.h"

REGISTER_OP_CUDA_KERNEL(
    reduce_mean_grad, ops::ReduceGradKernel<paddle::platform::CUDADeviceContext,
                                            float, ops::MeanGradFunctor>,
    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, double,
                          ops::MeanGradFunctor>,
    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int,
                          ops::MeanGradFunctor>,
    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int64_t,
                          ops::MeanGradFunctor>);

paddle/fluid/operators/reduce_min_op.cu

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,3 @@ REGISTER_OP_CUDA_KERNEL(reduce_min,
2323
int, ops::MinFunctor>,
2424
ops::ReduceKernel<paddle::platform::CUDADeviceContext,
2525
int64_t, ops::MinFunctor>);
26-
REGISTER_OP_CUDA_KERNEL(
27-
reduce_min_grad, ops::ReduceGradKernel<paddle::platform::CUDADeviceContext,
28-
float, ops::MaxOrMinGradFunctor>,
29-
ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, double,
30-
ops::MaxOrMinGradFunctor>,
31-
ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int,
32-
ops::MaxOrMinGradFunctor>,
33-
ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int64_t,
34-
ops::MaxOrMinGradFunctor>);
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// .part.cu: reduce_min_grad registration split out of reduce_min_op.cu to
// speed up nvcc compilation (picked up via PART_CUDA_KERNEL_FILES in
// operators/CMakeLists.txt).
#include "paddle/fluid/operators/reduce_min_max_op.h"

REGISTER_OP_CUDA_KERNEL(
    reduce_min_grad, ops::ReduceGradKernel<paddle::platform::CUDADeviceContext,
                                           float, ops::MaxOrMinGradFunctor>,
    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, double,
                          ops::MaxOrMinGradFunctor>,
    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int,
                          ops::MaxOrMinGradFunctor>,
    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int64_t,
                          ops::MaxOrMinGradFunctor>);

paddle/fluid/operators/reduce_prod_op.cu

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,3 @@ REGISTER_OP_CUDA_KERNEL(reduce_prod,
2323
int, ops::ProdFunctor>,
2424
ops::ReduceKernel<paddle::platform::CUDADeviceContext,
2525
int64_t, ops::ProdFunctor>);
26-
REGISTER_OP_CUDA_KERNEL(
27-
reduce_prod_grad, ops::ReduceGradKernel<paddle::platform::CUDADeviceContext,
28-
float, ops::ProdGradFunctor>,
29-
ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, double,
30-
ops::ProdGradFunctor>,
31-
ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int,
32-
ops::ProdGradFunctor>,
33-
ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int64_t,
34-
ops::ProdGradFunctor>);
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// .part.cu: reduce_prod_grad registration split out of reduce_prod_op.cu to
// speed up nvcc compilation (picked up via PART_CUDA_KERNEL_FILES in
// operators/CMakeLists.txt).
#include "paddle/fluid/operators/reduce_prod_op.h"

REGISTER_OP_CUDA_KERNEL(
    reduce_prod_grad, ops::ReduceGradKernel<paddle::platform::CUDADeviceContext,
                                            float, ops::ProdGradFunctor>,
    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, double,
                          ops::ProdGradFunctor>,
    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int,
                          ops::ProdGradFunctor>,
    ops::ReduceGradKernel<paddle::platform::CUDADeviceContext, int64_t,
                          ops::ProdGradFunctor>);

0 commit comments

Comments
 (0)