Skip to content

Commit 3edd833

Browse files
authored
Merge pull request #5573 from qingqing01/cmake_speed
[Speed Compiling]: Reduce NVCC compiling files.
2 parents d7bf372 + c33922c commit 3edd833

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

47 files changed

+414
-258
lines changed

paddle/operators/CMakeLists.txt

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@ function(op_library TARGET)
99
set(OP_LIBRARY ${TARGET} ${OP_LIBRARY} PARENT_SCOPE)
1010
set(cc_srcs)
1111
set(cu_srcs)
12+
set(cu_cc_srcs)
1213
set(op_common_deps operator op_registry math_function)
1314
set(options "")
1415
set(oneValueArgs "")
@@ -22,13 +23,18 @@ function(op_library TARGET)
2223
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc)
2324
list(APPEND cc_srcs ${TARGET}.cc)
2425
endif()
26+
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu.cc)
27+
list(APPEND cu_cc_srcs ${TARGET}.cu.cc)
28+
endif()
2529
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu)
2630
list(APPEND cu_srcs ${TARGET}.cu)
2731
endif()
2832
else()
2933
foreach(src ${op_library_SRCS})
3034
if (${src} MATCHES ".*\\.cu$")
3135
list(APPEND cu_srcs ${src})
36+
elseif(${src} MATCHES ".*\\.cu.cc$")
37+
list(APPEND cu_cc_srcs ${src})
3238
elseif(${src} MATCHES ".*\\.cc$")
3339
list(APPEND cc_srcs ${src})
3440
else()
@@ -43,7 +49,7 @@ function(op_library TARGET)
4349
endif()
4450

4551
if (WITH_GPU)
46-
nv_library(${TARGET} SRCS ${cc_srcs} ${cu_srcs} DEPS ${op_library_DEPS}
52+
nv_library(${TARGET} SRCS ${cc_srcs} ${cu_cc_srcs} ${cu_srcs} DEPS ${op_library_DEPS}
4753
${op_common_deps})
4854
else()
4955
cc_library(${TARGET} SRCS ${cc_srcs} DEPS ${op_library_DEPS}
@@ -140,7 +146,9 @@ function(op_library TARGET)
140146

141147
# pybind USE_CPU_ONLY_OP
142148
list(LENGTH cu_srcs cu_srcs_len)
143-
if (${pybind_flag} EQUAL 0 AND ${cu_srcs_len} EQUAL 0)
149+
list(LENGTH cu_cc_srcs cu_cc_srcs_len)
150+
151+
if (${pybind_flag} EQUAL 0 AND ${cu_srcs_len} EQUAL 0 AND ${cu_cc_srcs_len} EQUAL 0)
144152
file(APPEND ${pybind_file} "USE_CPU_ONLY_OP(${TARGET});\n")
145153
set(pybind_flag 1)
146154
endif()
@@ -160,11 +168,12 @@ set(DEPS_OPS
160168
recurrent_op
161169
dynamic_recurrent_op
162170
softmax_with_cross_entropy_op
171+
softmax_op
172+
sequence_softmax_op
163173
sum_op
164174
pool_op
165175
pool_with_index_op
166176
conv_op
167-
lstm_op
168177
conv_transpose_op
169178
nccl_op
170179
sequence_conv_op
@@ -182,6 +191,8 @@ set(DEPS_OPS
182191
op_library(cond_op SRCS cond_op.cc DEPS framework_proto tensor operator net_op)
183192
op_library(cross_entropy_op DEPS cross_entropy)
184193
op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax)
194+
op_library(softmax_op DEPS softmax)
195+
op_library(sequence_softmax_op DEPS softmax)
185196
op_library(sum_op DEPS selected_rows_functor)
186197
op_library(sgd_op DEPS selected_rows_functor)
187198
op_library(adagrad_op DEPS selected_rows_functor)
@@ -225,6 +236,6 @@ cc_test(dynamic_recurrent_op_test SRCS dynamic_recurrent_op_test.cc
225236
rnn/recurrent_op_utils.cc
226237
DEPS dynamic_recurrent_op)
227238
if(WITH_GPU)
228-
nv_test(nccl_op_test SRCS nccl_op_test.cu DEPS nccl_op gpu_info device_context)
239+
cc_test(nccl_op_test SRCS nccl_op_test.cu.cc DEPS nccl_op gpu_info device_context)
229240
endif()
230241
cc_test(save_load_op_test SRCS save_load_op_test.cc DEPS save_op load_op)
File renamed without changes.

paddle/operators/conv2d_transpose_cudnn_op.cu renamed to paddle/operators/conv2d_transpose_cudnn_op.cu.cc

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -200,9 +200,7 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel<T> {
200200
T alpha = 1.0f, beta = 0.0f;
201201
if (input_grad) {
202202
T* input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace());
203-
auto t = framework::EigenVector<T>::Flatten(*input_grad);
204-
t.device(ctx.GetEigenDevice<platform::GPUPlace>()) =
205-
t.constant(static_cast<T>(0));
203+
math::set_constant(ctx.device_context(), input_grad, 0);
206204

207205
PADDLE_ENFORCE(platform::dynload::cudnnConvolutionForward(
208206
handle, &alpha, cudnn_output_desc, output_grad_data,
@@ -214,9 +212,8 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel<T> {
214212
// ------------------- cudnn conv backward filter ---------------------
215213
if (filter_grad) {
216214
T* filter_grad_data = filter_grad->mutable_data<T>(ctx.GetPlace());
217-
auto t = framework::EigenVector<T>::Flatten(*filter_grad);
218-
t.device(ctx.GetEigenDevice<platform::GPUPlace>()) =
219-
t.constant(static_cast<T>(0));
215+
math::set_constant(ctx.device_context(), filter_grad, 0);
216+
220217
// Gradient with respect to the filter
221218
PADDLE_ENFORCE(platform::dynload::cudnnConvolutionBackwardFilter(
222219
handle, &alpha, cudnn_output_desc, output_grad_data, cudnn_input_desc,
File renamed without changes.

paddle/operators/cross_entropy_op.cu

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,6 @@ template <typename T>
2323
__global__ void CrossEntropyGradientKernel(T* dX, const T* dY, const T* X,
2424
const int64_t* label, const int N,
2525
const int D) {
26-
// TOOD(qingqing) define CUDA_1D_KERNEL_LOOP macro in a common file.
27-
// CUDA_1D_KERNEL_LOOP(i, N) {
2826
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N;
2927
i += blockDim.x * gridDim.x) {
3028
int idx = i * D + label[i];

paddle/operators/fill_constant_batch_size_like_op.cu renamed to paddle/operators/fill_constant_batch_size_like_op.cu.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
See the License for the specific language governing permissions and
1313
limitations under the License. */
1414

15-
#include "paddle/framework/op_registry.h"
1615
#include "paddle/operators/fill_constant_batch_size_like_op.h"
16+
#include "paddle/framework/op_registry.h"
1717

1818
namespace ops = paddle::operators;
1919
REGISTER_OP_GPU_KERNEL(

paddle/operators/fill_zeros_like_op.cu renamed to paddle/operators/fill_zeros_like_op.cu.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
See the License for the specific language governing permissions and
1313
limitations under the License. */
1414

15-
#include "paddle/framework/op_registry.h"
1615
#include "paddle/operators/fill_zeros_like_op.h"
16+
#include "paddle/framework/op_registry.h"
1717

1818
namespace ops = paddle::operators;
1919
REGISTER_OP_GPU_KERNEL(

0 commit comments

Comments
 (0)