Skip to content

Commit a2d9b34

Browse files
authored
Refine operator cmake (#14413)
* wip simplify operator framework * wip * wip * done test=develop * clean test=develop * fix test=develop * fix deps test=develop * fix cpu build test=develop * fix tensorrt build test=develop * fix tests test=develop * fix test=develop * fix cpu build test=develop
1 parent 7f17e56 commit a2d9b34

File tree

213 files changed

+531
-520
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

213 files changed

+531
-520
lines changed

cmake/operators.cmake

Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
set(PART_CUDA_KERNEL_FILES)
2+
function(op_library TARGET)
3+
# op_library is a function to create op library. The interface is same as
4+
# cc_library. But it handle split GPU/CPU code and link some common library
5+
# for ops.
6+
set(cc_srcs)
7+
set(cu_srcs)
8+
set(hip_cu_srcs)
9+
set(miopen_hip_cc_srcs)
10+
set(cu_cc_srcs)
11+
set(cudnn_cu_cc_srcs)
12+
set(CUDNN_FILE)
13+
set(mkldnn_cc_srcs)
14+
set(MKLDNN_FILE)
15+
set(op_common_deps operator op_registry math_function)
16+
set(options "")
17+
set(oneValueArgs "")
18+
set(multiValueArgs SRCS DEPS)
19+
set(pybind_flag 0)
20+
cmake_parse_arguments(op_library "${options}" "${oneValueArgs}"
21+
"${multiValueArgs}" ${ARGN})
22+
23+
list(LENGTH op_library_SRCS op_library_SRCS_len)
24+
if (${op_library_SRCS_len} EQUAL 0)
25+
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc)
26+
list(APPEND cc_srcs ${TARGET}.cc)
27+
endif()
28+
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu.cc)
29+
list(APPEND cu_cc_srcs ${TARGET}.cu.cc)
30+
endif()
31+
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu)
32+
list(APPEND cu_srcs ${TARGET}.cu)
33+
endif()
34+
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu)
35+
set(PART_CUDA_KERNEL_FILES ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu
36+
${PART_CUDA_KERNEL_FILES} PARENT_SCOPE)
37+
list(APPEND cu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu)
38+
endif()
39+
40+
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.hip.cu)
41+
list(APPEND hip_cu_srcs ${TARGET}.hip.cu)
42+
endif()
43+
string(REPLACE "_op" "_cudnn_op" CUDNN_FILE "${TARGET}")
44+
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${CUDNN_FILE}.cu.cc)
45+
list(APPEND cudnn_cu_cc_srcs ${CUDNN_FILE}.cu.cc)
46+
endif()
47+
if(WITH_AMD_GPU)
48+
string(REPLACE "_op" "_miopen_op" MIOPEN_FILE "${TARGET}")
49+
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MIOPEN_FILE}.hip.cc)
50+
list(APPEND miopen_hip_cc_srcs ${MIOPEN_FILE}.hip.cc)
51+
endif()
52+
endif()
53+
if(WITH_MKLDNN)
54+
string(REPLACE "_op" "_mkldnn_op" MKLDNN_FILE "${TARGET}")
55+
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MKLDNN_FILE}.cc)
56+
list(APPEND mkldnn_cc_srcs ${MKLDNN_FILE}.cc)
57+
endif()
58+
endif()
59+
else()
60+
foreach(src ${op_library_SRCS})
61+
if (${src} MATCHES ".*\\.hip.cu$")
62+
list(APPEND hip_cu_srcs ${src})
63+
elseif (${src} MATCHES ".*\\.cu$")
64+
list(APPEND cu_srcs ${src})
65+
elseif(${src} MATCHES ".*_cudnn_op.cu.cc$")
66+
list(APPEND cudnn_cu_cc_srcs ${src})
67+
elseif(WITH_AMD_GPU AND ${src} MATCHES ".*_miopen_op.hip.cc$")
68+
list(APPEND miopen_hip_cc_srcs ${src})
69+
elseif(WITH_MKLDNN AND ${src} MATCHES ".*_mkldnn_op.cc$")
70+
list(APPEND mkldnn_cc_srcs ${src})
71+
elseif(${src} MATCHES ".*\\.cu.cc$")
72+
list(APPEND cu_cc_srcs ${src})
73+
elseif(${src} MATCHES ".*\\.cc$")
74+
list(APPEND cc_srcs ${src})
75+
else()
76+
message(FATAL_ERROR "${TARGET} Source file ${src} should only be .cc or .cu")
77+
endif()
78+
endforeach()
79+
endif()
80+
81+
list(LENGTH cc_srcs cc_srcs_len)
82+
if (${cc_srcs_len} EQUAL 0)
83+
message(FATAL_ERROR "The op library ${TARGET} should contains at least one .cc file")
84+
endif()
85+
if (WIN32)
86+
# remove windows unsupported op, because windows has no nccl, no warpctc such ops.
87+
foreach(windows_unsupport_op "nccl_op" "gen_nccl_id_op" "warpctc_op" "hierarchical_sigmoid_op"
88+
"crf_decoding_op" "select_op" "lstmp_op" "gru_op" "fusion_gru_op" "lstm_op" "fusion_lstm_op" "cumsum_op"
89+
"fusion_seqconv_eltadd_relu_op" "channel_send_op" "channel_create_op" "channel_close_op" "channel_recv_op")
90+
if ("${TARGET}" STREQUAL "${windows_unsupport_op}")
91+
return()
92+
endif()
93+
endforeach()
94+
endif(WIN32)
95+
set(OP_LIBRARY ${TARGET} ${OP_LIBRARY} CACHE INTERNAL "op libs")
96+
97+
list(LENGTH op_library_DEPS op_library_DEPS_len)
98+
if (${op_library_DEPS_len} GREATER 0)
99+
set(DEPS_OPS ${TARGET} ${DEPS_OPS} PARENT_SCOPE)
100+
endif()
101+
if (WITH_GPU)
102+
nv_library(${TARGET} SRCS ${cc_srcs} ${cu_cc_srcs} ${cudnn_cu_cc_srcs} ${mkldnn_cc_srcs} ${cu_srcs} DEPS ${op_library_DEPS}
103+
${op_common_deps})
104+
elseif (WITH_AMD_GPU)
105+
hip_library(${TARGET} SRCS ${cc_srcs} ${hip_cu_srcs} ${miopen_hip_cc_srcs} ${mkldnn_cc_srcs} DEPS ${op_library_DEPS}
106+
${op_common_deps})
107+
else()
108+
cc_library(${TARGET} SRCS ${cc_srcs} ${mkldnn_cc_srcs} DEPS ${op_library_DEPS}
109+
${op_common_deps})
110+
endif()
111+
112+
# Define operators that don't need pybind here.
113+
foreach(manual_pybind_op "compare_op" "logical_op" "nccl_op"
114+
"tensor_array_read_write_op" "tensorrt_engine_op")
115+
if ("${TARGET}" STREQUAL "${manual_pybind_op}")
116+
set(pybind_flag 1)
117+
endif()
118+
endforeach()
119+
120+
# The registration of USE_OP, please refer to paddle/fluid/framework/op_registry.h.
121+
# Note that it's enough to just adding one operator to pybind in a *_op.cc file.
122+
# And for detail pybind information, please see generated paddle/pybind/pybind.h.
123+
file(READ ${TARGET}.cc TARGET_CONTENT)
124+
string(REGEX MATCH "REGISTER_OPERATOR\\(.*REGISTER_OPERATOR\\(" multi_register "${TARGET_CONTENT}")
125+
string(REGEX MATCH "REGISTER_OPERATOR\\([a-z0-9_]*," one_register "${multi_register}")
126+
if (one_register STREQUAL "")
127+
string(REPLACE "_op" "" TARGET "${TARGET}")
128+
else ()
129+
string(REPLACE "REGISTER_OPERATOR(" "" TARGET "${one_register}")
130+
string(REPLACE "," "" TARGET "${TARGET}")
131+
endif()
132+
133+
# pybind USE_NO_KERNEL_OP
134+
# HACK: if REGISTER_OP_CPU_KERNEL presents the operator must have kernel
135+
string(REGEX MATCH "REGISTER_OP_CPU_KERNEL" regex_result "${TARGET_CONTENT}")
136+
string(REPLACE "_op" "" TARGET "${TARGET}")
137+
if (${pybind_flag} EQUAL 0 AND regex_result STREQUAL "")
138+
file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(${TARGET});\n")
139+
set(pybind_flag 1)
140+
endif()
141+
142+
# pybind USE_CPU_ONLY_OP
143+
list(LENGTH cu_srcs cu_srcs_len)
144+
list(LENGTH cu_cc_srcs cu_cc_srcs_len)
145+
list(LENGTH mkldnn_cc_srcs mkldnn_cc_srcs_len)
146+
list(LENGTH hip_cu_srcs hip_cu_srcs_len)
147+
list(LENGTH miopen_hip_cc_srcs miopen_hip_cc_srcs_len)
148+
if (${pybind_flag} EQUAL 0 AND ${mkldnn_cc_srcs_len} EQUAL 0 AND ${cu_srcs_len} EQUAL 0 AND ${cu_cc_srcs_len} EQUAL 0 AND
149+
${hip_cu_srcs_len} EQUAL 0 AND ${miopen_hip_cc_srcs_len} EQUAL 0)
150+
file(APPEND ${pybind_file} "USE_CPU_ONLY_OP(${TARGET});\n")
151+
set(pybind_flag 1)
152+
endif()
153+
154+
# pybind USE_OP_DEVICE_KERNEL for CUDNN
155+
list(LENGTH cudnn_cu_cc_srcs cudnn_cu_cc_srcs_len)
156+
if (WITH_GPU AND ${cudnn_cu_cc_srcs_len} GREATER 0)
157+
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, CUDNN);\n")
158+
endif()
159+
160+
# pybind USE_OP_DEVICE_KERNEL for MIOPEN
161+
if (WITH_AMD_GPU AND ${miopen_hip_cc_srcs_len} GREATER 0)
162+
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, MIOPEN);\n")
163+
endif()
164+
165+
# pybind USE_OP_DEVICE_KERNEL for MKLDNN
166+
if (WITH_MKLDNN AND ${mkldnn_cc_srcs_len} GREATER 0)
167+
# Append first implemented MKLDNN activation operator
168+
if (${MKLDNN_FILE} STREQUAL "activation_mkldnn_op")
169+
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, MKLDNN);\n")
170+
else()
171+
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, MKLDNN);\n")
172+
endif()
173+
endif()
174+
175+
# pybind USE_OP
176+
if (${pybind_flag} EQUAL 0)
177+
# NOTE(*): activation use macro to regist the kernels, set use_op manually.
178+
if(${TARGET} STREQUAL "activation")
179+
file(APPEND ${pybind_file} "USE_OP(relu);\n")
180+
elseif(${TARGET} STREQUAL "fake_dequantize")
181+
file(APPEND ${pybind_file} "USE_OP(fake_dequantize_max_abs);\n")
182+
elseif(${TARGET} STREQUAL "fake_quantize")
183+
file(APPEND ${pybind_file} "USE_OP(fake_quantize_abs_max);\n")
184+
elseif(${TARGET} STREQUAL "tensorrt_engine_op")
185+
message(STATUS "Pybind skips [tensorrt_engine_op], for this OP is only used in inference")
186+
elseif(${TARGET} STREQUAL "fc")
187+
# HACK: fc only have mkldnn and cpu, which would mismatch the cpu only condition
188+
file(APPEND ${pybind_file} "USE_CPU_ONLY_OP(${TARGET});\n")
189+
else()
190+
file(APPEND ${pybind_file} "USE_OP(${TARGET});\n")
191+
endif()
192+
endif()
193+
endfunction()
194+
195+
196+
function(register_operators)
197+
set(options "")
198+
set(oneValueArgs "")
199+
set(multiValueArgs EXCLUDES)
200+
cmake_parse_arguments(register_operators "${options}" "${oneValueArgs}"
201+
"${multiValueArgs}" ${ARGN})
202+
203+
file(GLOB OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*_op.cc")
204+
string(REPLACE "_mkldnn" "" OPS "${OPS}")
205+
string(REPLACE ".cc" "" OPS "${OPS}")
206+
list(REMOVE_DUPLICATES OPS)
207+
208+
foreach(src ${OPS})
209+
list(FIND register_operators_EXCLUDES ${src} _index)
210+
if (${_index} EQUAL -1)
211+
op_library(${src})
212+
endif()
213+
endforeach()
214+
endfunction()

paddle/fluid/framework/data_device_transform_test.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ limitations under the License. */
1717
#include "paddle/fluid/framework/lod_tensor.h"
1818
#include "paddle/fluid/framework/op_info.h"
1919
#include "paddle/fluid/framework/op_registry.h"
20-
#include "paddle/fluid/operators/elementwise_op_function.h"
20+
#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
2121
#include "paddle/fluid/operators/math/math_function.h"
2222
#include "paddle/fluid/platform/device_context.h"
2323
#include "paddle/fluid/platform/init.h"

paddle/fluid/inference/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor)
1313
# TODO(panyx0718): Should this be called paddle_fluid_inference_api_internal?
1414
cc_library(paddle_fluid_api
1515
SRCS io.cc
16-
DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
16+
DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS})
1717

1818
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
1919
get_property(cuda_modules GLOBAL PROPERTY CUDA_MODULES)

paddle/fluid/inference/tensorrt/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
nv_library(tensorrt_engine SRCS engine.cc DEPS framework_proto device_context)
1+
nv_library(tensorrt_engine SRCS engine.cc DEPS ${GLOB_OPERATOR_DEPS} framework_proto device_context)
22
nv_test(test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader)
33
nv_test(test_tensorrt_engine SRCS test_engine.cc DEPS dynload_cuda tensorrt_engine)
44
add_subdirectory(plugin)

paddle/fluid/inference/tensorrt/convert/CMakeLists.txt

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,34 +6,34 @@ pad_op.cc split_op.cc prelu_op.cc
66
DEPS tensorrt_engine tensorrt_plugin operator scope framework_proto op_registry)
77

88
nv_test(test_op_converter SRCS test_op_converter.cc DEPS
9-
${FLUID_CORE_MODULES} tensorrt_engine tensorrt_converter)
9+
${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} tensorrt_engine tensorrt_converter)
1010

1111
nv_test(test_io_converter SRCS test_io_converter.cc io_converter.cc DEPS dynload_cuda dynamic_loader lod_tensor)
1212
nv_test(test_trt_mul_op SRCS test_mul_op.cc mul_op.cc
13-
DEPS ${FLUID_CORE_MODULES} tensorrt_engine mul_op SERIAL)
13+
DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} tensorrt_engine mul_op SERIAL)
1414
nv_test(test_trt_fc_op SRCS test_fc_op.cc fc_op.cc
15-
DEPS ${FLUID_CORE_MODULES} tensorrt_engine mul_op SERIAL)
15+
DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} tensorrt_engine mul_op SERIAL)
1616
nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc
17-
DEPS ${FLUID_CORE_MODULES} tensorrt_engine activation_op SERIAL)
17+
DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} tensorrt_engine activation_op SERIAL)
1818
nv_test(test_trt_conv_op SRCS test_conv2d_op.cc conv2d_op.cc
19-
DEPS ${FLUID_CORE_MODULES} tensorrt_engine conv_op conv_transpose_op SERIAL)
19+
DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} tensorrt_engine conv_op conv_transpose_op SERIAL)
2020
nv_test(test_trt_pool2d_op SRCS test_pool2d_op.cc pool2d_op.cc
21-
DEPS ${FLUID_CORE_MODULES} tensorrt_engine pool_op SERIAL)
21+
DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} tensorrt_engine pool_op SERIAL)
2222
nv_test(test_trt_elementwise_op SRCS test_elementwise_op.cc elementwise_op.cc
23-
DEPS ${FLUID_CORE_MODULES} tensorrt_engine elementwise_add_op SERIAL)
23+
DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} tensorrt_engine elementwise_add_op SERIAL)
2424
nv_test(test_trt_softmax_op SRCS test_softmax_op.cc softmax_op.cc
25-
DEPS ${FLUID_CORE_MODULES} tensorrt_engine softmax_op SERIAL)
25+
DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} tensorrt_engine softmax_op SERIAL)
2626
nv_test(test_trt_batch_norm_op SRCS test_batch_norm_op.cc batch_norm_op.cc
27-
DEPS ${FLUID_CORE_MODULES} tensorrt_engine batch_norm_op SERIAL)
27+
DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} tensorrt_engine batch_norm_op SERIAL)
2828
nv_test(test_trt_concat_op SRCS test_concat_op.cc concat_op.cc
29-
DEPS ${FLUID_CORE_MODULES} tensorrt_engine concat_op SERIAL)
29+
DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} tensorrt_engine concat_op SERIAL)
3030
nv_test(test_trt_dropout_op SRCS test_dropout_op.cc dropout_op.cc
31-
DEPS ${FLUID_CORE_MODULES} tensorrt_engine dropout_op SERIAL)
31+
DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} tensorrt_engine dropout_op SERIAL)
3232
nv_test(test_trt_pad_op SRCS test_pad_op.cc pad_op.cc
33-
DEPS ${FLUID_CORE_MODULES} tensorrt_engine pad_op SERIAL)
33+
DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} tensorrt_engine pad_op SERIAL)
3434
nv_test(test_trt_split_op SRCS test_split_op.cc split_op.cc
35-
DEPS ${FLUID_CORE_MODULES} tensorrt_engine tensorrt_plugin
35+
DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} tensorrt_engine tensorrt_plugin
3636
split_op concat_op SERIAL)
3737
nv_test(test_trt_prelu_op SRCS test_prelu_op.cc prelu_op.cc
38-
DEPS ${FLUID_CORE_MODULES} tensorrt_engine tensorrt_plugin
38+
DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} tensorrt_engine tensorrt_plugin
3939
prelu_op SERIAL)

0 commit comments

Comments
 (0)