Skip to content

Commit c73c5ed

Browse files
committed
use for_range
2 parents b548ecb + e8b4e0d commit c73c5ed

File tree

22 files changed

+1123
-260
lines changed

22 files changed

+1123
-260
lines changed

cmake/external/anakin.cmake

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@ if (NOT WITH_ANAKIN)
22
return()
33
endif()
44

5+
option(ANAKIN_ENABLE_OP_TIMER "Get more detailed information with Anakin op time" OFF)
6+
if(ANAKIN_ENABLE_OP_TIMER)
7+
add_definitions(-DPADDLE_ANAKIN_ENABLE_OP_TIMER)
8+
endif()
9+
510
INCLUDE(ExternalProject)
611
set(ANAKIN_SOURCE_DIR ${THIRD_PARTY_PATH}/anakin)
712
# the anakin install dir is only default one now
@@ -11,23 +16,34 @@ set(ANAKIN_LIBRARY ${ANAKIN_INSTALL_DIR})
1116
set(ANAKIN_SHARED_LIB ${ANAKIN_LIBRARY}/libanakin.so)
1217
set(ANAKIN_SABER_LIB ${ANAKIN_LIBRARY}/libanakin_saber_common.so)
1318

14-
# TODO(luotao): ANAKIN_MODLE_URL will move to demo ci later.
15-
set(ANAKIN_MODLE_URL "http://paddle-inference-dist.bj.bcebos.com/mobilenet_v2.anakin.bin")
19+
# TODO(luotao): ANAKIN_MODLE_URL etc will move to demo ci later.
20+
set(INFERENCE_URL "http://paddle-inference-dist.bj.bcebos.com")
21+
set(ANAKIN_MODLE_URL "${INFERENCE_URL}/mobilenet_v2.anakin.bin")
22+
set(ANAKIN_RNN_MODLE_URL "${INFERENCE_URL}/anakin_test%2Fditu_rnn.anakin2.model.bin")
23+
set(ANAKIN_RNN_DATA_URL "${INFERENCE_URL}/anakin_test%2Fditu_rnn_data.txt")
1624
execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_SOURCE_DIR}")
17-
execute_process(COMMAND bash -c "cd ${ANAKIN_SOURCE_DIR}; wget -q --no-check-certificate ${ANAKIN_MODLE_URL}")
25+
execute_process(COMMAND bash -c "cd ${ANAKIN_SOURCE_DIR}; wget -q --no-check-certificate ${ANAKIN_MODLE_URL} -N")
26+
execute_process(COMMAND bash -c "cd ${ANAKIN_SOURCE_DIR}; wget -q --no-check-certificate ${ANAKIN_RNN_MODLE_URL} -N")
27+
execute_process(COMMAND bash -c "cd ${ANAKIN_SOURCE_DIR}; wget -q --no-check-certificate ${ANAKIN_RNN_DATA_URL} -N")
1828

1929
include_directories(${ANAKIN_INCLUDE})
2030
include_directories(${ANAKIN_INCLUDE}/saber/)
31+
include_directories(${ANAKIN_INCLUDE}/saber/core/)
32+
include_directories(${ANAKIN_INCLUDE}/saber/funcs/impl/x86/)
33+
include_directories(${ANAKIN_INCLUDE}/saber/funcs/impl/cuda/base/cuda_c/)
2134

2235
set(ANAKIN_COMPILE_EXTRA_FLAGS
2336
-Wno-error=unused-but-set-variable -Wno-unused-but-set-variable
2437
-Wno-error=unused-variable -Wno-unused-variable
2538
-Wno-error=format-extra-args -Wno-format-extra-args
26-
-Wno-error=comment -Wno-comment
27-
-Wno-error=format -Wno-format
39+
-Wno-error=comment -Wno-comment
40+
-Wno-error=format -Wno-format
41+
-Wno-error=maybe-uninitialized -Wno-maybe-uninitialized
2842
-Wno-error=switch -Wno-switch
2943
-Wno-error=return-type -Wno-return-type
3044
-Wno-error=non-virtual-dtor -Wno-non-virtual-dtor
45+
-Wno-error=ignored-qualifiers
46+
-Wno-ignored-qualifiers
3147
-Wno-sign-compare
3248
-Wno-reorder
3349
-Wno-error=cpp)
@@ -38,7 +54,7 @@ ExternalProject_Add(
3854
DEPENDS ${MKLML_PROJECT}
3955
# Anakin codes error on Intel(R) Xeon(R) Gold 5117 CPU, temporary do not compile avx512 related code.
4056
GIT_REPOSITORY "https://github.com/luotao1/Anakin"
41-
GIT_TAG "bcf17aabe7921ceb7bce591244b4f9dce7dba5c8"
57+
GIT_TAG "211d1fc5d813d70c0c14072f9083cf25f40940ea"
4258
PREFIX ${ANAKIN_SOURCE_DIR}
4359
UPDATE_COMMAND ""
4460
CMAKE_ARGS -DUSE_GPU_PLACE=YES
@@ -48,6 +64,7 @@ ExternalProject_Add(
4864
-DMKLML_ROOT=${THIRD_PARTY_PATH}/install/mklml
4965
-DCUDNN_ROOT=${CUDNN_ROOT}
5066
-DCUDNN_INCLUDE_DIR=${CUDNN_INCLUDE_DIR}
67+
-DENABLE_OP_TIMER=${ANAKIN_ENABLE_OP_TIMER}
5168
${EXTERNAL_OPTIONAL_ARGS}
5269
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${ANAKIN_INSTALL_DIR}
5370
)

doc/fluid/dev/new_op_cn.md

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,10 +119,29 @@ $$Out = scale*X$$
119119
120120
这个例子有`AddAttr<AttrType>("scale", "...").SetDefault(1.0);` : 增加`scale`系数,作为参数属性,并且设置默认值为1.0。
121121
122+
### 定义GradProtoMaker类
123+
每个Op必须有一个对应的GradProtoMaker,若未定制对应前向Op的GradProtoMaker,fluid提供了DefaultGradProtoMaker,默认注册会使用全部输入输出,包括Input, Output, Output@Grad等,使用不需要的变量会造成显存浪费。
124+
下面示例定义了ScaleOp的GradProtoMaker。
125+
126+
```cpp
127+
class ScaleGradMaker : public framework::SingleGradOpDescMaker {
128+
public:
129+
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
130+
131+
std::unique_ptr<framework::OpDesc> Apply() const override {
132+
auto *grad_op = new framework::OpDesc();
133+
grad_op->SetType("scale");
134+
grad_op->SetInput("X", OutputGrad("Out"));
135+
grad_op->SetOutput("Out", InputGrad("X"));
136+
grad_op->SetAttr("scale", GetAttr("scale"));
137+
return std::unique_ptr<framework::OpDesc>(grad_op);
138+
}
139+
};
140+
```
122141
123142
### 定义Operator类
124143
125-
下面的点实现了MulOp的定义
144+
下面实现了MulOp的定义
126145
127146
```cpp
128147
class MulOp : public framework::OperatorWithKernel {
@@ -383,6 +402,19 @@ PADDLE_ENFORCE(forward_pd != nullptr,
383402
"Fail to find eltwise_fwd_pd in device context"); //eltwise_fwd_pd用户可能看不懂
384403
```
385404

405+
3. OP内部调用非法接口:Op内部如果出现Output = ShareDataWith(Input)
406+
问题示例:
407+
```cpp
408+
auto *out = ctx.Output<framework::LoDTensor>("Out");
409+
auto *in = ctx.Input<framework::LoDTensor>("X");
410+
out->ShareDataWith(*in);
411+
```
412+
Op内部如果出现Output = ShareDataWith(Input),相当于operator图中有一条隐藏边,连接了Input和Output,这条边无法在图分析中表达,引发基于图优化的错误。
413+
414+
4. OP实现的性能实践
415+
调用了eigen的broadcast, chop等操作,性能会比手写cuda kernel差几倍以上。此时cpu的实现可以复用eigen,gpu实现可以手写cuda kernel.
416+
417+
386418
#### OP InferShape检查提示信息特别说明
387419
388420
- 检查输入输出变量,请统一遵循以下格式

paddle/fluid/framework/ir/graph_helper.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ std::map<ir::Node *, std::unordered_set<ir::Node *>> BuildOperationAdjList(
104104
for (auto &adj_n : var->inputs) {
105105
PADDLE_ENFORCE(adj_n->NodeType() == ir::Node::Type::kOperation);
106106
adj_list[n].insert(adj_n);
107-
VLOG(3) << "adj " << adj_n->Name() << reinterpret_cast<void *>(adj_n)
107+
VLOG(4) << "adj " << adj_n->Name() << reinterpret_cast<void *>(adj_n)
108108
<< " -> " << n->Name() << reinterpret_cast<void *>(n)
109109
<< " via " << var->Name() << reinterpret_cast<void *>(var);
110110
}

paddle/fluid/inference/analysis/CMakeLists.txt

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ function (inference_analysis_test TARGET)
2222
if(WITH_TESTING)
2323
set(options "")
2424
set(oneValueArgs "")
25-
set(multiValueArgs SRCS)
25+
set(multiValueArgs SRCS EXTRA_DEPS)
2626
cmake_parse_arguments(analysis_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
2727

2828
set(mem_opt "")
@@ -31,22 +31,43 @@ function (inference_analysis_test TARGET)
3131
endif()
3232
cc_test(${TARGET}
3333
SRCS "${analysis_test_SRCS}"
34-
DEPS analysis graph fc_fuse_pass graph_viz_pass infer_clean_graph_pass graph_pattern_detecter pass
34+
DEPS analysis graph fc_fuse_pass graph_viz_pass infer_clean_graph_pass graph_pattern_detecter pass ${analysis_test_EXTRA_DEPS}
3535
ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model ${mem_opt})
3636
set_tests_properties(${TARGET} PROPERTIES DEPENDS test_word2vec)
3737
endif(WITH_TESTING)
3838
endfunction(inference_analysis_test)
3939

40-
cc_test(test_analyzer SRCS analyzer_tester.cc DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis
41-
# ir
42-
fc_fuse_pass
43-
graph_viz_pass
44-
infer_clean_graph_pass
45-
graph_pattern_detecter
46-
pass
47-
ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model)
48-
#set_tests_properties(test_analyzer PROPERTIES DEPENDS test_word2vec)
49-
#inference_api_test(test_analyzer SRC analyzer_tester.cc ARGS test_word2vec)
40+
set(DITU_RNN_MODEL_URL "http://paddle-inference-dist.bj.bcebos.com/ditu_rnn_fluid%2Fmodel.tar.gz")
41+
set(DITU_RNN_DATA_URL "http://paddle-inference-dist.bj.bcebos.com/ditu_rnn_fluid%2Fdata.txt.tar.gz")
42+
set(DITU_INSTALL_DIR "${THIRD_PARTY_PATH}/install/ditu_rnn" CACHE PATH "Ditu RNN model and data root." FORCE)
43+
set(DITU_RNN_MODEL ${DITU_INSTALL_DIR}/model)
44+
set(DITU_RNN_DATA ${DITU_INSTALL_DIR}/data.txt)
45+
46+
function (inference_download_and_uncompress target url gz_filename)
47+
message(STATUS "Download inference test stuff ${gz_filename} from ${url}")
48+
execute_process(COMMAND bash -c "mkdir -p ${DITU_INSTALL_DIR}")
49+
execute_process(COMMAND bash -c "cd ${DITU_INSTALL_DIR} && wget -q ${url}")
50+
execute_process(COMMAND bash -c "cd ${DITU_INSTALL_DIR} && tar xzf ${gz_filename}")
51+
message(STATUS "finish downloading ${gz_filename}")
52+
endfunction(inference_download_and_uncompress)
53+
54+
if (NOT EXISTS ${DITU_INSTALL_DIR})
55+
inference_download_and_uncompress(ditu_rnn_model ${DITU_RNN_MODEL_URL} "ditu_rnn_fluid%2Fmodel.tar.gz")
56+
inference_download_and_uncompress(ditu_rnn_data ${DITU_RNN_DATA_URL} "ditu_rnn_fluid%2Fdata.txt.tar.gz")
57+
endif()
58+
59+
inference_analysis_test(test_analyzer SRCS analyzer_tester.cc
60+
EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis
61+
# ir
62+
fc_fuse_pass
63+
graph_viz_pass
64+
infer_clean_graph_pass
65+
graph_pattern_detecter
66+
infer_clean_graph_pass
67+
pass
68+
ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model
69+
--infer_ditu_rnn_model=${DITU_INSTALL_DIR}/model
70+
--infer_ditu_rnn_data=${DITU_INSTALL_DIR}/data.txt)
5071

5172
inference_analysis_test(test_data_flow_graph SRCS data_flow_graph_tester.cc)
5273
inference_analysis_test(test_data_flow_graph_to_fluid_pass SRCS data_flow_graph_to_fluid_pass_tester.cc)

paddle/fluid/inference/analysis/analyzer.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,6 @@
2323
#include "paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h"
2424
#include "paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h"
2525

26-
namespace paddle {
27-
2826
DEFINE_bool(IA_enable_tensorrt_subgraph_engine, false,
2927
"Enable subgraph to TensorRT engine for acceleration");
3028

@@ -35,6 +33,7 @@ DEFINE_string(IA_graphviz_log_root, "./",
3533

3634
DEFINE_string(IA_output_storage_path, "", "optimized model output path");
3735

36+
namespace paddle {
3837
namespace inference {
3938
namespace analysis {
4039

paddle/fluid/inference/analysis/analyzer.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,15 +39,14 @@ limitations under the License. */
3939
#include "paddle/fluid/inference/analysis/pass.h"
4040
#include "paddle/fluid/inference/analysis/pass_manager.h"
4141

42-
namespace paddle {
43-
4442
// TODO(Superjomn) add a definition flag like PADDLE_WITH_TENSORRT and hide this
4543
// flag if not available.
4644
DECLARE_bool(IA_enable_tensorrt_subgraph_engine);
4745
DECLARE_string(IA_graphviz_log_root);
4846
DECLARE_string(IA_output_storage_path);
4947
DECLARE_bool(IA_enable_ir);
5048

49+
namespace paddle {
5150
namespace inference {
5251
namespace analysis {
5352

0 commit comments

Comments
 (0)