Skip to content

Commit d9bf73f

Browse files
committed
Merge remote-tracking branch 'ups/develop' into feature/op/fusion_gru
2 parents 6fad27f + f5d5d7b commit d9bf73f

25 files changed

+1008
-88
lines changed

cmake/external/anakin.cmake

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@ if (NOT WITH_ANAKIN)
22
return()
33
endif()
44

5+
# Opt-in switch (default OFF) for more detailed Anakin per-op timing information.
option(ANAKIN_ENABLE_OP_TIMER "Get more detailed information with Anakin op time" OFF)
6+
if(ANAKIN_ENABLE_OP_TIMER)
7+
# Expose the switch to compiled sources as the PADDLE_ANAKIN_ENABLE_OP_TIMER
# preprocessor definition (directory-scoped via add_definitions).
add_definitions(-DPADDLE_ANAKIN_ENABLE_OP_TIMER)
8+
endif()
9+
510
INCLUDE(ExternalProject)
611
set(ANAKIN_SOURCE_DIR ${THIRD_PARTY_PATH}/anakin)
712
# The Anakin install dir is fixed to the default one for now.
@@ -11,23 +16,34 @@ set(ANAKIN_LIBRARY ${ANAKIN_INSTALL_DIR})
1116
set(ANAKIN_SHARED_LIB ${ANAKIN_LIBRARY}/libanakin.so)
1217
set(ANAKIN_SABER_LIB ${ANAKIN_LIBRARY}/libanakin_saber_common.so)
1318

14-
# TODO(luotao): ANAKIN_MODLE_URL will move to demo ci later.
15-
set(ANAKIN_MODLE_URL "http://paddle-inference-dist.bj.bcebos.com/mobilenet_v2.anakin.bin")
19+
# TODO(luotao): ANAKIN_MODLE_URL etc will move to demo ci later.
20+
set(INFERENCE_URL "http://paddle-inference-dist.bj.bcebos.com")
21+
set(ANAKIN_MODLE_URL "${INFERENCE_URL}/mobilenet_v2.anakin.bin")
22+
set(ANAKIN_RNN_MODLE_URL "${INFERENCE_URL}/anakin_test%2Fditu_rnn.anakin2.model.bin")
23+
set(ANAKIN_RNN_DATA_URL "${INFERENCE_URL}/anakin_test%2Fditu_rnn_data.txt")
1624
execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_SOURCE_DIR}")
17-
execute_process(COMMAND bash -c "cd ${ANAKIN_SOURCE_DIR}; wget -q --no-check-certificate ${ANAKIN_MODLE_URL}")
25+
execute_process(COMMAND bash -c "cd ${ANAKIN_SOURCE_DIR}; wget -q --no-check-certificate ${ANAKIN_MODLE_URL} -N")
26+
execute_process(COMMAND bash -c "cd ${ANAKIN_SOURCE_DIR}; wget -q --no-check-certificate ${ANAKIN_RNN_MODLE_URL} -N")
27+
execute_process(COMMAND bash -c "cd ${ANAKIN_SOURCE_DIR}; wget -q --no-check-certificate ${ANAKIN_RNN_DATA_URL} -N")
1828

1929
include_directories(${ANAKIN_INCLUDE})
2030
include_directories(${ANAKIN_INCLUDE}/saber/)
31+
include_directories(${ANAKIN_INCLUDE}/saber/core/)
32+
include_directories(${ANAKIN_INCLUDE}/saber/funcs/impl/x86/)
33+
include_directories(${ANAKIN_INCLUDE}/saber/funcs/impl/cuda/base/cuda_c/)
2134

2235
set(ANAKIN_COMPILE_EXTRA_FLAGS
2336
-Wno-error=unused-but-set-variable -Wno-unused-but-set-variable
2437
-Wno-error=unused-variable -Wno-unused-variable
2538
-Wno-error=format-extra-args -Wno-format-extra-args
26-
-Wno-error=comment -Wno-comment
27-
-Wno-error=format -Wno-format
39+
-Wno-error=comment -Wno-comment
40+
-Wno-error=format -Wno-format
41+
-Wno-error=maybe-uninitialized -Wno-maybe-uninitialized
2842
-Wno-error=switch -Wno-switch
2943
-Wno-error=return-type -Wno-return-type
3044
-Wno-error=non-virtual-dtor -Wno-non-virtual-dtor
45+
-Wno-error=ignored-qualifiers
46+
-Wno-ignored-qualifiers
3147
-Wno-sign-compare
3248
-Wno-reorder
3349
-Wno-error=cpp)
@@ -38,7 +54,7 @@ ExternalProject_Add(
3854
DEPENDS ${MKLML_PROJECT}
3955
# Anakin code fails on Intel(R) Xeon(R) Gold 5117 CPU; temporarily do not compile avx512-related code.
4056
GIT_REPOSITORY "https://github.com/luotao1/Anakin"
41-
GIT_TAG "bcf17aabe7921ceb7bce591244b4f9dce7dba5c8"
57+
GIT_TAG "211d1fc5d813d70c0c14072f9083cf25f40940ea"
4258
PREFIX ${ANAKIN_SOURCE_DIR}
4359
UPDATE_COMMAND ""
4460
CMAKE_ARGS -DUSE_GPU_PLACE=YES
@@ -48,6 +64,7 @@ ExternalProject_Add(
4864
-DMKLML_ROOT=${THIRD_PARTY_PATH}/install/mklml
4965
-DCUDNN_ROOT=${CUDNN_ROOT}
5066
-DCUDNN_INCLUDE_DIR=${CUDNN_INCLUDE_DIR}
67+
-DENABLE_OP_TIMER=${ANAKIN_ENABLE_OP_TIMER}
5168
${EXTERNAL_OPTIONAL_ARGS}
5269
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${ANAKIN_INSTALL_DIR}
5370
)

doc/fluid/dev/new_op_cn.md

Lines changed: 100 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,10 +119,29 @@ $$Out = scale*X$$
119119
120120
这个例子有`AddAttr<AttrType>("scale", "...").SetDefault(1.0);` : 增加`scale`系数,作为参数属性,并且设置默认值为1.0。
121121
122+
### 定义GradProtoMaker类
123+
每个Op都必须有一个对应的GradProtoMaker,若未定制对应前向Op的GradProtoMaker,fluid提供了DefaultGradProtoMaker,默认注册会使用全部输入输出,包括Input, Output, Output@Grad等,使用不需要的变量会造成显存浪费。
124+
下面示例定义了ScaleOp的GradProtoMaker。
125+
126+
```cpp
127+
// Example grad-op maker for the scale op: it describes the backward op as
// another "scale" op instead of relying on the default maker.
class ScaleGradMaker : public framework::SingleGradOpDescMaker {
128+
public:
129+
// Reuse the base-class constructors unchanged.
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
130+
131+
// Builds and returns the description of the gradient op.
std::unique_ptr<framework::OpDesc> Apply() const override {
132+
auto *grad_op = new framework::OpDesc();
133+
// The backward op has the same type as the forward op.
grad_op->SetType("scale");
134+
// Input "X" of the grad op is fed from OutputGrad("Out")
// (presumably the gradient of the forward output "Out" -- confirm).
grad_op->SetInput("X", OutputGrad("Out"));
135+
// Output "Out" of the grad op is bound to InputGrad("X")
// (presumably the gradient of the forward input "X" -- confirm).
grad_op->SetOutput("Out", InputGrad("X"));
136+
// Carry over the "scale" attribute read via GetAttr.
grad_op->SetAttr("scale", GetAttr("scale"));
137+
// Ownership of the raw pointer is handed to the returned unique_ptr.
return std::unique_ptr<framework::OpDesc>(grad_op);
138+
}
139+
};
140+
```
122141
123142
### 定义Operator类
124143
125-
下面的点实现了MulOp的定义
144+
下面实现了MulOp的定义
126145
127146
```cpp
128147
class MulOp : public framework::OperatorWithKernel {
@@ -334,3 +353,83 @@ ctest -R test_mul_op
334353
- 注册Op时的类型名,需要和该Op的名字一样。即不允许在`A_op.cc`里面,注册`REGISTER_OPERATOR(B, ...)`等,这将会导致单元测试出错。
335354
- 如果Op没有实现CUDA Kernel,请不要创建空的`*_op.cu`,这将会导致单元测试出错。
336355
- 如果多个Op依赖一些共用的函数,可以创建非`*_op.*`格式的文件来存放,如`gather.h`文件。
356+
357+
### PADDLE_ENFORCE使用注意
358+
359+
实现Op时检查数据的合法性需要使用PADDLE_ENFORCE以及PADDLE_ENFORCE_EQ等宏定义,基本格式如下:
360+
361+
```
362+
PADDLE_ENFORCE(表达式, 错误提示信息)
363+
PADDLE_ENFORCE_EQ(比较对象A, 比较对象B, 错误提示信息)
364+
```
365+
366+
如果表达式为真,或者比较对象A=B,则检查通过,否则会终止程序运行,向用户反馈相应的错误提示信息。
367+
为了确保提示友好易懂,开发者需要注意其使用方法。
368+
369+
#### 总体原则
370+
371+
任何使用了`PADDLE_ENFORCE`与`PADDLE_ENFORCE_*`检查的地方,必须有详略得当的备注解释!**错误提示信息**不能为空!
372+
373+
#### 提示信息书写标准
374+
375+
1. [required] 哪里错了?为什么错了?
376+
- 例如:`ValueError: Mismatched label shape`
377+
2. [optional] 期望的输入是什么样的?实际的输入是怎样的?
378+
- 例如:`Expected labels dimension=1. Received 4.`
379+
3. [optional] 能否给出修改意见?
380+
- 例如:`Suggested Fix: If your classifier expects one-hot encoding label, check your n_classes argument to the estimator and/or the shape of your label. Otherwise, check the shape of your label.`
381+
382+
如果并非必要或者简洁的描述即可表达清楚以上要点,根据情况书写亦可。
383+
384+
##### FAQ 典型问题
385+
386+
1. 无报错信息或报错信息过于简单,不能给用户提供有效的提示!
387+
388+
问题示例1 :未写提示信息
389+
```
390+
PADDLE_ENFORCE(ctx->HasInput("X"), "");
391+
```
392+
问题示例2 :提示信息过于简单
393+
```
394+
PADDLE_ENFORCE(i != nullptr, "I must be set"); // I是什么?
395+
```
396+
397+
2. 在报错信息中使用开发人员定义的变量缩写,不易理解!
398+
399+
问题示例:
400+
```
401+
PADDLE_ENFORCE(forward_pd != nullptr,
402+
"Fail to find eltwise_fwd_pd in device context"); //eltwise_fwd_pd用户可能看不懂
403+
```
404+
405+
3. OP内部调用非法接口:Op内部如果出现Output = ShareDataWith(Input)
406+
问题示例:
407+
```cpp
408+
auto *out = ctx.Output<framework::LoDTensor>("Out");
409+
auto *in = ctx.Input<framework::LoDTensor>("X");
410+
out->ShareDataWith(*in);
411+
```
412+
Op内部如果出现Output = ShareDataWith(Input),相当于operator图中有一条隐藏边,连接了Input和Output,这条边无法在图分析中表达,引发基于图优化的错误。
413+
414+
4. OP实现的性能实践
415+
调用了eigen的broadcast, chop等操作,性能会比手写cuda kernel差几倍以上。此时cpu的实现可以复用eigen,gpu实现可以实现cuda kernel.
416+
417+
418+
#### OP InferShape检查提示信息特别说明
419+
420+
- 检查输入输出变量,请统一遵循以下格式
421+
`Input(变量名) of OP名 operator should not be null.`
422+
423+
正确示例:
424+
```
425+
PADDLE_ENFORCE(ctx->HasInput("Input"),
426+
"Input(Input) of LSTMP operator should not be null.");
427+
```
428+
429+
- 反向Op的输入输出检查,要写明反向Op的名字
430+
431+
正确示例:
432+
```
433+
PADDLE_ENFORCE(ctx->HasInput("X"),
434+
"Input(X) of LoDResetGrad operator should not be null.");
435+
```

paddle/fluid/framework/ir/graph_helper.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ std::map<ir::Node *, std::unordered_set<ir::Node *>> BuildOperationAdjList(
104104
for (auto &adj_n : var->inputs) {
105105
PADDLE_ENFORCE(adj_n->NodeType() == ir::Node::Type::kOperation);
106106
adj_list[n].insert(adj_n);
107-
VLOG(3) << "adj " << adj_n->Name() << reinterpret_cast<void *>(adj_n)
107+
VLOG(4) << "adj " << adj_n->Name() << reinterpret_cast<void *>(adj_n)
108108
<< " -> " << n->Name() << reinterpret_cast<void *>(n)
109109
<< " via " << var->Name() << reinterpret_cast<void *>(var);
110110
}

paddle/fluid/inference/analysis/CMakeLists.txt

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ function (inference_analysis_test TARGET)
2222
if(WITH_TESTING)
2323
set(options "")
2424
set(oneValueArgs "")
25-
set(multiValueArgs SRCS)
25+
set(multiValueArgs SRCS EXTRA_DEPS)
2626
cmake_parse_arguments(analysis_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
2727

2828
set(mem_opt "")
@@ -31,22 +31,43 @@ function (inference_analysis_test TARGET)
3131
endif()
3232
cc_test(${TARGET}
3333
SRCS "${analysis_test_SRCS}"
34-
DEPS analysis graph fc_fuse_pass graph_viz_pass infer_clean_graph_pass graph_pattern_detecter pass
34+
DEPS analysis graph fc_fuse_pass graph_viz_pass infer_clean_graph_pass graph_pattern_detecter pass ${analysis_test_EXTRA_DEPS}
3535
ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model ${mem_opt})
3636
set_tests_properties(${TARGET} PROPERTIES DEPENDS test_word2vec)
3737
endif(WITH_TESTING)
3838
endfunction(inference_analysis_test)
3939

40-
cc_test(test_analyzer SRCS analyzer_tester.cc DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis
41-
# ir
42-
fc_fuse_pass
43-
graph_viz_pass
44-
infer_clean_graph_pass
45-
graph_pattern_detecter
46-
pass
47-
ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model)
48-
#set_tests_properties(test_analyzer PROPERTIES DEPENDS test_word2vec)
49-
#inference_api_test(test_analyzer SRC analyzer_tester.cc ARGS test_word2vec)
40+
# Remote archives for the Ditu RNN model and its input data.
set(DITU_RNN_MODEL_URL "http://paddle-inference-dist.bj.bcebos.com/ditu_rnn_fluid%2Fmodel.tar.gz")
41+
set(DITU_RNN_DATA_URL "http://paddle-inference-dist.bj.bcebos.com/ditu_rnn_fluid%2Fdata.txt.tar.gz")
42+
# Local directory for the downloaded files. NOTE(review): CACHE ... FORCE
# overwrites any value the user supplied for this cache entry on every configure.
set(DITU_INSTALL_DIR "${THIRD_PARTY_PATH}/install/ditu_rnn" CACHE PATH "Ditu RNN model and data root." FORCE)
43+
# Paths inside the install dir (presumably what unpacking the two archives
# produces -- confirm against the archive contents).
set(DITU_RNN_MODEL ${DITU_INSTALL_DIR}/model)
44+
set(DITU_RNN_DATA ${DITU_INSTALL_DIR}/data.txt)
45+
46+
# Download an archive into DITU_INSTALL_DIR and unpack it there. All steps run
# through execute_process(), i.e. while CMake configures the project.
#   target      - label for the download; not referenced anywhere in the body
#   url         - address handed to wget
#   gz_filename - file name of the downloaded archive, handed to `tar xzf`
# NOTE(review): no execute_process() call captures a result code, so a failed
# mkdir, download or extraction is not reported by this function.
function (inference_download_and_uncompress target url gz_filename)
47+
message(STATUS "Download inference test stuff ${gz_filename} from ${url}")
48+
# Make sure the destination directory exists before changing into it.
execute_process(COMMAND bash -c "mkdir -p ${DITU_INSTALL_DIR}")
49+
execute_process(COMMAND bash -c "cd ${DITU_INSTALL_DIR} && wget -q ${url}")
50+
# gz_filename must match the name wget saved the download under.
execute_process(COMMAND bash -c "cd ${DITU_INSTALL_DIR} && tar xzf ${gz_filename}")
51+
message(STATUS "finish downloading ${gz_filename}")
52+
endfunction(inference_download_and_uncompress)
53+
54+
# Fetch the Ditu RNN model and data only when the install directory is absent.
# NOTE(review): the guard tests the directory, not the unpacked files, so a
# directory left behind by an interrupted download would skip both fetches.
if (NOT EXISTS ${DITU_INSTALL_DIR})
55+
inference_download_and_uncompress(ditu_rnn_model ${DITU_RNN_MODEL_URL} "ditu_rnn_fluid%2Fmodel.tar.gz")
56+
inference_download_and_uncompress(ditu_rnn_data ${DITU_RNN_DATA_URL} "ditu_rnn_fluid%2Fdata.txt.tar.gz")
57+
endif()
58+
59+
inference_analysis_test(test_analyzer SRCS analyzer_tester.cc
60+
EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis
61+
# ir
62+
fc_fuse_pass
63+
graph_viz_pass
64+
infer_clean_graph_pass
65+
graph_pattern_detecter
66+
infer_clean_graph_pass
67+
pass
68+
ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model
69+
--infer_ditu_rnn_model=${DITU_INSTALL_DIR}/model
70+
--infer_ditu_rnn_data=${DITU_INSTALL_DIR}/data.txt)
5071

5172
inference_analysis_test(test_data_flow_graph SRCS data_flow_graph_tester.cc)
5273
inference_analysis_test(test_data_flow_graph_to_fluid_pass SRCS data_flow_graph_to_fluid_pass_tester.cc)

paddle/fluid/inference/analysis/analyzer.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,6 @@
2323
#include "paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h"
2424
#include "paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h"
2525

26-
namespace paddle {
27-
2826
DEFINE_bool(IA_enable_tensorrt_subgraph_engine, false,
2927
"Enable subgraph to TensorRT engine for acceleration");
3028

@@ -35,6 +33,7 @@ DEFINE_string(IA_graphviz_log_root, "./",
3533

3634
DEFINE_string(IA_output_storage_path, "", "optimized model output path");
3735

36+
namespace paddle {
3837
namespace inference {
3938
namespace analysis {
4039

paddle/fluid/inference/analysis/analyzer.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,15 +39,14 @@ limitations under the License. */
3939
#include "paddle/fluid/inference/analysis/pass.h"
4040
#include "paddle/fluid/inference/analysis/pass_manager.h"
4141

42-
namespace paddle {
43-
4442
// TODO(Superjomn) add a definition flag like PADDLE_WITH_TENSORRT and hide this
4543
// flag if not available.
4644
DECLARE_bool(IA_enable_tensorrt_subgraph_engine);
4745
DECLARE_string(IA_graphviz_log_root);
4846
DECLARE_string(IA_output_storage_path);
4947
DECLARE_bool(IA_enable_ir);
5048

49+
namespace paddle {
5150
namespace inference {
5251
namespace analysis {
5352

0 commit comments

Comments
 (0)