
Commit 6d01f10

Merge branch 'develop' of github.com:PaddlePaddle/Paddle into dist_test_word2vec

2 parents: c7d3273 + 19e877f
70 files changed: 1864 additions, 306 deletions

benchmark/fluid/args.py

Lines changed: 4 additions & 0 deletions

@@ -122,5 +122,9 @@ def parse_args():
         type=str,
         default="",
         help='Directory that contains all the training recordio files.')
+    parser.add_argument(
+        '--use_inference_transpiler',
+        action='store_true',
+        help='If set, uses inference transpiler to optimize the program.')
     args = parser.parse_args()
     return args

benchmark/fluid/fluid_benchmark.py

Mode changed: 100644 → 100755
Lines changed: 5 additions & 0 deletions

@@ -131,6 +131,11 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc,
     exe = fluid.Executor(place)
     exe.run(startup_prog)
 
+    # Use inference_transpiler to speedup
+    if args.use_inference_transpiler:
+        t = fluid.InferenceTranspiler()
+        t.transpile(infer_prog, place)
+
     if not args.use_reader_op:
         feed_var_list = [
             var for var in train_prog.global_block().vars.itervalues()

cmake/external/anakin.cmake

Lines changed: 9 additions & 7 deletions

@@ -26,13 +26,15 @@ function(fetch_include_recursively root_dir)
     endforeach()
 endfunction()
 
-# download library
-message(STATUS "Download Anakin library from ${ANAKIN_LIBRARY_URL}")
-execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_INSTALL_DIR}")
-execute_process(COMMAND bash -c "rm -rf ${ANAKIN_INSTALL_DIR}/*")
-execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; wget -q ${ANAKIN_LIBRARY_URL}")
-execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_INSTALL_DIR}")
-execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; tar xzf anakin_release_simple.tar.gz")
+if (NOT EXISTS "${ANAKIN_INSTALL_DIR}")
+    # download library
+    message(STATUS "Download Anakin library from ${ANAKIN_LIBRARY_URL}")
+    execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_INSTALL_DIR}")
+    execute_process(COMMAND bash -c "rm -rf ${ANAKIN_INSTALL_DIR}/*")
+    execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; wget -q ${ANAKIN_LIBRARY_URL}")
+    execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_INSTALL_DIR}")
+    execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; tar xzf anakin_release_simple.tar.gz")
+endif()
 
 if (WITH_ANAKIN)
     message(STATUS "Anakin for inference is enabled")

cmake/inference_lib.cmake

Lines changed: 21 additions & 9 deletions

@@ -149,21 +149,33 @@ copy(memory_lib
     DSTS ${dst_dir}/${module} ${dst_dir}/${module}/detail
 )
 
-set(module "inference")
-copy(inference_lib DEPS paddle_fluid_shared paddle_fluid
-    SRCS ${src_dir}/${module}/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/inference/libpaddle_fluid.*
-    DSTS ${dst_dir}/${module} ${dst_dir}/${module}
-)
+set(inference_deps paddle_fluid_shared paddle_fluid)
 
 if(WITH_CONTRIB)
-    set(contrib_dst_dir "${FLUID_INSTALL_DIR}/contrib/inference")
-    copy(contrib_inference_lib DEPS paddle_inference_api
+    message(STATUS "installing contrib")
+    set(contrib_dst_dir "${FLUID_INSTALL_DIR}/contrib/inference")
+    if (WITH_ANAKIN)
+        copy(contrib_anakin_inference_lib DEPS paddle_inference_api inference_anakin_api
+            SRCS
+            ${PADDLE_BINARY_DIR}/paddle/contrib/inference/libinference_anakin_api* # compiled anakin api
+            ${PADDLE_BINARY_DIR}/third_party/install/anakin/*.tar.gz # anakin release
+            DSTS ${contrib_dst_dir}/anakin ${contrib_dst_dir}/anakin)
+        list(APPEND inference_deps contrib_anakin_inference_lib)
+    endif()
+
+    copy(contrib_inference_lib DEPS paddle_inference_api
         SRCS ${PADDLE_SOURCE_DIR}/paddle/contrib/inference/paddle_inference_api.h
             ${PADDLE_BINARY_DIR}/paddle/contrib/inference/libpaddle_inference_api.*
-        DSTS ${contrib_dst_dir} ${contrib_dst_dir}
-    )
+        DSTS ${contrib_dst_dir} ${contrib_dst_dir})
+    list(APPEND inference_deps contrib_inference_lib)
 endif()
 
+set(module "inference")
+copy(inference_lib DEPS ${inference_deps}
+    SRCS ${src_dir}/${module}/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/inference/libpaddle_fluid.*
+    DSTS ${dst_dir}/${module} ${dst_dir}/${module}
+)
+
 set(module "platform")
 copy(platform_lib DEPS profiler_py_proto
     SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/dynload/*.h ${src_dir}/${module}/details/*.h

paddle/contrib/inference/CMakeLists.txt

Lines changed: 13 additions & 2 deletions

@@ -18,7 +18,7 @@ if(APPLE)
 endif(APPLE)
 
 
-set(inference_deps paddle_inference_api paddle_fluid_api)
+set(inference_deps paddle_inference_api paddle_fluid_api paddle_inference_tensorrt_subgraph_engine)
 
 function(inference_api_test TARGET_NAME)
     if (WITH_TESTING)
@@ -50,13 +50,24 @@ cc_test(test_paddle_inference_api
 inference_api_test(test_paddle_inference_api_impl
     ARGS test_word2vec test_image_classification)
 
+if(WITH_GPU AND TENSORRT_FOUND)
+    cc_library(paddle_inference_tensorrt_subgraph_engine
+        SRCS paddle_inference_api_tensorrt_subgraph_engine.cc
+        DEPS paddle_inference_api analysis tensorrt_engine paddle_inference_api paddle_fluid_api)
+
+    inference_api_test(test_paddle_inference_api_tensorrt_subgraph_engine ARGS test_word2vec)
+endif()
+
 if (WITH_ANAKIN AND WITH_TESTING) # only needed in CI
     # Due to Anakin do not have official library releases and the versions of protobuf and cuda do not match Paddle's,
     # so anakin library will not be merged to our official inference library. To use anakin prediction API, one need to
     # compile the libinference_anakin_api.a and compile with anakin.so.
-    nv_library(inference_anakin_api SHARED SRCS paddle_inference_api.cc paddle_inference_api_anakin_engine.cc)
+    nv_library(inference_anakin_api SRCS paddle_inference_api.cc paddle_inference_api_anakin_engine.cc)
+    nv_library(inference_anakin_api_shared SHARED SRCS paddle_inference_api.cc paddle_inference_api_anakin_engine.cc)
     target_compile_options(inference_anakin_api BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})
+    target_compile_options(inference_anakin_api_shared BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})
     target_link_libraries(inference_anakin_api anakin anakin_saber_common)
+    target_link_libraries(inference_anakin_api_shared anakin anakin_saber_common)
     cc_test(inference_anakin_test SRCS paddle_inference_api_anakin_engine_tester.cc
         ARGS --model=${ANAKIN_INSTALL_DIR}/mobilenet_v2.anakin.bin
         DEPS inference_anakin_api)

paddle/contrib/inference/demo/CMakeLists.txt

Lines changed: 5 additions & 0 deletions

@@ -15,6 +15,11 @@
 
 inference_api_test(simple_on_word2vec ARGS test_word2vec)
 
+option(WITH_INFERENCE_DEMO "Compile with Inference demo" OFF)
+if(NOT WITH_INFERENCE_DEMO)
+    return()
+endif()
+
 set(DEMO_INSTALL_DIR "${PADDLE_BINARY_DIR}/inference_demo")
 set(URL_ROOT http://paddlemodels.bj.bcebos.com/inference-vis-demos%2F)

Lines changed: 87 additions & 0 deletions (new file)

# Paddle Inference API

To make inference deployment simpler, Fluid provides a set of high-level APIs that hide the different optimization implementations underneath.

The inference library contains:

- the header `paddle_inference_api.h`, which defines all of the interfaces
- the library files `libpaddle_fluid.so` / `libpaddle_fluid.a`
- the library files `libpaddle_inference_api.so` / `libpaddle_inference_api.a`

The main API concepts are described in detail below.

## PaddleTensor

PaddleTensor defines the basic input/output data format for inference. Its definition is

```c++
struct PaddleTensor {
  std::string name;  // variable name.
  std::vector<int> shape;
  PaddleBuf data;  // blob of data.
  PaddleDType dtype;
};
```

- `name` specifies the name of the corresponding variable in the model (currently unused, but it will be enabled once arbitrary targets are supported)
- `shape` is the shape of the tensor
- `data` is stored as contiguous memory in a `PaddleBuf`, which can either wrap external data or `malloc` its own memory; see the definitions in the header for details
- `dtype` is the data type of the tensor

## engine

The high-level API is backed by several optimized implementations, which we call engines. There are currently three:

- the native engine, built from Paddle's native forward operators, which naturally supports every model trained with Paddle
- the Anakin engine, which wraps [Anakin](https://github.com/PaddlePaddle/Anakin); it performs well on some models, but it only accepts its own model format and cannot support all Paddle models
- the TensorRT mixed engine, which supports [TensorRT](https://developer.nvidia.com/tensorrt) through subgraphs; it supports all Paddle models and automatically offloads parts of the computation graph to TensorRT for acceleration (WIP)

They are selected with

```c++
enum class PaddleEngineKind {
  kNative = 0,         // Use the native Fluid facility.
  kAnakin,             // Use Anakin for inference.
  kAutoMixedTensorRT   // Automatically mixing TensorRT with the Fluid ops.
};
```

## Inference deployment workflow

Overall there are three steps:

1. create a `PaddlePredictor` with a suitable config
2. create the input `PaddleTensor`s and pass them to the `PaddlePredictor`
3. fetch the output `PaddleTensor`s and read out the results

The following walks through a complete run of a simple model, with some detail code omitted:

```c++
#include "paddle_inference_api.h"

// Create a config and adjust the relevant settings.
paddle::NativeConfig config;
config.model_dir = "xxx";
config.use_gpu = false;
// Create a native PaddlePredictor.
auto predictor =
    paddle::CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
// Create the input tensor.
int64_t data[4] = {1, 2, 3, 4};
paddle::PaddleTensor tensor{.name = "",
                            .shape = std::vector<int>({4, 1}),
                            .data = PaddleBuf(data, sizeof(data)),
                            .dtype = PaddleDType::INT64};
// Create the output tensors; their memory can be reused.
std::vector<paddle::PaddleTensor> outputs;
// Run inference.
CHECK(predictor->Run(slots, &outputs));
// Read the outputs ...
```
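
One detail elided above is the `slots` argument handed to `Run()`. As a purely hypothetical sketch (the real feed code lives in the demos linked below), it could simply be the input tensors collected into a vector:

```c++
// Hypothetical sketch of the elided detail: the inputs passed to Run() are
// just the PaddleTensors built above, gathered into a vector.
std::vector<paddle::PaddleTensor> slots = {tensor};
CHECK(predictor->Run(slots, &outputs));
```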

At build time, just link against `libpaddle_fluid.a/.so` and `libpaddle_inference_api.a/.so`.

## Further reference code

- [inference demos](./demo)
- [more complete single-threaded / multi-threaded examples](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/contrib/inference/test_paddle_inference_api_impl.cc)

paddle/contrib/inference/paddle_inference_api.h

Lines changed: 8 additions & 3 deletions

@@ -73,12 +73,12 @@ struct PaddleTensor {
 };
 
 enum class PaddleEngineKind {
-  kNative = 0,  // Use the native Fluid facility.
-  kAnakin,      // Use Anakin for inference.
+  kNative = 0,         // Use the native Fluid facility.
+  kAnakin,             // Use Anakin for inference.
+  kAutoMixedTensorRT,  // Automatically mix Fluid with TensorRT.
   // TODO(Superjomn) support following engines latter.
   // kTensorRT,           // Use TensorRT for inference.
   // kAutoMixedAnakin,    // Automatically mix Fluid with Anakin.
-  // kAutoMixedTensorRT,  // Automatically mix Fluid with TensorRT.
 };
 
 /*
@@ -130,6 +130,11 @@ struct AnakinConfig : public PaddlePredictor::Config {
   int max_batch_size{-1};
 };
 
+struct TensorRTConfig : public NativeConfig {
+  // Determine whether a subgraph will be executed by TRT.
+  int min_subgraph_size{1};
+};
+
 // A factory to help create different predictors.
 //
 // FOR EXTENSION DEVELOPER:
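
The new `TensorRTConfig` inherits `NativeConfig`, so a caller configures it the same way and only picks the new engine kind when creating the predictor. Below is a hedged usage sketch, assuming the `CreatePaddlePredictor` factory is specialized for `kAutoMixedTensorRT` the same way it is for `kNative` in the API document above; the field values are illustrative only.

```c++
#include "paddle_inference_api.h"

// Hypothetical usage of the TensorRT mixed engine added in this commit.
// Assumes a factory specialization for kAutoMixedTensorRT exists.
paddle::TensorRTConfig config;
config.model_dir = "xxx";      // inherited from NativeConfig
config.use_gpu = true;         // TensorRT requires a GPU build
config.min_subgraph_size = 3;  // illustrative: only hand larger subgraphs to TRT
auto predictor = paddle::CreatePaddlePredictor<paddle::TensorRTConfig,
                                               paddle::PaddleEngineKind::kAutoMixedTensorRT>(config);
```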

paddle/contrib/inference/paddle_inference_api_impl.cc

Lines changed: 5 additions & 1 deletion

@@ -89,6 +89,7 @@ bool NativePaddlePredictor::Init(
     LOG(ERROR) << "fail to load inference model.";
     return false;
   }
+
   ctx_ = executor_->Prepare(*inference_program_, 0);
   executor_->CreateVariables(
       *inference_program_, sub_scope_ ? sub_scope_ : scope_.get(), 0);
@@ -119,6 +120,7 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
     return false;
   }
   for (size_t i = 0; i < feed_target_names_.size(); ++i) {
+    VLOG(4) << "setting " << i << "-th target";
     feed_targets[feed_target_names_[i]] = &feeds[i];
   }
   // get fetch variable
@@ -130,14 +132,16 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
   }
   // Run the inference program
   // if share variables, we need not create variables
+  VLOG(4) << "Run prepared context";
   executor_->RunPreparedContext(
       ctx_.get(),
       sub_scope_ != nullptr ? sub_scope_ : scope_.get(),
       &feed_targets,
       &fetch_targets,
       false /* don't create variable eatch time */);
+  VLOG(4) << "Finish prepared context";
   if (!GetFetch(fetchs, output_data)) {
-    LOG(ERROR) << "fail to get fetchs";
+    LOG(ERROR) << "fail to get fetches";
     return false;
   }
   VLOG(3) << "predict cost: " << timer.toc() << "ms";

paddle/contrib/inference/paddle_inference_api_impl.h

Lines changed: 1 addition & 1 deletion

@@ -44,7 +44,7 @@ class NativePaddlePredictor : public PaddlePredictor {
 
   ~NativePaddlePredictor() override;
 
- private:
+ protected:
   bool SetFeed(const std::vector<PaddleTensor> &input_datas,
                std::vector<framework::LoDTensor> *feeds);
   bool GetFetch(const std::vector<framework::LoDTensor> &fetchs,
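
Loosening `private` to `protected` lets engine-specific predictors derive from `NativePaddlePredictor` and reuse its feed/fetch plumbing (the TensorRT subgraph engine added in this commit is the obvious candidate). A hypothetical subclass sketch, using only the members visible in this diff:

```c++
// Hypothetical subclass: with SetFeed/GetFetch now protected, a derived
// predictor can reuse the native feed/fetch helpers around its own engine.
class MySubgraphPredictor : public NativePaddlePredictor {
 public:
  using NativePaddlePredictor::NativePaddlePredictor;

  bool Run(const std::vector<PaddleTensor> &inputs,
           std::vector<PaddleTensor> *output_data) override {
    std::vector<framework::LoDTensor> feeds;
    if (!SetFeed(inputs, &feeds)) return false;  // now accessible to subclasses
    // ... run the engine-specific program here, then convert the fetched
    // LoDTensors back into PaddleTensors via GetFetch(...) ...
    return true;
  }
};
```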
