Skip to content

Commit e5c8a19

Browse files
authored
Fea/infer for1.3 (#15760)
1 parent d956fcb commit e5c8a19

File tree

4 files changed

+102
-63
lines changed

4 files changed

+102
-63
lines changed

paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,13 @@ std::unique_ptr<ir::Graph> IdentityScaleOpCleanPass::ApplyImpl(
3838
->assert_is_op("scale")
3939
->assert_op_attr<float>("scale", 1.)
4040
->assert_op_attr<float>("bias", 0.);
41-
auto scale_out = detector.mutable_pattern()
42-
->NewNode("scale_out")
43-
->assert_is_op_output("scale");
41+
auto scale_out =
42+
detector.mutable_pattern()
43+
->NewNode("scale_out")
44+
->assert_is_op_output("scale")
45+
// scale's output var should have only one consumer, or it can't be
46+
// removed.
47+
->assert_more([](Node* x) { return x->outputs.size() == 1UL; });
4448

4549
pre_op->LinksTo({scale_in});
4650
scale_op->LinksFrom({scale_in}).LinksTo({scale_out});

paddle/fluid/inference/api/paddle_api.h

Lines changed: 47 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@
1616
/*! \file paddle_api.h
1717
*/
1818

19+
/*! \mainpage Paddle Inference APIs
20+
* \section intro_sec Introduction
21+
* The Paddle inference library aims to offer a high-performance inference SDK
22+
* for Paddle users.
23+
*/
24+
1925
#include <cassert>
2026
#include <memory>
2127
#include <string>
@@ -34,26 +40,49 @@ enum PaddleDType {
3440
};
3541

3642
/**
37-
*\brief Memory menager for PaddleTensor.
43+
* \brief Memory manager for `PaddleTensor`.
3844
*
39-
*The PaddleBuf holds a buffer for data input or output. The memory can be
40-
*allocated by user or by PaddleBuf itself, but in any case, the PaddleBuf
41-
*should be reused for better performance.
45+
* The PaddleBuf holds a buffer for data input or output. The memory can be
46+
* allocated by user or by PaddleBuf itself, but in any case, the PaddleBuf
47+
* should be reused for better performance.
4248
*
43-
*For user allocated memory, the following API can be used:
44-
*- PaddleBuf(void* data, size_t length) to set an external memory by
45-
*specifying
46-
* the memory address and length.
47-
*- Reset(void* data, size_t length) to reset the PaddleBuf with an external
49+
* For user allocated memory, the following API can be used:
50+
* - PaddleBuf(void* data, size_t length) to set an external memory by
51+
* specifying the memory address and length.
52+
* - Reset(void* data, size_t length) to reset the PaddleBuf with an external
4853
*memory.
49-
*ATTENTION, for user allocated memory, deallocation should be done by users
54+
* ATTENTION, for user allocated memory, deallocation should be done by users
5055
*externally after the program finished. The PaddleBuf won't do any allocation
5156
*or deallocation.
5257
*
53-
*To have the PaddleBuf allocate and manage the memory:
54-
*- PaddleBuf(size_t length) will allocate a memory of size `length`.
55-
*- Resize(size_t length) resize the memory to no less than `length`, ATTENTION
58+
* To have the PaddleBuf allocate and manage the memory:
59+
* - PaddleBuf(size_t length) will allocate a memory of size `length`.
60+
* - Resize(size_t length) resize the memory to no less than `length`, ATTENTION
5661
* if the allocated memory is larger than `length`, nothing will be done.
62+
*
63+
* Usage:
64+
*
65+
* Let PaddleBuf manage the memory internally.
66+
* \code{cpp}
67+
* const int num_elements = 128;
68+
* PaddleBuf buf(num_elements * sizeof(float));
69+
* \endcode
70+
*
71+
* Or
72+
* \code{cpp}
73+
* PaddleBuf buf;
74+
* buf.Resize(num_elements * sizeof(float));
75+
* \endcode
76+
* Works exactly the same.
77+
*
78+
* One can also make the `PaddleBuf` use the external memory.
79+
* \code{cpp}
80+
* PaddleBuf buf;
81+
* void* external_memory = new float[num_elements];
82+
* buf.Reset(external_memory, num_elements*sizeof(float));
83+
* ...
84+
* delete[] external_memory; // manage the memory lifetime outside.
85+
* \endcode
5786
*/
5887
class PaddleBuf {
5988
public:
@@ -78,7 +107,7 @@ class PaddleBuf {
78107
/** Tell whether the buffer is empty.
79108
*/
80109
bool empty() const { return length_ == 0; }
81-
/** Get the memory address.
110+
/** Get the data's memory address.
82111
*/
83112
void* data() const { return data_; }
84113
/** Get the memory length.
@@ -110,7 +139,8 @@ struct PaddleTensor {
110139
};
111140

112141
enum class PaddlePlace { kUNK = -1, kCPU, kGPU };
113-
/** Tensor without copy, currently only supports AnalysisPredictor.
142+
143+
/** Tensor without copy, currently only supports `AnalysisPredictor`.
114144
*/
115145
class ZeroCopyTensor {
116146
public:
@@ -269,9 +299,11 @@ struct NativeConfig : public PaddlePredictor::Config {
269299
*
270300
* Usage:
271301
*
302+
* \code{.cpp}
272303
* NativeConfig config;
273304
* ... // change the configs.
274305
* auto native_predictor = CreatePaddlePredictor(config);
306+
* \endcode
275307
*
276308
* FOR EXTENSION DEVELOPER:
277309
* Different predictors are designated by config type. Similar configs can be

paddle/fluid/inference/api/paddle_pass_builder.cc

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,54 @@ void GpuPassStrategy::EnableMKLDNN() {
6666
LOG(ERROR) << "GPU not support MKLDNN yet";
6767
}
6868

69+
GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
70+
passes_.assign({
71+
"infer_clean_graph_pass", //
72+
"identity_scale_op_clean_pass", //
73+
"conv_affine_channel_fuse_pass", //
74+
"conv_eltwiseadd_affine_channel_fuse_pass", //
75+
"conv_bn_fuse_pass", //
76+
#if CUDNN_VERSION >= 7100 // To run conv_fusion, the version of cudnn must be
77+
// guaranteed at least v7
78+
"conv_elementwise_add_act_fuse_pass", //
79+
"conv_elementwise_add2_act_fuse_pass", //
80+
"conv_elementwise_add_fuse_pass", //
81+
#endif
82+
});
83+
84+
for (int i = 6; i >= 3; i--) {
85+
passes_.push_back("transpose_flatten" + std::to_string(i) +
86+
"_concat_fuse_pass");
87+
}
88+
use_gpu_ = true;
89+
}
90+
6991
void PaddlePassBuilder::AppendAnalysisPass(const std::string &pass) {
7092
analysis_passes_.push_back(pass);
7193
}
7294

95+
CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) {
96+
// NOTE the large fusions should be located in the front, so that they will
97+
// not be damaged by smaller ones.
98+
passes_.assign({
99+
"infer_clean_graph_pass", //
100+
"attention_lstm_fuse_pass", //
101+
"seqpool_concat_fuse_pass", //
102+
"seqconv_eltadd_relu_fuse_pass", //
103+
// "embedding_fc_lstm_fuse_pass", //
104+
"fc_lstm_fuse_pass", //
105+
"mul_lstm_fuse_pass", //
106+
"fc_gru_fuse_pass", //
107+
"mul_gru_fuse_pass", //
108+
"seq_concat_fc_fuse_pass", //
109+
"fc_fuse_pass", //
110+
"repeated_fc_relu_fuse_pass", //
111+
"squared_mat_sub_fuse_pass", //
112+
"conv_bn_fuse_pass", //
113+
"conv_eltwiseadd_bn_fuse_pass", //
114+
"is_test_pass", //
115+
"identity_scale_op_clean_pass", //
116+
});
117+
use_gpu_ = false;
118+
}
73119
} // namespace paddle

paddle/fluid/inference/api/paddle_pass_builder.h

Lines changed: 2 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -97,30 +97,7 @@ class PassStrategy : public PaddlePassBuilder {
9797
*/
9898
class CpuPassStrategy : public PassStrategy {
9999
public:
100-
CpuPassStrategy() : PassStrategy({}) {
101-
// NOTE the large fusions should be located in the front, so that they will
102-
// not be damaged by smaller ones.
103-
passes_.assign({
104-
"infer_clean_graph_pass", //
105-
"attention_lstm_fuse_pass", //
106-
"seqpool_concat_fuse_pass", //
107-
"seqconv_eltadd_relu_fuse_pass", //
108-
// "embedding_fc_lstm_fuse_pass", //
109-
"fc_lstm_fuse_pass", //
110-
"mul_lstm_fuse_pass", //
111-
"fc_gru_fuse_pass", //
112-
"mul_gru_fuse_pass", //
113-
"seq_concat_fc_fuse_pass", //
114-
"fc_fuse_pass", //
115-
"repeated_fc_relu_fuse_pass", //
116-
"squared_mat_sub_fuse_pass", //
117-
"conv_bn_fuse_pass", //
118-
"conv_eltwiseadd_bn_fuse_pass", //
119-
"is_test_pass", //
120-
"identity_scale_op_clean_pass", //
121-
});
122-
use_gpu_ = false;
123-
}
100+
CpuPassStrategy();
124101

125102
explicit CpuPassStrategy(const CpuPassStrategy &other)
126103
: PassStrategy(other.AllPasses()) {}
@@ -153,27 +130,7 @@ class CpuPassStrategy : public PassStrategy {
153130
*/
154131
class GpuPassStrategy : public PassStrategy {
155132
public:
156-
GpuPassStrategy() : PassStrategy({}) {
157-
passes_.assign({
158-
"infer_clean_graph_pass", //
159-
"identity_scale_op_clean_pass", //
160-
"conv_affine_channel_fuse_pass", //
161-
"conv_eltwiseadd_affine_channel_fuse_pass", //
162-
"conv_bn_fuse_pass", //
163-
#if CUDNN_VERSION >= 7100 // To run conv_fusion, the version of cudnn must be
164-
// guaranteed at least v7
165-
"conv_elementwise_add_act_fuse_pass", //
166-
"conv_elementwise_add2_act_fuse_pass", //
167-
"conv_elementwise_add_fuse_pass", //
168-
#endif
169-
});
170-
171-
for (int i = 6; i >= 3; i--) {
172-
passes_.push_back("transpose_flatten" + std::to_string(i) +
173-
"_concat_fuse_pass");
174-
}
175-
use_gpu_ = true;
176-
}
133+
GpuPassStrategy();
177134

178135
explicit GpuPassStrategy(const GpuPassStrategy &other)
179136
: PassStrategy(other.AllPasses()) {

0 commit comments

Comments
 (0)