Skip to content

Commit 35cff5e

Browse files
committed
Merge branch 'develop' into multi-thread2
2 parents 1a373fb + 9df2d8b commit 35cff5e

26 files changed

+411
-279
lines changed

doc/fluid/new_docs/user_guides/howto/inference/native_infer.rst

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -4,13 +4,12 @@ Paddle 预测 API
44
为了更简单方便的预测部署,Fluid 提供了一套高层 API
55
用来隐藏底层不同的优化实现。
66

7-
`预测库相关代码 <https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/contrib/inference>`__
7+
`预测库相关代码 <https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/fluid/inference/api>`_
88
包括
99

1010
- 头文件 ``paddle_inference_api.h`` 定义了所有的接口
1111
- 库文件\ ``libpaddle_fluid.so`` 或 ``libpaddle_fluid.a``
12-
- 库文件 ``libpaddle_inference_api.so`` 或
13-
``libpaddle_inference_api.a``
12+
1413

1514
编译和依赖可以参考 :ref:`install_or_build_cpp_inference_lib` 。
1615

@@ -97,8 +96,7 @@ engine
9796
CHECK(predictor->Run(slots, &outputs));
9897
// 获取 outputs ...
9998
100-
编译时,联编 ``libpaddle_fluid.a/.so`` 和
101-
``libpaddle_inference_api.a/.so`` 便可。
99+
编译时,联编 ``libpaddle_fluid.a/.so`` 便可。
102100

103101
详细代码参考
104102
------------

paddle/fluid/API.spec

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -312,7 +312,7 @@ paddle.fluid.layers.iou_similarity ArgSpec(args=[], varargs='args', keywords='kw
312312
paddle.fluid.layers.box_coder ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
313313
paddle.fluid.layers.polygon_box_transform ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
314314
paddle.fluid.layers.accuracy ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None))
315-
paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk'], varargs=None, keywords=None, defaults=('ROC', 200, 1))
315+
paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk'], varargs=None, keywords=None, defaults=('ROC', 4095, 1))
316316
paddle.fluid.layers.exponential_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
317317
paddle.fluid.layers.natural_exp_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
318318
paddle.fluid.layers.inverse_time_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))

paddle/fluid/framework/details/multi_devices_graph_pass.cc

Lines changed: 1 addition & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -326,7 +326,7 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
326326
ir::Graph &result = *graph;
327327

328328
for (auto &node : nodes) {
329-
if (node->NodeType() == ir::Node::Type::kVariable && node->Var()) {
329+
if (node->IsVar() && node->Var()) {
330330
all_vars_.emplace(node->Name(), node->Var());
331331
}
332332
}
@@ -583,18 +583,6 @@ void MultiDevSSAGraphBuilder::InsertDataBalanceOp(
583583
}
584584
}
585585

586-
bool MultiDevSSAGraphBuilder::IsParameterGradientOnce(
587-
const std::string &og,
588-
std::unordered_set<std::string> *og_has_been_broadcast) const {
589-
bool is_pg_once =
590-
grad_names_.count(og) != 0 && og_has_been_broadcast->count(og) == 0;
591-
if (is_pg_once) {
592-
// Insert NCCL AllReduce Op
593-
og_has_been_broadcast->insert(og);
594-
}
595-
return is_pg_once;
596-
}
597-
598586
int MultiDevSSAGraphBuilder::GetOpDeviceID(const ir::Graph &graph,
599587
ir::Node *node) const {
600588
if (strategy_.reduce_ != BuildStrategy::ReduceStrategy::kReduce) {
@@ -688,20 +676,6 @@ VarHandle *MultiDevSSAGraphBuilder::CreateReduceOp(ir::Graph *result,
688676
return var;
689677
}
690678

691-
// Find the first occurence of `prev_op_name` and make current `op` depend
692-
// on it.
693-
void MultiDevSSAGraphBuilder::ConnectOp(ir::Graph *result, OpHandleBase *op,
694-
const std::string &prev_op_name) const {
695-
for (auto &prev_op : result->Get<GraphOps>(kGraphOps)) {
696-
if (prev_op->Name() == prev_op_name) {
697-
auto *dep_var = new DummyVarHandle(result->CreateControlDepVar());
698-
prev_op->AddOutput(dep_var);
699-
result->Get<GraphDepVars>(kGraphDepVars).emplace(dep_var);
700-
op->AddInput(dep_var);
701-
}
702-
}
703-
}
704-
705679
void MultiDevSSAGraphBuilder::CreateDistTrainOp(ir::Graph *result,
706680
ir::Node *node) const {
707681
int op_dev_id = -1;

paddle/fluid/framework/details/multi_devices_graph_pass.h

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -69,9 +69,6 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
6969
std::vector<std::string> FindDistTrainRecvVars(
7070
const std::vector<ir::Node *> &nodes) const;
7171

72-
void ConnectOp(ir::Graph *result, OpHandleBase *op,
73-
const std::string &prev_op_name) const;
74-
7572
void CreateComputationalOps(ir::Graph *result, ir::Node *node,
7673
size_t num_places) const;
7774

@@ -83,10 +80,6 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
8380
void CreateComputationalOp(ir::Graph *result, ir::Node *node,
8481
int dev_id) const;
8582

86-
bool IsParameterGradientOnce(
87-
const std::string &og,
88-
std::unordered_set<std::string> *og_has_been_broadcast) const;
89-
9083
int GetOpDeviceID(const ir::Graph &graph, ir::Node *node) const;
9184

9285
void InsertAllReduceOp(ir::Graph *result, const std::string &og) const;

paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc

Lines changed: 22 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -86,15 +86,24 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
8686
}
8787
op_desc.SetInput("Bias", {new_bias_var});
8888
}
89-
9089
#undef GET_NODE
9190

91+
// Create temp variables.
92+
scope->Var(name_scope + "/BatchedInput.new")
93+
->GetMutable<framework::LoDTensor>();
94+
scope->Var(name_scope + "/BatchCellPreAct.new")
95+
->GetMutable<framework::LoDTensor>();
96+
scope->Var(name_scope + "/BatchedGate.new")
97+
->GetMutable<framework::LoDTensor>();
98+
9299
op_desc.SetInput("H0", {});
93100
op_desc.SetInput("C0", {});
94101
op_desc.SetOutput("Hidden", {hidden_n->Name()});
95102
op_desc.SetOutput("Cell", {cell_n->Name()});
96103
op_desc.SetOutput("XX", {xx_n->Name()});
97-
op_desc.SetOutput("BatchedInput", {"blstm_0.tmp_2"});
104+
op_desc.SetOutput("BatchedGate", {name_scope + "/BatchedGate.new"});
105+
op_desc.SetOutput("BatchCellPreAct", {name_scope + "/BatchCellPreAct.new"});
106+
op_desc.SetOutput("BatchedInput", {name_scope + "/BatchedInput.new"});
98107
op_desc.SetAttr("is_reverse", lstm_n->Op()->GetAttr("is_reverse"));
99108
op_desc.SetAttr("use_peepholes", lstm_n->Op()->GetAttr("use_peepholes"));
100109
// TODO(TJ): get from attr
@@ -130,8 +139,8 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
130139

131140
int fusion_count{0};
132141

133-
auto fc_no_bias_handler = [&](
134-
const GraphPatternDetector::subgraph_t& subgraph, Graph* g) {
142+
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
143+
Graph* g) {
135144
#define GET_NODE(name__) \
136145
std::string name__##key = name_scope + "/" + #name__; \
137146
auto* name__##n = pattern->RetrieveNode(name__##key); \
@@ -152,21 +161,24 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope,
152161

153162
if (with_fc_bias) {
154163
GET_NODE(fc_bias);
164+
GET_NODE(elementwise_add);
155165
lstm_creator(lstm, x, w, Weight, Bias, Hidden, Cell, fc_out, fc_bias);
166+
// Remove unneeded nodes.
167+
std::unordered_set<const Node*> marked_nodes(
168+
{mul_n, lstm_n, elementwise_add_n});
169+
GraphSafeRemoveNodes(graph, marked_nodes);
156170
} else {
157171
lstm_creator(lstm, x, w, Weight, Bias, Hidden, Cell, fc_out, -1);
172+
// Remove unneeded nodes.
173+
std::unordered_set<const Node*> marked_nodes({mul_n, lstm_n});
174+
GraphSafeRemoveNodes(graph, marked_nodes);
158175
}
159176
#undef GET_NODE
160177

161-
// Remove unneeded nodes.
162-
std::unordered_set<const Node*> marked_nodes({mul_n, lstm_n});
163-
164-
GraphSafeRemoveNodes(graph, marked_nodes);
165-
166178
++fusion_count;
167179
};
168180

169-
gpd(graph, fc_no_bias_handler);
181+
gpd(graph, handler);
170182

171183
return fusion_count;
172184
}

paddle/fluid/framework/ir/fc_lstm_fuse_pass.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,8 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15+
#pragma once
16+
1517
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
1618
#include "paddle/fluid/framework/ir/graph.h"
1719
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"

paddle/fluid/framework/ir/graph_pattern_detector.cc

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,6 @@ void PDPattern::AddEdge(PDNode* a, PDNode* b) {
7373
void GraphPatternDetector::operator()(Graph* graph,
7474
GraphPatternDetector::handle_t handler) {
7575
if (!MarkPDNodesInGraph(*graph)) {
76-
LOG(INFO) << "Mark failed";
7776
return;
7877
}
7978

paddle/fluid/framework/ir/graph_pattern_detector.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,9 @@
1919
#endif
2020

2121
#include <numeric>
22+
#include <string>
23+
#include <utility>
24+
#include <vector>
2225
#include "paddle/fluid/framework/ir/graph.h"
2326
#include "paddle/fluid/framework/ir/node.h"
2427
#include "paddle/fluid/inference/analysis/dot.h"

paddle/fluid/inference/analysis/CMakeLists.txt

Lines changed: 15 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@ endif()
5858
inference_analysis_test(test_analyzer SRCS analyzer_tester.cc
5959
EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis_predictor
6060
ARGS --infer_ditu_rnn_model=${DITU_INSTALL_DIR}/model
61-
--infer_ditu_rnn_data=${DITU_INSTALL_DIR}/data.txt)
61+
--infer_ditu_rnn_data=${DITU_INSTALL_DIR}/data.txt)
6262

6363
inference_analysis_test(test_data_flow_graph SRCS data_flow_graph_tester.cc)
6464
inference_analysis_test(test_data_flow_graph_to_fluid_pass SRCS data_flow_graph_to_fluid_pass_tester.cc)
@@ -74,7 +74,7 @@ inference_analysis_test(test_model_store_pass SRCS model_store_pass_tester.cc)
7474
set(CHINESE_NER_MODEL_URL "http://paddle-inference-dist.bj.bcebos.com/chinese_ner_model.tar.gz")
7575
set(CHINESE_NER_DATA_URL "http://paddle-inference-dist.bj.bcebos.com/chinese_ner-data.txt.tar.gz")
7676
set(CHINESE_NER_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo/chinese_ner" CACHE PATH "Chinese ner model and data root." FORCE)
77-
if (NOT EXISTS ${CHINESE_NER_INSTALL_DIR} AND WITH_TESTING)
77+
if (NOT EXISTS ${CHINESE_NER_INSTALL_DIR} AND WITH_TESTING AND WITH_INFERENCE)
7878
inference_download_and_uncompress(${CHINESE_NER_INSTALL_DIR} ${CHINESE_NER_MODEL_URL} "chinese_ner_model.tar.gz")
7979
inference_download_and_uncompress(${CHINESE_NER_INSTALL_DIR} ${CHINESE_NER_DATA_URL} "chinese_ner-data.txt.tar.gz")
8080
endif()
@@ -87,7 +87,7 @@ inference_analysis_test(test_analyzer_ner SRCS analyzer_ner_tester.cc
8787
set(LAC_MODEL_URL "http://paddle-inference-dist.bj.bcebos.com/lac_model.tar.gz")
8888
set(LAC_DATA_URL "http://paddle-inference-dist.bj.bcebos.com/lac_data.txt.tar.gz")
8989
set(LAC_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo/lac" CACHE PATH "LAC model and data root." FORCE)
90-
if (NOT EXISTS ${LAC_INSTALL_DIR} AND WITH_TESTING)
90+
if (NOT EXISTS ${LAC_INSTALL_DIR} AND WITH_TESTING AND WITH_INFERENCE)
9191
inference_download_and_uncompress(${LAC_INSTALL_DIR} ${LAC_MODEL_URL} "lac_model.tar.gz")
9292
inference_download_and_uncompress(${LAC_INSTALL_DIR} ${LAC_DATA_URL} "lac_data.txt.tar.gz")
9393
endif()
@@ -96,3 +96,15 @@ inference_analysis_test(test_analyzer_lac SRCS analyzer_lac_tester.cc
9696
EXTRA_DEPS paddle_inference_api paddle_fluid_api
9797
ARGS --infer_model=${LAC_INSTALL_DIR}/model
9898
--infer_data=${LAC_INSTALL_DIR}/data.txt)
99+
100+
101+
set(TEXT_CLASSIFICATION_MODEL_URL "http://paddle-inference-dist.bj.bcebos.com/text-classification-Senta.tar.gz")
102+
set(TEXT_CLASSIFICATION_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo/text_classification" CACHE PATH "Text Classification model and data root." FORCE)
103+
104+
if (NOT EXISTS ${TEXT_CLASSIFICATION_INSTALL_DIR} AND WITH_TESTING AND WITH_INFERENCE)
105+
inference_download_and_uncompress(${TEXT_CLASSIFICATION_INSTALL_DIR} ${TEXT_CLASSIFICATION_MODEL_URL} "text-classification-Senta.tar.gz")
106+
endif()
107+
108+
inference_analysis_test(test_text_classification SRCS test_text_classification.cc
109+
EXTRA_DEPS paddle_inference_api paddle_fluid_api analysis_predictor
110+
ARGS --infer_model=${TEXT_CLASSIFICATION_INSTALL_DIR}/text-classification-Senta)

paddle/fluid/inference/analysis/analyzer.cc

Lines changed: 17 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -42,20 +42,16 @@ class DfgPassManagerImpl final : public DfgPassManager {
4242
public:
4343
DfgPassManagerImpl() {
4444
// TODO(Superjomn) set the key with pass reprs.
45-
VLOG(3)
46-
<< "-----------------------------------------------------------------";
47-
if (FLAGS_IA_enable_ir) {
48-
AddPass("fluid-to-ir-pass", new FluidToIrPass);
49-
} else {
45+
if (!FLAGS_IA_enable_ir) {
5046
AddPass("fluid-to-data-flow-graph", new FluidToDataFlowGraphPass);
47+
} else {
48+
AddPass("fluid-to-ir-pass", new FluidToIrPass);
5149
}
5250
TryAddTensorRtPass();
5351
AddPass("data-flow-graph-to-fluid", new DataFlowGraphToFluidPass);
5452
if (!FLAGS_IA_output_storage_path.empty()) {
5553
AddPass("model-store-pass", new ModelStorePass);
5654
}
57-
VLOG(3)
58-
<< "-----------------------------------------------------------------";
5955
}
6056

6157
std::string repr() const override { return "dfg-pass-manager"; }
@@ -102,18 +98,15 @@ class DfgPassManagerImpl final : public DfgPassManager {
10298
Analyzer::Analyzer() { Register("manager1", new DfgPassManagerImpl); }
10399

104100
void Analyzer::Run(Argument* argument) {
105-
// Ugly support fluid-to-ir-pass
106-
argument->Set(kFluidToIrPassesAttr,
107-
new std::vector<std::string>({
108-
// Manual update the passes here.
109-
"graph_viz_pass", //
110-
"infer_clean_graph_pass", "graph_viz_pass", //
111-
"attention_lstm_fuse_pass", "graph_viz_pass", //
112-
"fc_lstm_fuse_pass", "graph_viz_pass", //
113-
"mul_lstm_fuse_pass", "graph_viz_pass", //
114-
"seq_concat_fc_fuse_pass", "graph_viz_pass", //
115-
"fc_fuse_pass", "graph_viz_pass" //
116-
}));
101+
std::vector<std::string> passes;
102+
for (auto& pass : all_ir_passes_) {
103+
if (!disabled_ir_passes_.count(pass)) {
104+
passes.push_back(pass);
105+
passes.push_back("graph_viz_pass"); // add graphviz for debug.
106+
}
107+
}
108+
passes.push_back("graph_viz_pass");
109+
argument->Set(kFluidToIrPassesAttr, new std::vector<std::string>(passes));
117110

118111
for (auto& x : data_) {
119112
PADDLE_ENFORCE(x->Initialize(argument));
@@ -122,6 +115,11 @@ void Analyzer::Run(Argument* argument) {
122115
}
123116
}
124117

118+
Analyzer& Analyzer::DisableIrPasses(const std::vector<std::string>& passes) {
119+
disabled_ir_passes_.insert(passes.begin(), passes.end());
120+
return *this;
121+
}
122+
125123
} // namespace analysis
126124
} // namespace inference
127125
} // namespace paddle

0 commit comments

Comments
 (0)