
Commit 0a71d58

Merge branch 'develop' of github.com:PaddlePaddle/Paddle into fix_dist_base

2 parents: a0b6865 + 8868525

20 files changed: +818 / -242 lines

paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc

Lines changed: 0 additions & 3 deletions

@@ -13,13 +13,10 @@
 // limitations under the License.
 
 #include "paddle/fluid/framework/ir/attention_lstm_fuse_pass.h"
-
 #include <string>
-
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/framework/ir/graph_viz_pass.h"
 #include "paddle/fluid/framework/lod_tensor.h"
-#include "paddle/fluid/inference/api/helper.h"
 
 namespace paddle {
 namespace framework {

paddle/fluid/framework/ir/graph_pattern_detector.cc

Lines changed: 1 addition & 1 deletion

@@ -85,7 +85,7 @@ void GraphPatternDetector::operator()(Graph* graph,
   LOG(INFO) << "detect " << subgraphs.size() << " subgraph matches the pattern";
   int id = 0;
   for (auto& g : subgraphs) {
-    LOG(INFO) << "optimizing #" << id++ << " subgraph";
+    VLOG(3) << "optimizing #" << id++ << " subgraph";
     handler(g, graph);
   }
 }
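
Note: switching from LOG(INFO) to VLOG(3) demotes the per-subgraph message from always-on INFO output to verbose logging that only appears when glog's verbosity level is 3 or higher, keeping pattern-detection runs quiet by default. A minimal standalone glog sketch of the gating (not Paddle code):

#include <glog/logging.h>

int main(int argc, char* argv[]) {
  google::InitGoogleLogging(argv[0]);
  FLAGS_logtostderr = true;  // print to stderr for easy inspection

  LOG(INFO) << "always emitted at INFO severity";
  VLOG(3) << "emitted only when verbosity >= 3";  // enable with --v=3 or GLOG_v=3
  return 0;
}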

paddle/fluid/framework/ir/graph_viz_pass.cc

Lines changed: 42 additions & 17 deletions

@@ -50,20 +50,37 @@ std::unique_ptr<ir::Graph> GraphVizPass::ApplyImpl(
 
   Dot dot;
 
-  std::vector<Dot::Attr> op_attrs({Dot::Attr("style", "filled"),
-                                   Dot::Attr("shape", "box"),
-                                   Dot::Attr("fillcolor", "red")});
-  std::vector<Dot::Attr> var_attrs({Dot::Attr("style", "filled,rounded"),
-                                    // Dot::Attr("shape", "diamond"),
-                                    Dot::Attr("fillcolor", "yellow")});
-
-  std::vector<Dot::Attr> marked_op_attrs({Dot::Attr("style", "filled"),
-                                          Dot::Attr("shape", "box"),
-                                          Dot::Attr("fillcolor", "lightgray")});
-  std::vector<Dot::Attr> marked_var_attrs(
-      {Dot::Attr("style", "filled,rounded"),
-       // Dot::Attr("shape", "diamond"),
-       Dot::Attr("fillcolor", "lightgray")});
+  const std::vector<Dot::Attr> op_attrs({
+      Dot::Attr("style", "rounded,filled,bold"),  //
+      Dot::Attr("shape", "box"),                  //
+      Dot::Attr("color", "#303A3A"),              //
+      Dot::Attr("fontcolor", "#ffffff"),          //
+      Dot::Attr("width", "1.3"),                  //
+      Dot::Attr("height", "0.84"),                //
+      Dot::Attr("fontname", "Arial"),             //
+  });
+  const std::vector<Dot::Attr> arg_attrs({
+      Dot::Attr("shape", "box"),                  //
+      Dot::Attr("style", "rounded,filled,bold"),  //
+      Dot::Attr("fontname", "Arial"),             //
+      Dot::Attr("fillcolor", "#999999"),          //
+      Dot::Attr("color", "#dddddd"),              //
+  });
+
+  const std::vector<Dot::Attr> param_attrs({
+      Dot::Attr("shape", "box"),                  //
+      Dot::Attr("style", "rounded,filled,bold"),  //
+      Dot::Attr("fontname", "Arial"),             //
+      Dot::Attr("color", "#148b97"),              //
+      Dot::Attr("fontcolor", "#ffffff"),          //
+  });
+
+  const std::vector<Dot::Attr> marked_op_attrs(
+      {Dot::Attr("style", "rounded,filled,bold"), Dot::Attr("shape", "box"),
+       Dot::Attr("fillcolor", "yellow")});
+  const std::vector<Dot::Attr> marked_var_attrs(
+      {Dot::Attr("style", "filled,rounded"), Dot::Attr("shape", "box"),
+       Dot::Attr("fillcolor", "yellow")});
 
   auto marked_nodes = ConsumeMarkedNodes(graph.get());
   // Create nodes
@@ -74,9 +91,17 @@ std::unique_ptr<ir::Graph> GraphVizPass::ApplyImpl(
           marked_nodes.count(n) ? marked_op_attrs : op_attrs;
       dot.AddNode(node_id, attr, node_id);
     } else if (n->IsVar()) {
-      decltype(op_attrs) attr =
-          marked_nodes.count(n) ? marked_var_attrs : var_attrs;
-      dot.AddNode(node_id, attr, node_id);
+      decltype(op_attrs)* attr;
+      if (marked_nodes.count(n)) {
+        attr = &marked_var_attrs;
+      } else if (const_cast<Node*>(n)->Var() &&
+                 const_cast<Node*>(n)->Var()->Persistable()) {
+        attr = &param_attrs;
+      } else {
+        attr = &arg_attrs;
+      }
+
+      dot.AddNode(node_id, *attr, node_id);
     }
     node2dot[n] = node_id;
   }
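
Note: the pass now styles three kinds of nodes: operators (op_attrs, dark boxes), persistable variables, i.e. parameters (param_attrs, teal), and ordinary arguments (arg_attrs, gray), while nodes consumed by a fuse pass get the yellow marked_* styles. For intuition, a hedged sketch of how such attribute lists could render into Graphviz DOT node statements; Attr and SerializeNode below are simplified stand-ins, not Paddle's actual Dot class:

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Simplified stand-in for Dot::Attr: one DOT key/value pair.
struct Attr {
  std::string key, value;
};

// Renders a DOT node statement such as:
//   node_42 [style="rounded,filled,bold" shape="box" color="#303A3A" label="conv2d"]
std::string SerializeNode(const std::string& id, const std::string& label,
                          const std::vector<Attr>& attrs) {
  std::ostringstream os;
  os << id << " [";
  for (const auto& a : attrs) os << a.key << "=\"" << a.value << "\" ";
  os << "label=\"" << label << "\"]";
  return os.str();
}

int main() {
  const std::vector<Attr> op_attrs = {{"style", "rounded,filled,bold"},
                                      {"shape", "box"},
                                      {"color", "#303A3A"},
                                      {"fontcolor", "#ffffff"}};
  std::cout << SerializeNode("node_42", "conv2d", op_attrs) << "\n";
  return 0;
}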

paddle/fluid/inference/analysis/analyzer.cc

Lines changed: 0 additions & 1 deletion

@@ -106,7 +106,6 @@ void Analyzer::Run(Argument* argument) {
     }
   }
   passes.push_back("graph_viz_pass");
-  // Ugly support fluid-to-ir-pass
   argument->Set(kFluidToIrPassesAttr, new std::vector<std::string>(passes));
 
   for (auto& x : data_) {

paddle/fluid/inference/analysis/analyzer_tester.cc

Lines changed: 84 additions & 80 deletions

@@ -16,6 +16,7 @@
 
 #include <google/protobuf/text_format.h>
 #include <gtest/gtest.h>
+#include <thread>  // NOLINT
 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
 #include "paddle/fluid/framework/ir/pass.h"
 #include "paddle/fluid/inference/analysis/ut_helper.h"
@@ -24,12 +25,12 @@
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 #include "paddle/fluid/inference/api/paddle_inference_pass.h"
 #include "paddle/fluid/inference/utils/singleton.h"
-#include "paddle/fluid/platform/profiler.h"
 
 DEFINE_string(infer_ditu_rnn_model, "", "model path for ditu RNN");
 DEFINE_string(infer_ditu_rnn_data, "", "data path for ditu RNN");
 DEFINE_int32(batch_size, 10, "batch size.");
 DEFINE_int32(repeat, 1, "Running the inference program repeat times.");
+DEFINE_int32(num_threads, 1, "Running the inference program in multi-threads.");
 
 namespace paddle {
 namespace inference {
@@ -220,39 +221,6 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
   }
 }
 
-std::string DescribeTensor(const PaddleTensor &tensor) {
-  std::stringstream os;
-  os << "Tensor [" << tensor.name << "]\n";
-  os << " - type: ";
-  switch (tensor.dtype) {
-    case PaddleDType::FLOAT32:
-      os << "float32";
-      break;
-    case PaddleDType::INT64:
-      os << "int64";
-      break;
-    default:
-      os << "unset";
-  }
-  os << '\n';
-
-  os << " - shape: " << to_string(tensor.shape) << '\n';
-  os << " - lod: ";
-  for (auto &l : tensor.lod) {
-    os << to_string(l) << "; ";
-  }
-  os << "\n";
-  os << " - data: ";
-
-  int dim = std::accumulate(tensor.shape.begin(), tensor.shape.end(), 1,
-                            [](int a, int b) { return a * b; });
-  for (int i = 0; i < dim; i++) {
-    os << static_cast<float *>(tensor.data.data())[i] << " ";
-  }
-  os << '\n';
-  return os.str();
-}
-
 }  // namespace
 
 const float ditu_rnn_target_data[] = {
@@ -266,11 +234,29 @@ const float ditu_rnn_target_data[] = {
     10.7286, 12.0595, 10.6672, 0, 0, 0, 0, 0,
     93.5771, 3.84641, 0, 0, 0, 0, 0, 0,
     169.426, 0, 0, 0, 0, 0, 0, 0};
+void CompareResult(const std::vector<PaddleTensor> &outputs,
+                   const std::vector<PaddleTensor> &base_outputs) {
+  PADDLE_ENFORCE_GT(outputs.size(), 0);
+  PADDLE_ENFORCE_EQ(outputs.size(), base_outputs.size());
+  for (size_t i = 0; i < outputs.size(); i++) {
+    auto &out = outputs[i];
+    auto &base_out = base_outputs[i];
+    size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
+                                  [](int a, int b) { return a * b; });
+    size_t size1 = std::accumulate(base_out.shape.begin(), base_out.shape.end(),
+                                   1, [](int a, int b) { return a * b; });
+    PADDLE_ENFORCE_EQ(size, size1);
+    PADDLE_ENFORCE_GT(size, 0);
+    float *data = static_cast<float *>(out.data.data());
+    float *base_data = static_cast<float *>(base_out.data.data());
+    for (size_t j = 0; j < size; j++) {
+      EXPECT_NEAR(data[j], base_data[j], 1e-3);
+    }
+  }
+}
 // Test with a really complicated model.
-void TestDituRNNPrediction(const std::string &model_path,
-                           const std::string &data_path, int batch_size,
-                           bool use_analysis, bool activate_ir,
-                           int num_times = 1) {
+void TestDituRNNPrediction(bool use_analysis, bool activate_ir,
+                           int num_threads) {
   AnalysisConfig config;
   config.prog_file = FLAGS_infer_ditu_rnn_model + "/__model__";
   config.param_file = FLAGS_infer_ditu_rnn_model + "/param";
@@ -281,47 +267,64 @@ void TestDituRNNPrediction(const std::string &model_path,
   PADDLE_ENFORCE(config.ir_mode ==
                  AnalysisConfig::IrPassMode::kExclude);  // default
   config.ir_passes.clear();  // Do not exclude any pass.
+  int batch_size = FLAGS_batch_size;
+  int num_times = FLAGS_repeat;
 
   auto base_predictor =
       CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
   auto predictor =
       CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
           config);
   std::vector<PaddleTensor> input_slots;
-  DataRecord data(data_path, batch_size);
+  DataRecord data(FLAGS_infer_ditu_rnn_data, batch_size);
   // Prepare inputs.
   PrepareInputs(&input_slots, &data, batch_size);
   std::vector<PaddleTensor> outputs, base_outputs;
 
   base_predictor->Run(input_slots, &base_outputs);
 
-  Timer timer;
-  timer.tic();
-  for (int i = 0; i < num_times; i++) {
-    predictor->Run(input_slots, &outputs);
-  }
   LOG(INFO) << "===========profile result===========";
-  LOG(INFO) << "batch_size: " << batch_size << ", repeat: " << num_times
-            << ", latency: " << timer.toc() / num_times << "ms";
-  LOG(INFO) << "=====================================";
-
-  PADDLE_ENFORCE_GT(outputs.size(), 0);
-  PADDLE_ENFORCE_EQ(outputs.size(), base_outputs.size());
-  for (size_t i = 0; i < outputs.size(); i++) {
-    auto &out = outputs[i];
-    auto &base_out = base_outputs[i];
-    size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
-                                  [](int a, int b) { return a * b; });
-    size_t size1 = std::accumulate(base_out.shape.begin(), base_out.shape.end(),
-                                   1, [](int a, int b) { return a * b; });
-    PADDLE_ENFORCE_EQ(size, size1);
-    PADDLE_ENFORCE_GT(size, 0);
-    float *data = static_cast<float *>(out.data.data());
-    float *base_data = static_cast<float *>(base_out.data.data());
-    for (size_t j = 0; j < size; j++) {
-      EXPECT_NEAR(data[j], base_data[j], 1e-3);
+  if (num_threads == 1) {
+    // Prepare inputs.
+    Timer timer;
+    timer.tic();
+    for (int i = 0; i < num_times; i++) {
+      predictor->Run(input_slots, &outputs);
+    }
+    PrintTime(batch_size, num_times, 1, 0, timer.toc() / num_times);
+    CompareResult(outputs, base_outputs);
+  } else {
+    std::vector<std::thread> threads;
+    std::vector<std::unique_ptr<PaddlePredictor>> predictors;
+    // TODO(yanchunwei): Bug here, the analyzer phase can't be parallelized
+    // because AttentionLSTM's hard-coded node id will be damaged.
+    for (int tid = 0; tid < num_threads; ++tid) {
+      predictors.emplace_back(
+          CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
+              config));
+    }
+    for (int tid = 0; tid < num_threads; ++tid) {
+      threads.emplace_back([&, tid]() {
+        // Each thread should have local input_slots and outputs.
+        std::vector<PaddleTensor> input_slots;
+        DataRecord data(FLAGS_infer_ditu_rnn_data, batch_size);
+        PrepareInputs(&input_slots, &data, batch_size);
+        std::vector<PaddleTensor> outputs;
+        Timer timer;
+        timer.tic();
+        for (int i = 0; i < num_times; i++) {
+          predictors[tid]->Run(input_slots, &outputs);
+        }
+        PrintTime(batch_size, num_times, num_threads, tid,
                  timer.toc() / num_times);
+        CompareResult(outputs, base_outputs);
+      });
+    }
+    for (int i = 0; i < num_threads; ++i) {
+      threads[i].join();
     }
   }
+  LOG(INFO) << "=====================================";
 
   if (use_analysis && activate_ir) {
     AnalysisPredictor *analysis_predictor =
@@ -350,25 +353,26 @@ void TestDituRNNPrediction(const std::string &model_path,
   }
 }
 
-// Directly infer with the original model.
-TEST(Analyzer, DituRNN_without_analysis) {
-  TestDituRNNPrediction(FLAGS_infer_ditu_rnn_model, FLAGS_infer_ditu_rnn_data,
-                        FLAGS_batch_size, false, false, FLAGS_repeat);
+// Inference with analysis and IR, easy for profiling independently.
+TEST(Analyzer, DituRNN) {
+  TestDituRNNPrediction(true, true, FLAGS_num_threads);
 }
 
-// Inference with the original model with the analysis turned on, the analysis
-// module will transform the program to a data flow graph.
-TEST(Analyzer, DituRNN_with_analysis) {
-  LOG(INFO) << "ditu rnn with analysis";
-  TestDituRNNPrediction(FLAGS_infer_ditu_rnn_model, FLAGS_infer_ditu_rnn_data,
-                        FLAGS_batch_size, true, false, FLAGS_repeat);
-}
-
-// Inference with analysis and IR. The IR module will fuse some large kernels.
-TEST(Analyzer, DituRNN_with_analysis_with_IR) {
-  LOG(INFO) << "ditu rnn with analysis and IR fuse";
-  TestDituRNNPrediction(FLAGS_infer_ditu_rnn_model, FLAGS_infer_ditu_rnn_data,
-                        FLAGS_batch_size, true, true, FLAGS_repeat);
+// Other DituRNN unit tests, covering different combinations of use_analysis,
+// activate_ir, and multi-threading.
+TEST(Analyzer, DituRNN_tests) {
+  int num_threads[2] = {1, 4};
+  for (auto i : num_threads) {
+    // Directly infer with the original model.
+    TestDituRNNPrediction(false, false, i);
+    // Inference with the original model with the analysis turned on; the
+    // analysis module will transform the program to a data flow graph.
+    TestDituRNNPrediction(true, false, i);
+    // Inference with analysis and IR. The IR module will fuse some large
+    // kernels.
+    TestDituRNNPrediction(true, true, i);
+  }
 }
 
 }  // namespace analysis
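
Note: the tester's main() is not part of this diff. Since --batch_size, --repeat, --num_threads and the two model/data paths are plain gflags definitions, a driver along the lines of the sketch below would wire them up; the binary name in the comment is hypothetical:

#include <gflags/gflags.h>
#include <gtest/gtest.h>

// Hypothetical invocation:
//   ./analyzer_tester --infer_ditu_rnn_model=/path/to/model \
//       --infer_ditu_rnn_data=/path/to/data --batch_size=10 \
//       --repeat=5 --num_threads=4
int main(int argc, char* argv[]) {
  testing::InitGoogleTest(&argc, argv);  // consumes --gtest_* flags first
  google::ParseCommandLineFlags(&argc, &argv, true);  // parses the DEFINE_* flags
  return RUN_ALL_TESTS();
}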

paddle/fluid/inference/api/analysis_predictor.cc

Lines changed: 0 additions & 1 deletion

@@ -35,7 +35,6 @@ bool AnalysisPredictor::Init(
   } else {
     place_ = paddle::platform::CPUPlace();
   }
-  PADDLE_ENFORCE(!parent_scope);
   if (parent_scope) {
     scope_ = parent_scope;
     sub_scope_ = &(parent_scope->NewScope());

paddle/fluid/inference/api/helper.h

Lines changed: 41 additions & 0 deletions

@@ -14,6 +14,7 @@
 
 #pragma once
 
+#include <glog/logging.h>
 #include <sys/time.h>
 #include <algorithm>
 #include <numeric>
@@ -88,5 +89,45 @@ static void TensorAssignData(PaddleTensor *tensor,
   }
 }
 
+std::string DescribeTensor(const PaddleTensor &tensor) {
+  std::stringstream os;
+  os << "Tensor [" << tensor.name << "]\n";
+  os << " - type: ";
+  switch (tensor.dtype) {
+    case PaddleDType::FLOAT32:
+      os << "float32";
+      break;
+    case PaddleDType::INT64:
+      os << "int64";
+      break;
+    default:
+      os << "unset";
+  }
+  os << '\n';
+
+  os << " - shape: " << to_string(tensor.shape) << '\n';
+  os << " - lod: ";
+  for (auto &l : tensor.lod) {
+    os << to_string(l) << "; ";
+  }
+  os << "\n";
+  os << " - data: ";
+
+  int dim = std::accumulate(tensor.shape.begin(), tensor.shape.end(), 1,
+                            [](int a, int b) { return a * b; });
+  for (int i = 0; i < dim; i++) {
+    os << static_cast<float *>(tensor.data.data())[i] << " ";
+  }
+  os << '\n';
+  return os.str();
+}
+
+void PrintTime(int batch_size, int repeat, int num_threads, int tid,
+               double latency) {
+  LOG(INFO) << "batch_size: " << batch_size << ", repeat: " << repeat
+            << ", threads: " << num_threads << ", thread id: " << tid
+            << ", latency: " << latency << "ms";
+}
+
 }  // namespace inference
 }  // namespace paddle
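
Note: DescribeTensor and PrintTime now live in a header shared by the inference tests. A hedged usage sketch, assuming the PaddleTensor/PaddleBuf API of this Paddle version (name, shape, dtype, lod, and a resizable data buffer); the tensor values are made up for illustration:

#include <cstring>
#include <vector>
#include "paddle/fluid/inference/api/helper.h"

void DescribeAndTime() {
  std::vector<float> values = {0.5f, 1.5f, 2.5f, 3.5f};

  paddle::PaddleTensor t;
  t.name = "x";  // hypothetical tensor name
  t.shape = {2, 2};
  t.dtype = paddle::PaddleDType::FLOAT32;
  t.data.Resize(values.size() * sizeof(float));  // assumed PaddleBuf allocation
  std::memcpy(t.data.data(), values.data(), values.size() * sizeof(float));

  // Logs type, shape, lod and the raw float data.
  LOG(INFO) << paddle::inference::DescribeTensor(t);
  // One-line timing summary matching the analyzer_tester output.
  paddle::inference::PrintTime(/*batch_size=*/2, /*repeat=*/10,
                               /*num_threads=*/1, /*tid=*/0, /*latency=*/5.2);
}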
