#include <google/protobuf/text_format.h>
#include <gtest/gtest.h>
+ #include <thread>  // NOLINT
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/utils/singleton.h"
- #include "paddle/fluid/platform/profiler.h"

DEFINE_string(infer_ditu_rnn_model, "", "model path for ditu RNN");
DEFINE_string(infer_ditu_rnn_data, "", "data path for ditu RNN");
DEFINE_int32(batch_size, 10, "batch size.");
DEFINE_int32(repeat, 1, "Running the inference program repeat times.");
+ DEFINE_int32(num_threads, 1, "Running the inference program in multi-threads.");

namespace paddle {
namespace inference {
@@ -220,39 +221,6 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
  }
}

- std::string DescribeTensor(const PaddleTensor &tensor) {
-   std::stringstream os;
-   os << "Tensor [" << tensor.name << "]\n";
-   os << " - type: ";
-   switch (tensor.dtype) {
-     case PaddleDType::FLOAT32:
-       os << "float32";
-       break;
-     case PaddleDType::INT64:
-       os << "int64";
-       break;
-     default:
-       os << "unset";
-   }
-   os << '\n';
-
-   os << " - shape: " << to_string(tensor.shape) << '\n';
-   os << " - lod: ";
-   for (auto &l : tensor.lod) {
-     os << to_string(l) << "; ";
-   }
-   os << "\n";
-   os << " - data: ";
-
-   int dim = std::accumulate(tensor.shape.begin(), tensor.shape.end(), 1,
-                             [](int a, int b) { return a * b; });
-   for (int i = 0; i < dim; i++) {
-     os << static_cast<float *>(tensor.data.data())[i] << " ";
-   }
-   os << '\n';
-   return os.str();
- }
-
}  // namespace

const float ditu_rnn_target_data[] = {
@@ -266,11 +234,29 @@ const float ditu_rnn_target_data[] = {
    10.7286, 12.0595, 10.6672, 0, 0, 0, 0, 0,
    93.5771, 3.84641, 0,       0, 0, 0, 0, 0,
    169.426, 0,       0,       0, 0, 0, 0, 0};
+ void CompareResult(const std::vector<PaddleTensor> &outputs,
+                    const std::vector<PaddleTensor> &base_outputs) {
+   PADDLE_ENFORCE_GT(outputs.size(), 0);
+   PADDLE_ENFORCE_EQ(outputs.size(), base_outputs.size());
+   for (size_t i = 0; i < outputs.size(); i++) {
+     auto &out = outputs[i];
+     auto &base_out = base_outputs[i];
+     size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
+                                   [](int a, int b) { return a * b; });
+     size_t size1 = std::accumulate(base_out.shape.begin(), base_out.shape.end(),
+                                    1, [](int a, int b) { return a * b; });
+     PADDLE_ENFORCE_EQ(size, size1);
+     PADDLE_ENFORCE_GT(size, 0);
+     float *data = static_cast<float *>(out.data.data());
+     float *base_data = static_cast<float *>(base_out.data.data());
+     for (size_t i = 0; i < size; i++) {
+       EXPECT_NEAR(data[i], base_data[i], 1e-3);
+     }
+   }
+ }

// Test with a really complicated model.
- void TestDituRNNPrediction(const std::string &model_path,
-                            const std::string &data_path, int batch_size,
-                            bool use_analysis, bool activate_ir,
-                            int num_times = 1) {
+ void TestDituRNNPrediction(bool use_analysis, bool activate_ir,
+                            int num_threads) {
  AnalysisConfig config;
  config.prog_file = FLAGS_infer_ditu_rnn_model + "/__model__";
  config.param_file = FLAGS_infer_ditu_rnn_model + "/param";
@@ -281,47 +267,64 @@ void TestDituRNNPrediction(const std::string &model_path,
  PADDLE_ENFORCE(config.ir_mode ==
                 AnalysisConfig::IrPassMode::kExclude);  // default
  config.ir_passes.clear();  // Do not exclude any pass.
+ int batch_size = FLAGS_batch_size;
+ int num_times = FLAGS_repeat;

  auto base_predictor =
      CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
  auto predictor =
      CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
          config);
  std::vector<PaddleTensor> input_slots;
- DataRecord data(data_path, batch_size);
+ DataRecord data(FLAGS_infer_ditu_rnn_data, batch_size);
  // Prepare inputs.
  PrepareInputs(&input_slots, &data, batch_size);
  std::vector<PaddleTensor> outputs, base_outputs;

  base_predictor->Run(input_slots, &base_outputs);

- Timer timer;
- timer.tic();
- for (int i = 0; i < num_times; i++) {
-   predictor->Run(input_slots, &outputs);
- }
  LOG(INFO) << "===========profile result===========";
- LOG(INFO) << "batch_size: " << batch_size << ", repeat: " << num_times
-           << ", latency: " << timer.toc() / num_times << " ms";
- LOG(INFO) << "=====================================";
-
- PADDLE_ENFORCE_GT(outputs.size(), 0);
- PADDLE_ENFORCE_EQ(outputs.size(), base_outputs.size());
- for (size_t i = 0; i < outputs.size(); i++) {
-   auto &out = outputs[i];
-   auto &base_out = base_outputs[i];
-   size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
-                                 [](int a, int b) { return a * b; });
-   size_t size1 = std::accumulate(base_out.shape.begin(), base_out.shape.end(),
-                                  1, [](int a, int b) { return a * b; });
-   PADDLE_ENFORCE_EQ(size, size1);
-   PADDLE_ENFORCE_GT(size, 0);
-   float *data = static_cast<float *>(out.data.data());
-   float *base_data = static_cast<float *>(base_out.data.data());
-   for (size_t j = 0; j < size; j++) {
-     EXPECT_NEAR(data[j], base_data[j], 1e-3);
+ if (num_threads == 1) {
+   // Prepare inputs.
+   Timer timer;
+   timer.tic();
+   for (int i = 0; i < num_times; i++) {
+     predictor->Run(input_slots, &outputs);
+   }
+   PrintTime(batch_size, num_times, 1, 0, timer.toc() / num_times);
+   CompareResult(outputs, base_outputs);
+ } else {
+   std::vector<std::thread> threads;
+   std::vector<std::unique_ptr<PaddlePredictor>> predictors;
+   // TODO(yanchunwei): Bug here, the analyzer phase can't be parallelized
+   // because AttentionLSTM's hard-coded node ids will be damaged.
+   for (int tid = 0; tid < num_threads; ++tid) {
+     predictors.emplace_back(
+         CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
+             config));
+   }
+   for (int tid = 0; tid < num_threads; ++tid) {
+     threads.emplace_back([&, tid]() {
+       // Each thread should have local input_slots and outputs.
+       std::vector<PaddleTensor> input_slots;
+       DataRecord data(FLAGS_infer_ditu_rnn_data, batch_size);
+       PrepareInputs(&input_slots, &data, batch_size);
+       std::vector<PaddleTensor> outputs;
+       Timer timer;
+       timer.tic();
+       for (int i = 0; i < num_times; i++) {
+         predictors[tid]->Run(input_slots, &outputs);
+       }
+       PrintTime(batch_size, num_times, num_threads, tid,
+                 timer.toc() / num_times);
+       CompareResult(outputs, base_outputs);
+     });
+   }
+   for (int i = 0; i < num_threads; ++i) {
+     threads[i].join();
    }
  }
+ LOG(INFO) << "=====================================";

  if (use_analysis && activate_ir) {
    AnalysisPredictor *analysis_predictor =
@@ -350,25 +353,26 @@ void TestDituRNNPrediction(const std::string &model_path,
  }
}

- // Directly infer with the original model.
- TEST(Analyzer, DituRNN_without_analysis) {
-   TestDituRNNPrediction(FLAGS_infer_ditu_rnn_model, FLAGS_infer_ditu_rnn_data,
-                         FLAGS_batch_size, false, false, FLAGS_repeat);
+ // Inference with analysis and IR, easy for profiling independently.
+ TEST(Analyzer, DituRNN) {
+   TestDituRNNPrediction(true, true, FLAGS_num_threads);
}

- // Inference with the original model with the analysis turned on, the analysis
- // module will transform the program to a data flow graph.
- TEST(Analyzer, DituRNN_with_analysis) {
-   LOG(INFO) << "ditu rnn with analysis";
-   TestDituRNNPrediction(FLAGS_infer_ditu_rnn_model, FLAGS_infer_ditu_rnn_data,
-                         FLAGS_batch_size, true, false, FLAGS_repeat);
- }
-
- // Inference with analysis and IR. The IR module will fuse some large kernels.
- TEST(Analyzer, DituRNN_with_analysis_with_IR) {
-   LOG(INFO) << "ditu rnn with analysis and IR fuse";
-   TestDituRNNPrediction(FLAGS_infer_ditu_rnn_model, FLAGS_infer_ditu_rnn_data,
-                         FLAGS_batch_size, true, true, FLAGS_repeat);
+ // Other unit tests of DituRNN, covering different combinations of
+ // use_analysis, activate_ir and multi-threading.
+ TEST(Analyzer, DituRNN_tests) {
+   int num_threads[2] = {1, 4};
+   for (auto i : num_threads) {
+     // Directly infer with the original model.
+     TestDituRNNPrediction(false, false, i);
+     // Inference with the original model with the analysis turned on; the
+     // analysis module will transform the program to a data flow graph.
+     TestDituRNNPrediction(true, false, i);
+     // Inference with analysis and IR. The IR module will fuse some large
+     // kernels.
+     TestDituRNNPrediction(true, true, i);
+   }
}
} // namespace analysis
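
For reference, a minimal sketch of how the rebuilt test might be invoked with the new flag. The flag names come from the DEFINE_string/DEFINE_int32 declarations above; the binary name and the model/data paths are assumptions that depend on the local build layout.

    # hypothetical binary name and paths; the flags are defined in this file
    ./analyzer_tester \
        --infer_ditu_rnn_model=/path/to/ditu_rnn_model \
        --infer_ditu_rnn_data=/path/to/ditu_rnn_data.txt \
        --batch_size=10 --repeat=1 --num_threads=4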