#include <google/protobuf/text_format.h>
#include <gtest/gtest.h>
+ #include <thread>  // NOLINT
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/utils/singleton.h"
- #include "paddle/fluid/platform/profiler.h"

DEFINE_string(infer_ditu_rnn_model, "", "model path for ditu RNN");
DEFINE_string(infer_ditu_rnn_data, "", "data path for ditu RNN");
DEFINE_int32(batch_size, 10, "batch size.");
DEFINE_int32(repeat, 1, "Running the inference program repeat times.");
+ DEFINE_int32(num_threads, 1, "Running the inference program in multi-threads.");

namespace paddle {
namespace inference {
namespace analysis {

- using namespace framework;
-
TEST(Analyzer, analysis_without_tensorrt) {
  FLAGS_IA_enable_tensorrt_subgraph_engine = false;
  Argument argument;
@@ -219,39 +218,6 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
  }
}

- std::string DescribeTensor(const PaddleTensor &tensor) {
-   std::stringstream os;
-   os << "Tensor [" << tensor.name << "]\n";
-   os << " - type: ";
-   switch (tensor.dtype) {
-     case PaddleDType::FLOAT32:
-       os << "float32";
-       break;
-     case PaddleDType::INT64:
-       os << "int64";
-       break;
-     default:
-       os << "unset";
-   }
-   os << '\n';
-
-   os << " - shape: " << to_string(tensor.shape) << '\n';
-   os << " - lod: ";
-   for (auto &l : tensor.lod) {
-     os << to_string(l) << "; ";
-   }
-   os << "\n";
-   os << " - data: ";
-
-   int dim = std::accumulate(tensor.shape.begin(), tensor.shape.end(), 1,
-                             [](int a, int b) { return a * b; });
-   for (int i = 0; i < dim; i++) {
-     os << static_cast<float *>(tensor.data.data())[i] << " ";
-   }
-   os << '\n';
-   return os.str();
- }
-
}  // namespace

const float ditu_rnn_target_data[] = {
@@ -266,58 +232,92 @@ const float ditu_rnn_target_data[] = {
    93.5771, 3.84641, 0, 0, 0, 0, 0, 0,
    169.426, 0,       0, 0, 0, 0, 0, 0};
// Test with a really complicate model.
- void TestDituRNNPrediction(const std::string &model_path,
-                            const std::string &data_path, int batch_size,
-                            bool use_analysis, bool activate_ir,
-                            int num_times = 1) {
+ void TestDituRNNPrediction(bool use_analysis_and_activate_ir = false,
+                            int num_threads = FLAGS_num_threads) {
  NativeConfig config;
  config.prog_file = FLAGS_infer_ditu_rnn_model + "/__model__";
  config.param_file = FLAGS_infer_ditu_rnn_model + "/param";
  config.use_gpu = false;
  config.device = 0;
  config.specify_input_name = true;
+   int batch_size = FLAGS_batch_size;
+   int num_times = FLAGS_repeat;

  auto base_predictor =
      CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
  auto predictor =
      CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kAnalysis>(config);
  std::vector<PaddleTensor> input_slots;
-   DataRecord data(data_path, batch_size);
+   DataRecord data(FLAGS_infer_ditu_rnn_data, batch_size);
  // Prepare inputs.
  PrepareInputs(&input_slots, &data, batch_size);
  std::vector<PaddleTensor> outputs, base_outputs;

  base_predictor->Run(input_slots, &base_outputs);

-   Timer timer;
-   timer.tic();
-   for (int i = 0; i < num_times; i++) {
-     predictor->Run(input_slots, &outputs);
-   }
  LOG(INFO) << "===========profile result===========";
-   LOG(INFO) << "batch_size: " << batch_size << ", repeat: " << num_times
-             << ", latency: " << timer.toc() / num_times << " ms";
+   if (num_threads == 1) {
+     std::vector<PaddleTensor> input_slots;
+     // Prepare inputs.
+     DataRecord data(FLAGS_infer_ditu_rnn_data, batch_size);
+     PrepareInputs(&input_slots, &data, batch_size);
+
+     Timer timer;
+     timer.tic();
+     for (int i = 0; i < num_times; i++) {
+       predictor->Run(input_slots, &outputs);
+     }
+     print_time(batch_size, num_times, 1, 0, timer.toc() / num_times);
+   } else {
+     std::vector<std::thread> threads;
+     std::vector<PaddleTensor> input_slots;
+     // Prepare inputs.
+     PrepareInputs(&input_slots, &data, batch_size);
+     std::vector<PaddleTensor> outputs;
+     for (int tid = 0; tid < num_threads; ++tid) {
+       threads.emplace_back([&, tid]() {
+         auto predictor_tid =
+             CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kAnalysis>(
+                 config);
+         DataRecord data(FLAGS_infer_ditu_rnn_data, batch_size);
+
+         Timer timer;
+         timer.tic();
+         for (int i = 0; i < num_times; i++) {
+           predictor_tid->Run(input_slots, &outputs);
+         }
+         print_time(batch_size, num_times, num_threads, tid,
+                    timer.toc() / num_times);
+       });
+     }
+     for (int i = 0; i < num_threads; ++i) {
+       threads[i].join();
+     }
+   }
  LOG(INFO) << "=====================================";

-   PADDLE_ENFORCE_GT(outputs.size(), 0);
-   PADDLE_ENFORCE_EQ(outputs.size(), base_outputs.size());
-   for (size_t i = 0; i < outputs.size(); i++) {
-     auto &out = outputs[i];
-     auto &base_out = base_outputs[i];
-     size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
-                                   [](int a, int b) { return a * b; });
-     size_t size1 = std::accumulate(base_out.shape.begin(), base_out.shape.end(),
-                                    1, [](int a, int b) { return a * b; });
-     PADDLE_ENFORCE_EQ(size, size1);
-     PADDLE_ENFORCE_GT(size, 0);
-     float *data = static_cast<float *>(out.data.data());
-     float *base_data = static_cast<float *>(base_out.data.data());
-     for (size_t i = 0; i < size; i++) {
-       EXPECT_NEAR(data[i], base_data[i], 1e-3);
+   if (num_threads == 1) {
+     PADDLE_ENFORCE_GT(outputs.size(), 0);
+     PADDLE_ENFORCE_EQ(outputs.size(), base_outputs.size());
+     for (size_t i = 0; i < outputs.size(); i++) {
+       auto &out = outputs[i];
+       auto &base_out = base_outputs[i];
+       size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
+                                     [](int a, int b) { return a * b; });
+       size_t size1 =
+           std::accumulate(base_out.shape.begin(), base_out.shape.end(), 1,
+                           [](int a, int b) { return a * b; });
+       PADDLE_ENFORCE_EQ(size, size1);
+       PADDLE_ENFORCE_GT(size, 0);
+       float *data = static_cast<float *>(out.data.data());
+       float *base_data = static_cast<float *>(base_out.data.data());
+       for (size_t i = 0; i < size; i++) {
+         EXPECT_NEAR(data[i], base_data[i], 1e-3);
+       }
    }
  }

-   if (use_analysis && activate_ir) {
+   if (use_analysis_and_activate_ir) {
    AnalysisPredictor *analysis_predictor =
        dynamic_cast<AnalysisPredictor *>(predictor.get());
    auto &fuse_statis = analysis_predictor->analysis_argument()
@@ -334,23 +334,16 @@ void TestDituRNNPrediction(const std::string &model_path,

// Directly infer with the original model.
TEST(Analyzer, DituRNN_without_analysis) {
-   TestDituRNNPrediction(FLAGS_infer_ditu_rnn_model, FLAGS_infer_ditu_rnn_data,
-                         FLAGS_batch_size, false, false, FLAGS_repeat);
- }
-
- // Inference with the original model with the analysis turned on, the analysis
- // module will transform the program to a data flow graph.
- TEST(Analyzer, DituRNN_with_analysis) {
-   LOG(INFO) << "ditu rnn with analysis";
-   TestDituRNNPrediction(FLAGS_infer_ditu_rnn_model, FLAGS_infer_ditu_rnn_data,
-                         FLAGS_batch_size, true, false, FLAGS_repeat);
+   LOG(INFO) << "ditu rnn without analysis";
+   TestDituRNNPrediction(false, 1);
+   TestDituRNNPrediction(false, 4);  // multi-threads
}

// Inference with analysis and IR. The IR module will fuse some large kernels.
TEST(Analyzer, DituRNN_with_analysis_with_IR) {
  LOG(INFO) << "ditu rnn with analysis and IR fuse";
-   TestDituRNNPrediction(FLAGS_infer_ditu_rnn_model, FLAGS_infer_ditu_rnn_data,
-                         FLAGS_batch_size, true, true, FLAGS_repeat);
+   TestDituRNNPrediction(true, 1);
+   TestDituRNNPrediction(true, 4);  // multi-threads
}

}  // namespace analysis
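
Outside the diff, a minimal self-contained sketch (standard C++ only, assumptions labeled) of the per-thread timing pattern the change above introduces: each thread owns its own worker state and its own timer, runs the workload num_times, reports its average latency, and is joined at the end. The busy-loop below is only a stand-in for a per-thread predictor's Run(); it is not the Paddle API, and the names (RunOnce stand-in, sink) are illustrative.

#include <chrono>
#include <iostream>
#include <thread>
#include <vector>

int main() {
  const int num_threads = 4;  // mirrors the num_threads flag above (assumed value)
  const int num_times = 10;   // mirrors the repeat flag above (assumed value)
  std::vector<std::thread> threads;
  for (int tid = 0; tid < num_threads; ++tid) {
    threads.emplace_back([tid, num_times]() {
      // Per-thread state stands in for the per-thread predictor instance.
      double sink = 0.0;
      auto start = std::chrono::steady_clock::now();
      for (int i = 0; i < num_times; ++i) {
        // Stand-in for predictor_tid->Run(input_slots, &outputs).
        for (int j = 0; j < 1000000; ++j) sink += j * 1e-9;
      }
      auto end = std::chrono::steady_clock::now();
      double ms = std::chrono::duration<double, std::milli>(end - start).count();
      std::cout << "thread " << tid << " avg latency: " << ms / num_times
                << " ms (sink=" << sink << ")\n";
    });
  }
  for (auto &t : threads) t.join();
  return 0;
}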