@@ -234,6 +234,26 @@ const float ditu_rnn_target_data[] = {
     10.7286, 12.0595, 10.6672, 0, 0, 0, 0, 0,
     93.5771, 3.84641, 0, 0, 0, 0, 0, 0,
     169.426, 0, 0, 0, 0, 0, 0, 0};
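+// Compares each output tensor with its reference: the number of tensors and
+// the number of elements per tensor must match, and every element must agree
+// within an absolute tolerance of 1e-3.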
+void CompareResult(const std::vector<PaddleTensor> &outputs,
+                   const std::vector<PaddleTensor> &base_outputs) {
+  PADDLE_ENFORCE_GT(outputs.size(), 0);
+  PADDLE_ENFORCE_EQ(outputs.size(), base_outputs.size());
+  for (size_t i = 0; i < outputs.size(); i++) {
+    auto &out = outputs[i];
+    auto &base_out = base_outputs[i];
+    size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
+                                  [](int a, int b) { return a * b; });
+    size_t size1 = std::accumulate(base_out.shape.begin(), base_out.shape.end(),
+                                   1, [](int a, int b) { return a * b; });
+    PADDLE_ENFORCE_EQ(size, size1);
+    PADDLE_ENFORCE_GT(size, 0);
+    float *data = static_cast<float *>(out.data.data());
+    float *base_data = static_cast<float *>(base_out.data.data());
+    for (size_t i = 0; i < size; i++) {
+      EXPECT_NEAR(data[i], base_data[i], 1e-3);
+    }
+  }
+}
 // Test with a really complicated model.
 void TestDituRNNPrediction(bool use_analysis_and_activate_ir = false,
                            int num_threads = FLAGS_num_threads) {
@@ -266,7 +286,8 @@ void TestDituRNNPrediction(bool use_analysis_and_activate_ir = false,
     for (int i = 0; i < num_times; i++) {
       predictor->Run(input_slots, &outputs);
     }
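+    // The 1 and 0 below are num_threads and tid for the single-thread case
+    // (cf. the threaded PrintTime call further down).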
-    print_time(batch_size, num_times, 1, 0, timer.toc() / num_times);
+    PrintTime(batch_size, num_times, 1, 0, timer.toc() / num_times);
+    CompareResult(outputs, base_outputs);
   } else {
     std::vector<std::thread> threads;
     std::vector<std::unique_ptr<PaddlePredictor>> predictors;
@@ -279,13 +300,19 @@ void TestDituRNNPrediction(bool use_analysis_and_activate_ir = false,
     }
     for (int tid = 0; tid < num_threads; ++tid) {
       threads.emplace_back([&, tid]() {
+        // Each thread should have local input_slots and outputs.
+        std::vector<PaddleTensor> input_slots;
+        DataRecord data(FLAGS_infer_ditu_rnn_data, batch_size);
+        PrepareInputs(&input_slots, &data, batch_size);
+        std::vector<PaddleTensor> outputs;
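+        // Sharing these buffers across threads would race: each Run() writes
+        // into outputs, so every worker uses its own copies.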
         Timer timer;
         timer.tic();
         for (int i = 0; i < num_times; i++) {
           predictors[tid]->Run(input_slots, &outputs);
         }
-        print_time(batch_size, num_times, num_threads, tid,
-                   timer.toc() / num_times);
+        PrintTime(batch_size, num_times, num_threads, tid,
+                  timer.toc() / num_times);
+        CompareResult(outputs, base_outputs);
       });
     }
     for (int i = 0; i < num_threads; ++i) {
@@ -294,27 +321,6 @@ void TestDituRNNPrediction(bool use_analysis_and_activate_ir = false,
   }
   LOG(INFO) << "=====================================";

-  if (num_threads == 1) {
-    PADDLE_ENFORCE_GT(outputs.size(), 0);
-    PADDLE_ENFORCE_EQ(outputs.size(), base_outputs.size());
-    for (size_t i = 0; i < outputs.size(); i++) {
-      auto &out = outputs[i];
-      auto &base_out = base_outputs[i];
-      size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
-                                    [](int a, int b) { return a * b; });
-      size_t size1 =
-          std::accumulate(base_out.shape.begin(), base_out.shape.end(), 1,
-                          [](int a, int b) { return a * b; });
-      PADDLE_ENFORCE_EQ(size, size1);
-      PADDLE_ENFORCE_GT(size, 0);
-      float *data = static_cast<float *>(out.data.data());
-      float *base_data = static_cast<float *>(base_out.data.data());
-      for (size_t i = 0; i < size; i++) {
-        EXPECT_NEAR(data[i], base_data[i], 1e-3);
-      }
-    }
-  }
-
   if (use_analysis_and_activate_ir) {
     AnalysisPredictor *analysis_predictor =
         dynamic_cast<AnalysisPredictor *>(predictor.get());
@@ -342,13 +348,13 @@ void TestDituRNNPrediction(bool use_analysis_and_activate_ir = false,
   }
 }

-TEST(Analyzer, DituRNN) {
-  // default FLAGS_num_threads = 1
-  TestDituRNNPrediction(false, FLAGS_num_threads);
-  TestDituRNNPrediction(true, FLAGS_num_threads);
-}
+// Basic unit test of DituRNN; easy to profile independently.
+TEST(Analyzer, DituRNN) { TestDituRNNPrediction(false, FLAGS_num_threads); }

+// Advanced unit test of DituRNN: exercises use_analysis_and_activate_ir and
+// multi-threading.
 TEST(Analyzer, DituRNN_multi_thread) {
+  TestDituRNNPrediction(true, 1);
   TestDituRNNPrediction(false, 4);
   TestDituRNNPrediction(true, 4);
 }
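
A quick way to exercise these tests after building (the tester binary name below is an assumption; use whatever target this file builds into): gtest's --gtest_filter selects a single TEST, and the gflags-backed FLAGS_num_threads is set with --num_threads.

    ./analyzer_tester --gtest_filter=Analyzer.DituRNN --num_threads=1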