@@ -160,7 +160,7 @@ TEST(paddle_inference_api_native_multithreads, word2vec) {
160
160
config.use_gpu = false ;
161
161
auto main_predictor = CreatePaddlePredictor<NativeConfig>(config);
162
162
163
- // prepare inputs data
163
+ // prepare input data and reference results
164
164
constexpr int num_jobs = 3 ;
165
165
std::vector<std::vector<framework::LoDTensor>> jobs (num_jobs);
166
166
std::vector<std::vector<PaddleTensor>> paddle_tensor_feeds (num_jobs);
@@ -204,13 +204,64 @@ TEST(paddle_inference_api_native_multithreads, word2vec) {
204
204
205
205
// check outputs correctness
206
206
float * ref_data = refs[tid].data <float >();
207
- EXPECT_EQ (refs[tid].numel (), len / sizeof (float ));
207
+ EXPECT_EQ (refs[tid].numel (), static_cast < int64_t >( len / sizeof (float ) ));
208
208
for (int i = 0 ; i < refs[tid].numel (); ++i) {
209
- EXPECT_LT (ref_data[i] - data[i], 1e-3 );
210
- EXPECT_GT (ref_data[i] - data[i], -1e-3 );
209
+ EXPECT_NEAR (ref_data[i], data[i], 1e-3 );
211
210
}
211
+ free (data);
212
+ });
213
+ }
214
+ for (int i = 0 ; i < num_jobs; ++i) {
215
+ threads[i].join ();
216
+ }
217
+ }
218
+
219
+ TEST (paddle_inference_api_native_multithreads, image_classification) {
220
+ constexpr int num_jobs = 4 ; // each job run 1 batch
221
+ constexpr int batch_size = 1 ;
222
+ NativeConfig config = GetConfig ();
223
+ config.use_gpu = false ;
224
+ config.model_dir =
225
+ FLAGS_dirname + " image_classification_resnet.inference.model" ;
226
+
227
+ auto main_predictor = CreatePaddlePredictor<NativeConfig>(config);
228
+ std::vector<framework::LoDTensor> jobs (num_jobs);
229
+ std::vector<std::vector<PaddleTensor>> paddle_tensor_feeds (num_jobs);
230
+ std::vector<framework::LoDTensor> refs (num_jobs);
231
+ for (size_t i = 0 ; i < jobs.size (); ++i) {
232
+ // prepare inputs
233
+ std::vector<std::vector<int64_t >> feed_target_shapes =
234
+ GetFeedTargetShapes (config.model_dir , /* is_combined*/ false );
235
+ feed_target_shapes[0 ][0 ] = batch_size;
236
+ framework::DDim input_dims = framework::make_ddim (feed_target_shapes[0 ]);
237
+ SetupTensor<float >(&jobs[i], input_dims, 0 .f , 1 .f );
238
+ paddle_tensor_feeds[i].push_back (LodTensorToPaddleTensor (&jobs[i]));
239
+
240
+ // get reference result of each job
241
+ std::vector<framework::LoDTensor*> ref_feeds (1 , &jobs[i]);
242
+ std::vector<framework::LoDTensor*> ref_fetches (1 , &refs[i]);
243
+ TestInference<platform::CPUPlace>(config.model_dir , ref_feeds, ref_fetches);
244
+ }
212
245
213
- free (local_outputs[0 ].data .data );
246
+ // create one thread per job; each thread runs a single job
247
+ std::vector<std::thread> threads;
248
+ for (int tid = 0 ; tid < num_jobs; ++tid) {
249
+ threads.emplace_back ([&, tid]() {
250
+ auto predictor = main_predictor->Clone ();
251
+ auto & local_inputs = paddle_tensor_feeds[tid];
252
+ std::vector<PaddleTensor> local_outputs;
253
+ ASSERT_TRUE (predictor->Run (local_inputs, &local_outputs));
254
+
255
+ // check outputs correctness
256
+ ASSERT_EQ (local_outputs.size (), 1UL );
257
+ const size_t len = local_outputs[0 ].data .length ;
258
+ float * data = static_cast <float *>(local_outputs[0 ].data .data );
259
+ float * ref_data = refs[tid].data <float >();
260
+ EXPECT_EQ (refs[tid].numel (), len / sizeof (float ));
261
+ for (int i = 0 ; i < refs[tid].numel (); ++i) {
262
+ EXPECT_NEAR (ref_data[i], data[i], 1e-3 );
263
+ }
264
+ free (data);
214
265
});
215
266
}
216
267
for (int i = 0 ; i < num_jobs; ++i) {
0 commit comments