@@ -15,6 +15,8 @@ limitations under the License. */
 #include <glog/logging.h>
 #include <gtest/gtest.h>
 
+#include <thread>
+
 #include "gflags/gflags.h"
 #include "paddle/contrib/inference/paddle_inference_api_impl.h"
 #include "paddle/fluid/inference/tests/test_helper.h"
@@ -45,14 +47,19 @@ NativeConfig GetConfig() {
   config.model_dir = FLAGS_dirname + "word2vec.inference.model";
   LOG(INFO) << "dirname " << config.model_dir;
   config.fraction_of_gpu_memory = 0.15;
+#ifdef PADDLE_WITH_CUDA
   config.use_gpu = true;
+#else
+  config.use_gpu = false;
+#endif
   config.device = 0;
   return config;
 }
 
-TEST(paddle_inference_api_impl, word2vec) {
+void MainWord2Vec(bool use_gpu) {
   NativeConfig config = GetConfig();
+  config.use_gpu = use_gpu;
   auto predictor = CreatePaddlePredictor<NativeConfig>(config);
 
   framework::LoDTensor first_word, second_word, third_word, fourth_word;
   framework::LoD lod{{0, 1}};
@@ -100,11 +107,12 @@ TEST(paddle_inference_api_impl, word2vec) {
   free(outputs[0].data.data);
 }
 
-TEST(paddle_inference_api_impl, image_classification) {
+void MainImageClassification(bool use_gpu) {
   int batch_size = 2;
   bool use_mkldnn = false;
   bool repeat = false;
   NativeConfig config = GetConfig();
+  config.use_gpu = use_gpu;
   config.model_dir =
       FLAGS_dirname + "image_classification_resnet.inference.model";
 
@@ -149,4 +157,148 @@ TEST(paddle_inference_api_impl, image_classification) {
   free(data);
 }
 
+void MainThreadsWord2Vec(bool use_gpu) {
+  NativeConfig config = GetConfig();
+  config.use_gpu = use_gpu;
+  auto main_predictor = CreatePaddlePredictor<NativeConfig>(config);
+
+  // prepare input data and reference results
+  constexpr int num_jobs = 3;
+  std::vector<std::vector<framework::LoDTensor>> jobs(num_jobs);
+  std::vector<std::vector<PaddleTensor>> paddle_tensor_feeds(num_jobs);
+  std::vector<framework::LoDTensor> refs(num_jobs);
+  for (size_t i = 0; i < jobs.size(); ++i) {
+    // each job has 4 words
+    jobs[i].resize(4);
+    for (size_t j = 0; j < 4; ++j) {
+      framework::LoD lod{{0, 1}};
+      int64_t dict_size = 2073;  // the size of the dictionary
+      SetupLoDTensor(&jobs[i][j], lod, static_cast<int64_t>(0), dict_size - 1);
+      paddle_tensor_feeds[i].push_back(LodTensorToPaddleTensor(&jobs[i][j]));
+    }
+
+    // get the single-threaded reference result for each job
+    std::vector<paddle::framework::LoDTensor*> ref_feeds;
+    std::vector<paddle::framework::LoDTensor*> ref_fetches(1, &refs[i]);
+    for (auto& word : jobs[i]) {
+      ref_feeds.push_back(&word);
+    }
+    TestInference<platform::CPUPlace>(config.model_dir, ref_feeds, ref_fetches);
+  }
+
+  // create threads; each thread runs one job
+  std::vector<std::thread> threads;
+  for (int tid = 0; tid < num_jobs; ++tid) {
+    threads.emplace_back([&, tid]() {
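+      // each thread runs inference on its own clone of the main predictor,
+      // so no predictor instance is shared across threads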
+      auto predictor = main_predictor->Clone();
+      auto& local_inputs = paddle_tensor_feeds[tid];
+      std::vector<PaddleTensor> local_outputs;
+      ASSERT_TRUE(predictor->Run(local_inputs, &local_outputs));
+
+      // check the output range
+      ASSERT_EQ(local_outputs.size(), 1UL);
+      const size_t len = local_outputs[0].data.length;
+      float* data = static_cast<float*>(local_outputs[0].data.data);
+      for (size_t j = 0; j < len / sizeof(float); ++j) {
+        ASSERT_LT(data[j], 1.0);
+        ASSERT_GT(data[j], -1.0);
+      }
+
+      // check output correctness against the reference
+      float* ref_data = refs[tid].data<float>();
+      EXPECT_EQ(refs[tid].numel(), static_cast<int64_t>(len / sizeof(float)));
+      for (int i = 0; i < refs[tid].numel(); ++i) {
+        EXPECT_NEAR(ref_data[i], data[i], 1e-3);
+      }
+      free(data);
+    });
+  }
+  for (int i = 0; i < num_jobs; ++i) {
+    threads[i].join();
+  }
+}
+
+void MainThreadsImageClassification(bool use_gpu) {
+  constexpr int num_jobs = 4;  // each job runs one batch
+  constexpr int batch_size = 1;
+  NativeConfig config = GetConfig();
+  config.use_gpu = use_gpu;
+  config.model_dir =
+      FLAGS_dirname + "image_classification_resnet.inference.model";
+
+  auto main_predictor = CreatePaddlePredictor<NativeConfig>(config);
+  std::vector<framework::LoDTensor> jobs(num_jobs);
+  std::vector<std::vector<PaddleTensor>> paddle_tensor_feeds(num_jobs);
+  std::vector<framework::LoDTensor> refs(num_jobs);
+  for (size_t i = 0; i < jobs.size(); ++i) {
+    // prepare inputs
+    std::vector<std::vector<int64_t>> feed_target_shapes =
+        GetFeedTargetShapes(config.model_dir, /*is_combined=*/false);
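+    // fix the first feed target's batch dimension to this test's batch size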
+    feed_target_shapes[0][0] = batch_size;
+    framework::DDim input_dims = framework::make_ddim(feed_target_shapes[0]);
+    SetupTensor<float>(&jobs[i], input_dims, 0.f, 1.f);
+    paddle_tensor_feeds[i].push_back(LodTensorToPaddleTensor(&jobs[i]));
+
+    // get the single-threaded reference result for each job
+    std::vector<framework::LoDTensor*> ref_feeds(1, &jobs[i]);
+    std::vector<framework::LoDTensor*> ref_fetches(1, &refs[i]);
+    TestInference<platform::CPUPlace>(config.model_dir, ref_feeds, ref_fetches);
+  }
+
+  // create threads; each thread runs one job
+  std::vector<std::thread> threads;
+  for (int tid = 0; tid < num_jobs; ++tid) {
+    threads.emplace_back([&, tid]() {
+      auto predictor = main_predictor->Clone();
+      auto& local_inputs = paddle_tensor_feeds[tid];
+      std::vector<PaddleTensor> local_outputs;
+      ASSERT_TRUE(predictor->Run(local_inputs, &local_outputs));
+
+      // check output correctness against the reference
+      ASSERT_EQ(local_outputs.size(), 1UL);
+      const size_t len = local_outputs[0].data.length;
+      float* data = static_cast<float*>(local_outputs[0].data.data);
+      float* ref_data = refs[tid].data<float>();
+      EXPECT_EQ(refs[tid].numel(), static_cast<int64_t>(len / sizeof(float)));
+      for (int i = 0; i < refs[tid].numel(); ++i) {
+        EXPECT_NEAR(ref_data[i], data[i], 1e-3);
+      }
+      free(data);
+    });
+  }
+  for (int i = 0; i < num_jobs; ++i) {
+    threads[i].join();
+  }
+}
+
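+// The CPU variants always run; the GPU variants below are only compiled in
+// when PADDLE_WITH_CUDA is defined.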
+TEST(inference_api_native, word2vec_cpu) { MainWord2Vec(false /*use_gpu*/); }
+TEST(inference_api_native, word2vec_cpu_threads) {
+  MainThreadsWord2Vec(false /*use_gpu*/);
+}
+TEST(inference_api_native, image_classification_cpu) {
+  MainImageClassification(false /*use_gpu*/);
+}
+TEST(inference_api_native, image_classification_cpu_threads) {
+  MainThreadsImageClassification(false /*use_gpu*/);
+}
+
+#ifdef PADDLE_WITH_CUDA
+TEST(inference_api_native, word2vec_gpu) { MainWord2Vec(true /*use_gpu*/); }
+TEST(inference_api_native, word2vec_gpu_threads) {
+  MainThreadsWord2Vec(true /*use_gpu*/);
+}
+TEST(inference_api_native, image_classification_gpu) {
+  MainImageClassification(true /*use_gpu*/);
+}
+TEST(inference_api_native, image_classification_gpu_threads) {
+  MainThreadsImageClassification(true /*use_gpu*/);
+}
+
+#endif
+
 }  // namespace paddle