@@ -15,6 +15,8 @@ limitations under the License. */

 #include <glog/logging.h>
 #include <gtest/gtest.h>

+#include <thread>
+
 #include "gflags/gflags.h"
 #include "paddle/contrib/inference/paddle_inference_api_impl.h"
 #include "paddle/fluid/inference/tests/test_helper.h"
@@ -45,7 +47,11 @@ NativeConfig GetConfig() {
   config.model_dir = FLAGS_dirname + "word2vec.inference.model";
   LOG(INFO) << "dirname " << config.model_dir;
   config.fraction_of_gpu_memory = 0.15;
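+  // use the GPU only when the build has CUDA support; otherwise run on CPU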
+#ifdef PADDLE_WITH_CUDA
   config.use_gpu = true;
+#else
+  config.use_gpu = false;
+#endif
   config.device = 0;
   return config;
 }
@@ -149,4 +155,67 @@ TEST(paddle_inference_api_impl, image_classification) {
   free(data);
 }

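+// Run one inference job per thread on cloned predictors and compare each
+// thread's output against a single-threaded reference result.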
+TEST(paddle_inference_api_native_multithreads, word2vec) {
+  NativeConfig config = GetConfig();
+  config.use_gpu = false;
+  auto main_predictor = CreatePaddlePredictor<NativeConfig>(config);
+
+  // prepare input data
+  constexpr int num_jobs = 3;
+  std::vector<std::vector<framework::LoDTensor>> jobs(num_jobs);
+  std::vector<std::vector<PaddleTensor>> paddle_tensor_feeds(num_jobs);
+  std::vector<framework::LoDTensor> refs(num_jobs);
+  for (size_t i = 0; i < jobs.size(); ++i) {
+    // each job feeds 4 words
+    jobs[i].resize(4);
+    for (size_t j = 0; j < 4; ++j) {
+      framework::LoD lod{{0, 1}};
+      int64_t dict_size = 2073;  // size of the dictionary
+      SetupLoDTensor(&jobs[i][j], lod, static_cast<int64_t>(0), dict_size - 1);
+      paddle_tensor_feeds[i].push_back(LodTensorToPaddleTensor(&jobs[i][j]));
+    }
+
+    // get the reference result of each job
+    std::vector<paddle::framework::LoDTensor*> ref_feeds;
+    std::vector<paddle::framework::LoDTensor*> ref_fetches(1, &refs[i]);
+    for (auto& word : jobs[i]) {
+      ref_feeds.push_back(&word);
+    }
+    TestInference<platform::CPUPlace>(config.model_dir, ref_feeds, ref_fetches);
+  }
+
+  // create threads; each thread runs one job
+  std::vector<std::thread> threads;
+  for (int tid = 0; tid < num_jobs; ++tid) {
+    threads.emplace_back([&, tid]() {
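+      // each thread runs inference on its own clone of the main predictor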
+      auto predictor = main_predictor->Clone();
+      auto& local_inputs = paddle_tensor_feeds[tid];
+      std::vector<PaddleTensor> local_outputs;
+      ASSERT_TRUE(predictor->Run(local_inputs, &local_outputs));
+
+      // check that all output values lie in the expected (-1, 1) range
+      ASSERT_EQ(local_outputs.size(), 1UL);
+      const size_t len = local_outputs[0].data.length;
+      float* data = static_cast<float*>(local_outputs[0].data.data);
+      for (size_t j = 0; j < len / sizeof(float); ++j) {
+        ASSERT_LT(data[j], 1.0);
+        ASSERT_GT(data[j], -1.0);
+      }
+
+      // check outputs against the single-threaded reference (tolerance 1e-3)
+      float* ref_data = refs[tid].data<float>();
+      EXPECT_EQ(refs[tid].numel(), static_cast<int64_t>(len / sizeof(float)));
+      for (int i = 0; i < refs[tid].numel(); ++i) {
+        EXPECT_LT(ref_data[i] - data[i], 1e-3);
+        EXPECT_GT(ref_data[i] - data[i], -1e-3);
+      }
+
+      free(local_outputs[0].data.data);
+    });
+  }
+
+  for (int i = 0; i < num_jobs; ++i) {
+    threads[i].join();
+  }
+}
+
 }  // namespace paddle