
Commit 80e882a

Merge pull request #11247 from tensor-tang/infer_api
Infer multi-threads API Demo and UT
2 parents: 9141bee + e030741 · commit 80e882a

2 files changed (+204 lines, -3 lines)


paddle/contrib/inference/demo/simple_on_word2vec.cc

Lines changed: 55 additions & 1 deletion
@@ -19,8 +19,8 @@ limitations under the License. */
 #include <glog/logging.h>
 #include <gtest/gtest.h>
 #include <memory>
+#include <thread>
 #include "paddle/contrib/inference/paddle_inference_api.h"
-
 namespace paddle {
 namespace demo {
 
@@ -61,13 +61,67 @@ void Main(bool use_gpu) {
     for (size_t i = 0; i < std::min(5UL, num_elements); i++) {
       LOG(INFO) << static_cast<float*>(outputs.front().data.data)[i];
     }
+    // TODO(Superjomn): this is should be free automatically
+    free(outputs[0].data.data);
+  }
+}
+
+void MainThreads(int num_threads, bool use_gpu) {
+  // Multi-threads only support on CPU
+  // 0. Create PaddlePredictor with a config.
+  NativeConfig config;
+  config.model_dir = FLAGS_dirname + "word2vec.inference.model";
+  config.use_gpu = use_gpu;
+  config.fraction_of_gpu_memory = 0.15;
+  config.device = 0;
+  auto main_predictor =
+      CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
+
+  std::vector<std::thread> threads;
+  for (int tid = 0; tid < num_threads; ++tid) {
+    threads.emplace_back([&, tid]() {
+      // 1. clone a predictor which shares the same parameters
+      auto predictor = main_predictor->Clone();
+      constexpr int num_batches = 3;
+      for (int batch_id = 0; batch_id < num_batches; ++batch_id) {
+        // 2. Dummy Input Data
+        int64_t data[4] = {1, 2, 3, 4};
+        PaddleBuf buf{.data = data, .length = sizeof(data)};
+        PaddleTensor tensor{.name = "",
+                            .shape = std::vector<int>({4, 1}),
+                            .data = buf,
+                            .dtype = PaddleDType::INT64};
+        std::vector<PaddleTensor> inputs(4, tensor);
+        std::vector<PaddleTensor> outputs;
+        // 3. Run
+        CHECK(predictor->Run(inputs, &outputs));
+
+        // 4. Get output.
+        ASSERT_EQ(outputs.size(), 1UL);
+        LOG(INFO) << "TID: " << tid << ", "
+                  << "output buffer size: " << outputs.front().data.length;
+        const size_t num_elements = outputs.front().data.length / sizeof(float);
+        // The outputs' buffers are in CPU memory.
+        for (size_t i = 0; i < std::min(5UL, num_elements); i++) {
+          LOG(INFO) << static_cast<float*>(outputs.front().data.data)[i];
+        }
+        free(outputs[0].data.data);
+      }
+    });
+  }
+  for (int i = 0; i < num_threads; ++i) {
+    threads[i].join();
   }
 }
 
 TEST(demo, word2vec_cpu) { Main(false /*use_gpu*/); }
+TEST(demo_multi_threads, word2vec_cpu_1) { MainThreads(1, false /*use_gpu*/); }
+TEST(demo_multi_threads, word2vec_cpu_4) { MainThreads(4, false /*use_gpu*/); }
 
 #ifdef PADDLE_WITH_CUDA
 TEST(demo, word2vec_gpu) { Main(true /*use_gpu*/); }
+TEST(demo_multi_threads, word2vec_gpu_1) { MainThreads(1, true /*use_gpu*/); }
+TEST(demo_multi_threads, word2vec_gpu_4) { MainThreads(4, true /*use_gpu*/); }
 #endif
 
 } // namespace demo
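
Taken together, the demo's new MainThreads path boils down to one pattern: build a single PaddlePredictor from a NativeConfig, then hand each worker thread its own Clone() and let it call Run() independently. Below is a minimal sketch of that pattern only, not part of the commit: the model path is a placeholder and the helper name RunSketch is invented for illustration.

// Sketch only: condenses the Clone()-per-thread pattern from the demo above.
// Assumes paddle/contrib/inference/paddle_inference_api.h is available;
// "./word2vec.inference.model" is a placeholder model directory.
#include <cstdlib>
#include <thread>
#include <vector>
#include "paddle/contrib/inference/paddle_inference_api.h"

void RunSketch(int num_threads) {
  paddle::NativeConfig config;
  config.model_dir = "./word2vec.inference.model";  // placeholder path
  config.use_gpu = false;  // the demo notes multi-threading targets CPU
  config.device = 0;

  // One predictor owns the parameters...
  auto main_predictor =
      paddle::CreatePaddlePredictor<paddle::NativeConfig,
                                    paddle::PaddleEngineKind::kNative>(config);

  std::vector<std::thread> threads;
  for (int tid = 0; tid < num_threads; ++tid) {
    threads.emplace_back([&]() {
      // ...and each thread runs inference on its own clone, which shares
      // those parameters but keeps its own execution state.
      auto predictor = main_predictor->Clone();
      int64_t data[4] = {1, 2, 3, 4};
      paddle::PaddleBuf buf{.data = data, .length = sizeof(data)};
      paddle::PaddleTensor tensor{.name = "",
                                  .shape = std::vector<int>({4, 1}),
                                  .data = buf,
                                  .dtype = paddle::PaddleDType::INT64};
      std::vector<paddle::PaddleTensor> inputs(4, tensor);
      std::vector<paddle::PaddleTensor> outputs;
      if (predictor->Run(inputs, &outputs)) {
        // As the TODO in the demo notes, the output buffer is not yet freed
        // automatically, so the caller releases it.
        free(outputs[0].data.data);
      }
    });
  }
  for (auto& t : threads) t.join();
}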

paddle/contrib/inference/test_paddle_inference_api_impl.cc

Lines changed: 149 additions & 2 deletions
@@ -15,6 +15,8 @@ limitations under the License. */
 #include <glog/logging.h>
 #include <gtest/gtest.h>
 
+#include <thread>
+
 #include "gflags/gflags.h"
 #include "paddle/contrib/inference/paddle_inference_api_impl.h"
 #include "paddle/fluid/inference/tests/test_helper.h"
@@ -45,14 +47,19 @@ NativeConfig GetConfig() {
   config.model_dir = FLAGS_dirname + "word2vec.inference.model";
   LOG(INFO) << "dirname " << config.model_dir;
   config.fraction_of_gpu_memory = 0.15;
+#ifdef PADDLE_WITH_CUDA
   config.use_gpu = true;
+#else
+  config.use_gpu = false;
+#endif
   config.device = 0;
   return config;
 }
 
-TEST(paddle_inference_api_impl, word2vec) {
+void MainWord2Vec(bool use_gpu) {
   NativeConfig config = GetConfig();
   auto predictor = CreatePaddlePredictor<NativeConfig>(config);
+  config.use_gpu = use_gpu;
 
   framework::LoDTensor first_word, second_word, third_word, fourth_word;
   framework::LoD lod{{0, 1}};
@@ -100,11 +107,12 @@ TEST(paddle_inference_api_impl, word2vec) {
   free(outputs[0].data.data);
 }
 
-TEST(paddle_inference_api_impl, image_classification) {
+void MainImageClassification(bool use_gpu) {
   int batch_size = 2;
   bool use_mkldnn = false;
   bool repeat = false;
   NativeConfig config = GetConfig();
+  config.use_gpu = use_gpu;
   config.model_dir =
       FLAGS_dirname + "image_classification_resnet.inference.model";
 
@@ -149,4 +157,143 @@ TEST(paddle_inference_api_impl, image_classification) {
   free(data);
 }
 
+void MainThreadsWord2Vec(bool use_gpu) {
+  NativeConfig config = GetConfig();
+  config.use_gpu = use_gpu;
+  auto main_predictor = CreatePaddlePredictor<NativeConfig>(config);
+
+  // prepare inputs data and reference results
+  constexpr int num_jobs = 3;
+  std::vector<std::vector<framework::LoDTensor>> jobs(num_jobs);
+  std::vector<std::vector<PaddleTensor>> paddle_tensor_feeds(num_jobs);
+  std::vector<framework::LoDTensor> refs(num_jobs);
+  for (size_t i = 0; i < jobs.size(); ++i) {
+    // each job has 4 words
+    jobs[i].resize(4);
+    for (size_t j = 0; j < 4; ++j) {
+      framework::LoD lod{{0, 1}};
+      int64_t dict_size = 2073;  // The size of dictionary
+      SetupLoDTensor(&jobs[i][j], lod, static_cast<int64_t>(0), dict_size - 1);
+      paddle_tensor_feeds[i].push_back(LodTensorToPaddleTensor(&jobs[i][j]));
+    }
+
+    // get reference result of each job
+    std::vector<paddle::framework::LoDTensor*> ref_feeds;
+    std::vector<paddle::framework::LoDTensor*> ref_fetches(1, &refs[i]);
+    for (auto& word : jobs[i]) {
+      ref_feeds.push_back(&word);
+    }
+    TestInference<platform::CPUPlace>(config.model_dir, ref_feeds, ref_fetches);
+  }
+
+  // create threads and each thread run 1 job
+  std::vector<std::thread> threads;
+  for (int tid = 0; tid < num_jobs; ++tid) {
+    threads.emplace_back([&, tid]() {
+      auto predictor = main_predictor->Clone();
+      auto& local_inputs = paddle_tensor_feeds[tid];
+      std::vector<PaddleTensor> local_outputs;
+      ASSERT_TRUE(predictor->Run(local_inputs, &local_outputs));
+
+      // check outputs range
+      ASSERT_EQ(local_outputs.size(), 1UL);
+      const size_t len = local_outputs[0].data.length;
+      float* data = static_cast<float*>(local_outputs[0].data.data);
+      for (size_t j = 0; j < len / sizeof(float); ++j) {
+        ASSERT_LT(data[j], 1.0);
+        ASSERT_GT(data[j], -1.0);
+      }
+
+      // check outputs correctness
+      float* ref_data = refs[tid].data<float>();
+      EXPECT_EQ(refs[tid].numel(), static_cast<int64_t>(len / sizeof(float)));
+      for (int i = 0; i < refs[tid].numel(); ++i) {
+        EXPECT_NEAR(ref_data[i], data[i], 1e-3);
+      }
+      free(data);
+    });
+  }
+  for (int i = 0; i < num_jobs; ++i) {
+    threads[i].join();
+  }
+}
+
+void MainThreadsImageClassification(bool use_gpu) {
+  constexpr int num_jobs = 4;  // each job run 1 batch
+  constexpr int batch_size = 1;
+  NativeConfig config = GetConfig();
+  config.use_gpu = use_gpu;
+  config.model_dir =
+      FLAGS_dirname + "image_classification_resnet.inference.model";
+
+  auto main_predictor = CreatePaddlePredictor<NativeConfig>(config);
+  std::vector<framework::LoDTensor> jobs(num_jobs);
+  std::vector<std::vector<PaddleTensor>> paddle_tensor_feeds(num_jobs);
+  std::vector<framework::LoDTensor> refs(num_jobs);
+  for (size_t i = 0; i < jobs.size(); ++i) {
+    // prepare inputs
+    std::vector<std::vector<int64_t>> feed_target_shapes =
+        GetFeedTargetShapes(config.model_dir, /*is_combined*/ false);
+    feed_target_shapes[0][0] = batch_size;
+    framework::DDim input_dims = framework::make_ddim(feed_target_shapes[0]);
+    SetupTensor<float>(&jobs[i], input_dims, 0.f, 1.f);
+    paddle_tensor_feeds[i].push_back(LodTensorToPaddleTensor(&jobs[i]));
+
+    // get reference result of each job
+    std::vector<framework::LoDTensor*> ref_feeds(1, &jobs[i]);
+    std::vector<framework::LoDTensor*> ref_fetches(1, &refs[i]);
+    TestInference<platform::CPUPlace>(config.model_dir, ref_feeds, ref_fetches);
+  }
+
+  // create threads and each thread run 1 job
+  std::vector<std::thread> threads;
+  for (int tid = 0; tid < num_jobs; ++tid) {
+    threads.emplace_back([&, tid]() {
+      auto predictor = main_predictor->Clone();
+      auto& local_inputs = paddle_tensor_feeds[tid];
+      std::vector<PaddleTensor> local_outputs;
+      ASSERT_TRUE(predictor->Run(local_inputs, &local_outputs));
+
+      // check outputs correctness
+      ASSERT_EQ(local_outputs.size(), 1UL);
+      const size_t len = local_outputs[0].data.length;
+      float* data = static_cast<float*>(local_outputs[0].data.data);
+      float* ref_data = refs[tid].data<float>();
+      EXPECT_EQ(refs[tid].numel(), len / sizeof(float));
+      for (int i = 0; i < refs[tid].numel(); ++i) {
+        EXPECT_NEAR(ref_data[i], data[i], 1e-3);
+      }
+      free(data);
+    });
+  }
+  for (int i = 0; i < num_jobs; ++i) {
+    threads[i].join();
+  }
+}
+
+TEST(inference_api_native, word2vec_cpu) { MainWord2Vec(false /*use_gpu*/); }
+TEST(inference_api_native, word2vec_cpu_threads) {
+  MainThreadsWord2Vec(false /*use_gpu*/);
+}
+TEST(inference_api_native, image_classification_cpu) {
+  MainThreadsImageClassification(false /*use_gpu*/);
+}
+TEST(inference_api_native, image_classification_cpu_threads) {
+  MainThreadsImageClassification(false /*use_gpu*/);
+}
+
+#ifdef PADDLE_WITH_CUDA
+TEST(inference_api_native, word2vec_gpu) { MainWord2Vec(true /*use_gpu*/); }
+TEST(inference_api_native, word2vec_gpu_threads) {
+  MainThreadsWord2Vec(true /*use_gpu*/);
+}
+TEST(inference_api_native, image_classification_gpu) {
+  MainThreadsImageClassification(true /*use_gpu*/);
+}
+TEST(inference_api_native, image_classification_gpu_threads) {
+  MainThreadsImageClassification(true /*use_gpu*/);
+}
+
+#endif
+
 } // namespace paddle
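
The new multi-threaded unit tests all verify correctness the same way: a reference result is computed single-threaded via TestInference first, and each thread's raw float output is then compared against it element-wise. A stripped-down illustration of just that comparison step follows; the helper name CheckAgainstReference is hypothetical and not part of the commit.

// Illustrative only: the compare-against-reference idiom used by the new
// multi-threaded tests, extracted into a hypothetical helper.
#include <gtest/gtest.h>
#include <cstddef>
#include <cstdint>

void CheckAgainstReference(const float* data, size_t len_bytes,
                           const float* ref_data, int64_t ref_numel) {
  // Both results must contain the same number of float elements...
  EXPECT_EQ(static_cast<int64_t>(len_bytes / sizeof(float)), ref_numel);
  // ...and agree within an absolute tolerance of 1e-3, as in the tests above,
  // which tolerates minor numeric noise without hiding real divergence
  // between the threaded and single-threaded runs.
  for (int64_t i = 0; i < ref_numel; ++i) {
    EXPECT_NEAR(ref_data[i], data[i], 1e-3);
  }
}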
