Commit 20b40cb

add multi-thread for nlp unit-tests
1 parent e69d9c8 commit 20b40cb

7 files changed (+207 -235 lines)


paddle/fluid/inference/api/helper.h

Lines changed: 7 additions & 1 deletion
@@ -123,10 +123,16 @@ std::string DescribeTensor(const PaddleTensor &tensor) {
 }
 
 void PrintTime(int batch_size, int repeat, int num_threads, int tid,
-               double latency) {
+               double latency, int epoch = 1) {
   LOG(INFO) << "====== batch_size: " << batch_size << ", repeat: " << repeat
             << ", threads: " << num_threads << ", thread id: " << tid
             << ", latency: " << latency << "ms ======";
+  if (epoch > 1) {
+    int samples = batch_size * epoch;
+    LOG(INFO) << "====== sample number: " << samples
+              << ", average latency of each sample: " << latency / samples
+              << "ms ======";
+  }
 }
 
 }  // namespace inference
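
The new epoch argument lets PrintTime also report a per-sample latency when one timing covers several batches; with the default epoch = 1 the output is unchanged. A minimal usage sketch (the numbers are illustrative, not taken from the tests):

// Hypothetical values: 4 batches of 32 samples timed together.
double total_ms = 128.0;  // assumed wall time measured around all Run() calls
int batch_size = 32, repeat = 1, num_threads = 1, tid = 0, epoch = 4;
PrintTime(batch_size, repeat, num_threads, tid, total_ms / repeat, epoch);
// Because epoch > 1, this additionally logs:
//   sample number: 128, average latency of each sample: 1ms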

paddle/fluid/inference/tests/api/CMakeLists.txt

Lines changed: 2 additions & 4 deletions
@@ -49,9 +49,7 @@ set(TEXT_CLASSIFICATION_MODEL_URL "http://paddle-inference-dist.bj.bcebos.com/te
 set(TEXT_CLASSIFICATION_DATA_URL "http://paddle-inference-dist.bj.bcebos.com/text_classification_data.txt.tar.gz")
 set(TEXT_CLASSIFICATION_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo/text_classification")
 download_model_and_data(${TEXT_CLASSIFICATION_INSTALL_DIR} ${TEXT_CLASSIFICATION_MODEL_URL} ${TEXT_CLASSIFICATION_DATA_URL})
-inference_analysis_test(test_text_classification SRCS analyzer_text_classification_tester.cc
+inference_analysis_test(test_analyzer_text_classification SRCS analyzer_text_classification_tester.cc
     EXTRA_DEPS paddle_inference_api paddle_fluid_api analysis_predictor
     ARGS --infer_model=${TEXT_CLASSIFICATION_INSTALL_DIR}/text-classification-Senta
-         --infer_data=${TEXT_CLASSIFICATION_INSTALL_DIR}/data.txt
-         --topn=1 # Just run top 1 batch.
-         )
+         --infer_data=${TEXT_CLASSIFICATION_INSTALL_DIR}/data.txt)

paddle/fluid/inference/tests/api/analyzer_lac_tester.cc

Lines changed: 19 additions & 51 deletions
@@ -12,21 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/inference/analysis/analyzer.h"
-#include <gtest/gtest.h>
-#include "paddle/fluid/framework/ir/fuse_pass_base.h"
-#include "paddle/fluid/inference/analysis/ut_helper.h"
-#include "paddle/fluid/inference/api/analysis_predictor.h"
-#include "paddle/fluid/inference/api/helper.h"
-#include "paddle/fluid/inference/api/paddle_inference_pass.h"
-#include "paddle/fluid/platform/profiler.h"
-
-DEFINE_string(infer_model, "", "model path for LAC");
-DEFINE_string(infer_data, "", "data file for LAC");
-DEFINE_int32(batch_size, 1, "batch size.");
-DEFINE_int32(burning, 0, "Burning before repeat.");
-DEFINE_int32(repeat, 1, "Running the inference program repeat times.");
-DEFINE_bool(test_all_data, false, "Test the all dataset in data file.");
+#include "paddle/fluid/inference/tests/api/tester_helper.h"
 
 namespace paddle {
 namespace inference {
@@ -126,46 +112,37 @@ void TestLACPrediction(const std::string &model_path,
                        const std::string &data_file, const int batch_size,
                        const int repeat, bool test_all_data,
                        bool use_analysis = false) {
-  NativeConfig config;
-  config.model_dir = model_path;
-  config.use_gpu = false;
-  config.device = 0;
-  config.specify_input_name = true;
+  AnalysisConfig cfg;
+  cfg.model_dir = model_path;
+  cfg.use_gpu = false;
+  cfg.device = 0;
+  cfg.specify_input_name = true;
+  cfg.enable_ir_optim = true;
+
   std::vector<PaddleTensor> input_slots, outputs_slots;
   DataRecord data(data_file, batch_size);
   GetOneBatch(&input_slots, &data, batch_size);
   std::unique_ptr<PaddlePredictor> predictor;
   if (use_analysis) {
-    AnalysisConfig cfg;
-    cfg.model_dir = model_path;
-    cfg.use_gpu = false;
-    cfg.device = 0;
-    cfg.specify_input_name = true;
-    cfg.enable_ir_optim = true;
     predictor =
         CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(cfg);
   } else {
     predictor =
-        CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
+        CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(cfg);
   }
   for (int i = 0; i < FLAGS_burning; i++) {
     predictor->Run(input_slots, &outputs_slots);
   }
   Timer timer;
-  if (test_all_data) {
-    double sum = 0;
-    LOG(INFO) << "Total number of samples: " << data.datasets.size();
-    for (int i = 0; i < repeat; i++) {
-      for (size_t bid = 0; bid < data.batched_datas.size(); ++bid) {
-        GetOneBatch(&input_slots, &data, batch_size);
-        timer.tic();
-        predictor->Run(input_slots, &outputs_slots);
-        sum += timer.toc();
-      }
+  if (FLAGS_test_all_data) {
+    LOG(INFO) << "test all data";
+    std::vector<std::vector<PaddleTensor>> input_slots_all;
+    for (size_t bid = 0; bid < data.batched_datas.size(); ++bid) {
+      GetOneBatch(&input_slots, &data, batch_size);
+      input_slots_all.emplace_back(input_slots);
     }
-    PrintTime(batch_size, repeat, 1, 0, sum / repeat);
-    LOG(INFO) << "Average latency of each sample: "
-              << sum / repeat / data.datasets.size() << " ms";
+    LOG(INFO) << "total number of samples: " << data.datasets.size();
+    TestPrediction(cfg, input_slots_all, &outputs_slots, FLAGS_num_threads);
     return;
   }
   timer.tic();
@@ -190,19 +167,10 @@ void TestLACPrediction(const std::string &model_path,
   if (use_analysis) {
     // run once for comparion as reference
     auto ref_predictor =
-        CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
+        CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(cfg);
     std::vector<PaddleTensor> ref_outputs_slots;
     ref_predictor->Run(input_slots, &ref_outputs_slots);
-    EXPECT_EQ(ref_outputs_slots.size(), outputs_slots.size());
-    auto &ref_out = ref_outputs_slots[0];
-    size_t ref_size =
-        std::accumulate(ref_out.shape.begin(), ref_out.shape.end(), 1,
-                        [](int a, int b) { return a * b; });
-    EXPECT_EQ(size, ref_size);
-    int64_t *pdata_ref = static_cast<int64_t *>(ref_out.data.data());
-    for (size_t i = 0; i < size; ++i) {
-      EXPECT_EQ(pdata_ref[i], pdata[i]);
-    }
+    CompareResult(ref_outputs_slots, outputs_slots);
 
     AnalysisPredictor *analysis_predictor =
         dynamic_cast<AnalysisPredictor *>(predictor.get());
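
The inline element-by-element checks above are replaced by a single CompareResult call, shared through the new tester_helper.h (part of this commit but not shown in this view). For reference, here is the float version that this commit removes from analyzer_rnn1_tester.cc further down; the actual shared helper may differ, e.g. it would also need an integer overload for the int64 outputs compared here:

void CompareResult(const std::vector<PaddleTensor> &outputs,
                   const std::vector<PaddleTensor> &base_outputs) {
  // Both runs must produce the same, non-empty set of output tensors.
  PADDLE_ENFORCE_GT(outputs.size(), 0);
  PADDLE_ENFORCE_EQ(outputs.size(), base_outputs.size());
  for (size_t i = 0; i < outputs.size(); i++) {
    auto &out = outputs[i];
    auto &base_out = base_outputs[i];
    // Flatten the shapes and require identical element counts.
    size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
                                  [](int a, int b) { return a * b; });
    size_t size1 = std::accumulate(base_out.shape.begin(), base_out.shape.end(),
                                   1, [](int a, int b) { return a * b; });
    PADDLE_ENFORCE_EQ(size, size1);
    PADDLE_ENFORCE_GT(size, 0);
    // Element-wise comparison with a small tolerance for float outputs.
    float *data = static_cast<float *>(out.data.data());
    float *base_data = static_cast<float *>(base_out.data.data());
    for (size_t j = 0; j < size; j++) {
      EXPECT_NEAR(data[j], base_data[j], 1e-3);
    }
  }
}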

paddle/fluid/inference/tests/api/analyzer_ner_tester.cc

Lines changed: 18 additions & 55 deletions
@@ -12,20 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/inference/analysis/analyzer.h"
-#include <gtest/gtest.h>
-#include "paddle/fluid/framework/ir/fuse_pass_base.h"
-#include "paddle/fluid/inference/analysis/ut_helper.h"
-#include "paddle/fluid/inference/api/analysis_predictor.h"
-#include "paddle/fluid/inference/api/helper.h"
-#include "paddle/fluid/inference/api/paddle_inference_pass.h"
-#include "paddle/fluid/platform/profiler.h"
-
-DEFINE_string(infer_model, "", "model path");
-DEFINE_string(infer_data, "", "data path");
-DEFINE_int32(batch_size, 10, "batch size.");
-DEFINE_int32(repeat, 1, "Running the inference program repeat times.");
-DEFINE_bool(test_all_data, false, "Test the all dataset in data file.");
+#include "paddle/fluid/inference/tests/api/tester_helper.h"
 
 namespace paddle {
 namespace inference {
@@ -113,50 +100,35 @@ const int chinese_ner_result_data[] = {30, 45, 41, 48, 17, 26,
                                          48, 39, 38, 16, 25};
 
 void TestChineseNERPrediction(bool use_analysis) {
-  NativeConfig config;
-  config.prog_file = FLAGS_infer_model + "/__model__";
-  config.param_file = FLAGS_infer_model + "/param";
-  config.use_gpu = false;
-  config.device = 0;
-  config.specify_input_name = true;
+  AnalysisConfig cfg;
+  cfg.prog_file = FLAGS_infer_model + "/__model__";
+  cfg.param_file = FLAGS_infer_model + "/param";
+  cfg.use_gpu = false;
+  cfg.device = 0;
+  cfg.specify_input_name = true;
+  cfg.enable_ir_optim = true;
 
   std::vector<PaddleTensor> input_slots, outputs;
   std::unique_ptr<PaddlePredictor> predictor;
   Timer timer;
   if (use_analysis) {
-    AnalysisConfig cfg;
-    cfg.prog_file = FLAGS_infer_model + "/__model__";
-    cfg.param_file = FLAGS_infer_model + "/param";
-    cfg.use_gpu = false;
-    cfg.device = 0;
-    cfg.specify_input_name = true;
-    cfg.enable_ir_optim = true;
     predictor =
         CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(cfg);
   } else {
     predictor =
-        CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
+        CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(cfg);
   }
 
   if (FLAGS_test_all_data) {
     LOG(INFO) << "test all data";
-    double sum = 0;
-    size_t num_samples;
-    for (int i = 0; i < FLAGS_repeat; i++) {
-      DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
-      // Just one batch, the num_samples remains the same.
-      num_samples = data.num_samples;
-      for (size_t bid = 0; bid < num_samples / FLAGS_batch_size; ++bid) {
-        PrepareInputs(&input_slots, &data, FLAGS_batch_size);
-        timer.tic();
-        predictor->Run(input_slots, &outputs);
-        sum += timer.toc();
-      }
+    DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
+    std::vector<std::vector<PaddleTensor>> input_slots_all;
+    for (size_t bid = 0; bid < data.num_samples / FLAGS_batch_size; ++bid) {
+      PrepareInputs(&input_slots, &data, FLAGS_batch_size);
+      input_slots_all.emplace_back(input_slots);
     }
-    LOG(INFO) << "total number of samples: " << num_samples;
-    PrintTime(FLAGS_batch_size, FLAGS_repeat, 1, 0, sum / FLAGS_repeat);
-    LOG(INFO) << "average latency of each sample: "
-              << sum / FLAGS_repeat / num_samples;
+    LOG(INFO) << "total number of samples: " << data.num_samples;
+    TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
     return;
   }
   // Prepare inputs.
@@ -182,19 +154,10 @@ void TestChineseNERPrediction(bool use_analysis) {
   if (use_analysis) {
     // run once for comparion as reference
     auto ref_predictor =
-        CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
+        CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(cfg);
     std::vector<PaddleTensor> ref_outputs_slots;
     ref_predictor->Run(input_slots, &ref_outputs_slots);
-    EXPECT_EQ(ref_outputs_slots.size(), outputs.size());
-    auto &ref_out = ref_outputs_slots[0];
-    size_t ref_size =
-        std::accumulate(ref_out.shape.begin(), ref_out.shape.end(), 1,
-                        [](int a, int b) { return a * b; });
-    EXPECT_EQ(size, ref_size);
-    int64_t *pdata_ref = static_cast<int64_t *>(ref_out.data.data());
-    for (size_t i = 0; i < size; ++i) {
-      EXPECT_EQ(pdata_ref[i], result[i]);
-    }
+    CompareResult(ref_outputs_slots, outputs);
 
     AnalysisPredictor *analysis_predictor =
         dynamic_cast<AnalysisPredictor *>(predictor.get());
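
Both the LAC and NER tests now collect one entry per batch in input_slots_all and hand it to TestPrediction along with FLAGS_num_threads. TestPrediction lives in the new tester_helper.h, which is not visible in this view; a plausible sketch, assuming it simply dispatches between the single-thread and multi-thread helpers used by the RNN1 test below:

// Sketch only; the real TestPrediction in tester_helper.h is not shown here.
void TestPrediction(const AnalysisConfig &config,
                    const std::vector<std::vector<PaddleTensor>> &inputs,
                    std::vector<PaddleTensor> *outputs, int num_threads) {
  if (num_threads == 1) {
    TestOneThreadPrediction(config, inputs, outputs);
  } else {
    // As in analyzer_rnn1_tester.cc, only the first thread's outputs are kept.
    TestMultiThreadPrediction(config, inputs, outputs, num_threads);
  }
}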

paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc

Lines changed: 7 additions & 77 deletions
@@ -12,24 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/inference/analysis/analyzer.h"
-
-#include <google/protobuf/text_format.h>
-#include <gtest/gtest.h>
-#include <thread>  // NOLINT
-#include "paddle/fluid/framework/ir/fuse_pass_base.h"
-#include "paddle/fluid/framework/ir/pass.h"
-#include "paddle/fluid/inference/analysis/ut_helper.h"
-#include "paddle/fluid/inference/api/analysis_predictor.h"
-#include "paddle/fluid/inference/api/helper.h"
-#include "paddle/fluid/inference/api/paddle_inference_api.h"
-#include "paddle/fluid/inference/api/paddle_inference_pass.h"
-
-DEFINE_string(infer_model, "", "model path");
-DEFINE_string(infer_data, "", "data path");
-DEFINE_int32(batch_size, 10, "batch size.");
-DEFINE_int32(repeat, 1, "Running the inference program repeat times.");
-DEFINE_int32(num_threads, 1, "Running the inference program in multi-threads.");
+#include "paddle/fluid/inference/tests/api/tester_helper.h"
 
 namespace paddle {
 namespace inference {
@@ -164,26 +147,6 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
   }
 }
 
-void CompareResult(const std::vector<PaddleTensor> &outputs,
-                   const std::vector<PaddleTensor> &base_outputs) {
-  PADDLE_ENFORCE_GT(outputs.size(), 0);
-  PADDLE_ENFORCE_EQ(outputs.size(), base_outputs.size());
-  for (size_t i = 0; i < outputs.size(); i++) {
-    auto &out = outputs[i];
-    auto &base_out = base_outputs[i];
-    size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
-                                  [](int a, int b) { return a * b; });
-    size_t size1 = std::accumulate(base_out.shape.begin(), base_out.shape.end(),
-                                   1, [](int a, int b) { return a * b; });
-    PADDLE_ENFORCE_EQ(size, size1);
-    PADDLE_ENFORCE_GT(size, 0);
-    float *data = static_cast<float *>(out.data.data());
-    float *base_data = static_cast<float *>(base_out.data.data());
-    for (size_t i = 0; i < size; i++) {
-      EXPECT_NEAR(data[i], base_data[i], 1e-3);
-    }
-  }
-}
 // Test with a really complicate model.
 void TestRNN1Prediction(bool use_analysis, bool activate_ir, int num_threads) {
   AnalysisConfig config;
@@ -198,7 +161,6 @@ void TestRNN1Prediction(bool use_analysis, bool activate_ir, int num_threads) {
   config.ir_passes.clear();  // Do not exclude any pass.
 
   int batch_size = FLAGS_batch_size;
-  int num_times = FLAGS_repeat;
 
   auto base_predictor =
       CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
@@ -213,45 +175,14 @@ void TestRNN1Prediction(bool use_analysis, bool activate_ir, int num_threads) {
 
   base_predictor->Run(input_slots, &base_outputs);
 
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
+  input_slots_all.emplace_back(input_slots);
   if (num_threads == 1) {
-    // Prepare inputs.
-    Timer timer;
-    timer.tic();
-    for (int i = 0; i < num_times; i++) {
-      predictor->Run(input_slots, &outputs);
-    }
-    PrintTime(batch_size, num_times, 1, 0, timer.toc() / num_times);
+    TestOneThreadPrediction(config, input_slots_all, &outputs);
     CompareResult(outputs, base_outputs);
   } else {
-    std::vector<std::thread> threads;
-    std::vector<std::unique_ptr<PaddlePredictor>> predictors;
-    // TODO(yanchunwei): Bug here, the analyzer phase can't be parallelled
-    // because AttentionLSTM's hard code nodeid will be damanged.
-    for (int tid = 0; tid < num_threads; ++tid) {
-      predictors.emplace_back(
-          CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
-              config));
-    }
-    for (int tid = 0; tid < num_threads; ++tid) {
-      threads.emplace_back([&, tid]() {
-        // Each thread should have local input_slots and outputs.
-        std::vector<PaddleTensor> input_slots;
-        DataRecord data(FLAGS_infer_data, batch_size);
-        PrepareInputs(&input_slots, &data, batch_size);
-        std::vector<PaddleTensor> outputs;
-        Timer timer;
-        timer.tic();
-        for (int i = 0; i < num_times; i++) {
-          predictors[tid]->Run(input_slots, &outputs);
-        }
-        PrintTime(batch_size, num_times, num_threads, tid,
-                  timer.toc() / num_times);
-        CompareResult(outputs, base_outputs);
-      });
-    }
-    for (int i = 0; i < num_threads; ++i) {
-      threads[i].join();
-    }
+    // only return the output of first thread
+    TestMultiThreadPrediction(config, input_slots_all, &outputs, num_threads);
   }
 
   if (use_analysis && activate_ir) {
@@ -293,8 +224,7 @@ TEST(Analyzer, RNN_tests) {
     // Directly infer with the original model.
     TestRNN1Prediction(false, false, i);
     // Inference with the original model with the analysis turned on, the
-    // analysis
-    // module will transform the program to a data flow graph.
+    // analysis module will transform the program to a data flow graph.
     TestRNN1Prediction(true, false, i);
     // Inference with analysis and IR. The IR module will fuse some large
     // kernels.
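
The threading loop deleted above is what the new TestMultiThreadPrediction helper factors out. A condensed sketch reconstructed from that removed code, assuming tester_helper.h keeps the same structure (one predictor and one local output vector per thread, timing reported through PrintTime); details such as how multiple input batches are iterated are assumptions:

void TestMultiThreadPrediction(
    const AnalysisConfig &config,
    const std::vector<std::vector<PaddleTensor>> &inputs,
    std::vector<PaddleTensor> *outputs, int num_threads) {
  std::vector<std::thread> threads;
  std::vector<std::unique_ptr<PaddlePredictor>> predictors;
  // One predictor per thread, created up front: the analyzer phase itself
  // is not parallel-safe (see the TODO removed above).
  for (int tid = 0; tid < num_threads; ++tid) {
    predictors.emplace_back(
        CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
            config));
  }
  for (int tid = 0; tid < num_threads; ++tid) {
    threads.emplace_back([&, tid]() {
      // Each thread keeps local outputs and runs every batch FLAGS_repeat times.
      std::vector<PaddleTensor> outputs_tid;
      Timer timer;
      timer.tic();
      for (int i = 0; i < FLAGS_repeat; i++) {
        for (const auto &input : inputs) {
          predictors[tid]->Run(input, &outputs_tid);
        }
      }
      PrintTime(FLAGS_batch_size, FLAGS_repeat, num_threads, tid,
                timer.toc() / FLAGS_repeat, inputs.size());
      if (tid == 0) *outputs = outputs_tid;  // only the first thread's result
    });
  }
  for (int i = 0; i < num_threads; ++i) {
    threads[i].join();
  }
}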
