Skip to content

Commit e90afec

Browse files
authored
Merge pull request #14543 from luotao1/threads
add thread related inference api
2 parents 64ca3d1 + 116979a commit e90afec

File tree

11 files changed

+44
-18
lines changed

11 files changed

+44
-18
lines changed

paddle/fluid/inference/api/analysis_config.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ contrib::AnalysisConfig::AnalysisConfig(const contrib::AnalysisConfig &other) {
4646
prog_file = other.prog_file;
4747
param_file = other.param_file;
4848
specify_input_name = other.specify_input_name;
49+
cpu_math_library_num_threads_ = other.cpu_math_library_num_threads_;
4950
// fields from this.
5051
enable_ir_optim = other.enable_ir_optim;
5152
use_feed_fetch_ops = other.use_feed_fetch_ops;
@@ -72,6 +73,7 @@ contrib::AnalysisConfig::AnalysisConfig(contrib::AnalysisConfig &&other) {
7273
prog_file = other.prog_file;
7374
param_file = other.param_file;
7475
specify_input_name = other.specify_input_name;
76+
cpu_math_library_num_threads_ = other.cpu_math_library_num_threads_;
7577
// fields from this.
7678
enable_ir_optim = other.enable_ir_optim;
7779
use_feed_fetch_ops = other.use_feed_fetch_ops;

paddle/fluid/inference/api/analysis_predictor.cc

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535
#include "paddle/fluid/platform/profiler.h"
3636

3737
DECLARE_bool(profile);
38-
DECLARE_int32(paddle_num_threads);
3938

4039
namespace paddle {
4140

@@ -67,7 +66,7 @@ bool AnalysisPredictor::Init(
6766
#endif
6867

6968
// no matter with or without MKLDNN
70-
paddle::platform::SetNumThreads(FLAGS_paddle_num_threads);
69+
paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
7170

7271
if (!PrepareScope(parent_scope)) {
7372
return false;
@@ -160,6 +159,14 @@ bool AnalysisPredictor::PrepareExecutor() {
160159
return true;
161160
}
162161

162+
void AnalysisPredictor::SetMkldnnThreadID(int tid) {
163+
#ifdef PADDLE_WITH_MKLDNN
164+
platform::set_cur_thread_id(tid);
165+
#else
166+
LOG(ERROR) << "Please compile with MKLDNN first to use MKLDNN";
167+
#endif
168+
}
169+
163170
bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
164171
std::vector<PaddleTensor> *output_data,
165172
int batch_size) {

paddle/fluid/inference/api/analysis_predictor.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ class AnalysisPredictor : public PaddlePredictor {
6969
framework::Scope *scope() { return scope_.get(); }
7070
framework::ProgramDesc &program() { return *inference_program_; }
7171

72+
void SetMkldnnThreadID(int tid);
73+
7274
protected:
7375
bool PrepareProgram(const std::shared_ptr<framework::ProgramDesc> &program);
7476
bool PrepareScope(const std::shared_ptr<framework::Scope> &parent_scope);

paddle/fluid/inference/api/api_impl.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ limitations under the License. */
2828
#include "paddle/fluid/platform/profiler.h"
2929

3030
DEFINE_bool(profile, false, "Turn on profiler for fluid");
31-
DECLARE_int32(paddle_num_threads);
3231

3332
namespace paddle {
3433
namespace {
@@ -76,7 +75,7 @@ bool NativePaddlePredictor::Init(
7675
#endif
7776

7877
// no matter with or without MKLDNN
79-
paddle::platform::SetNumThreads(FLAGS_paddle_num_threads);
78+
paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
8079

8180
if (config_.use_gpu) {
8281
place_ = paddle::platform::CUDAPlace(config_.device);

paddle/fluid/inference/api/paddle_analysis_config.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,9 @@ struct AnalysisConfig : public NativeConfig {
5151
int max_batch_size = 1);
5252
bool use_tensorrt() const { return use_tensorrt_; }
5353

54+
void EnableMKLDNN();
5455
// NOTE this is just for internal development, please not use it.
5556
// NOT stable yet.
56-
void EnableMKLDNN();  // (moved above; note the upstream comment reads "please not use it" — should be "please do not use it")
5757
bool use_mkldnn() const { return use_mkldnn_; }
5858

5959
friend class ::paddle::AnalysisPredictor;

paddle/fluid/inference/api/paddle_api.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,19 @@ struct NativeConfig : public PaddlePredictor::Config {
186186
// Specify the variable's name of each input if input tensors don't follow the
187187
// `feeds` and `fetches` of the phase `save_inference_model`.
188188
bool specify_input_name{false};
189+
190+
// Set and get the number of cpu math library threads.
191+
void SetCpuMathLibraryNumThreads(int cpu_math_library_num_threads) {
192+
cpu_math_library_num_threads_ = cpu_math_library_num_threads;
193+
}
194+
int cpu_math_library_num_threads() const {
195+
return cpu_math_library_num_threads_;
196+
}
197+
198+
protected:
199+
// number of cpu math library (such as MKL, OpenBlas) threads for each
200+
// instance.
201+
int cpu_math_library_num_threads_{1};
189202
};
190203

191204
// A factory to help create different predictors.

paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ void SetConfig(AnalysisConfig *cfg) {
2727
cfg->device = 0;
2828
cfg->enable_ir_optim = true;
2929
cfg->specify_input_name = true;
30+
cfg->SetCpuMathLibraryNumThreads(FLAGS_paddle_num_threads);
3031
}
3132

3233
void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {

paddle/fluid/inference/tests/api/config_printer.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ std::ostream &operator<<(std::ostream &os, const NativeConfig &config) {
5353
os << GenSpaces(num_spaces) << "param_file: " << config.param_file << "\n";
5454
os << GenSpaces(num_spaces)
5555
<< "specify_input_name: " << config.specify_input_name << "\n";
56+
os << GenSpaces(num_spaces)
57+
<< "cpu_num_threads: " << config.cpu_math_library_num_threads() << "\n";
5658
num_spaces--;
5759
os << GenSpaces(num_spaces) << "}\n";
5860
return os;

paddle/fluid/inference/tests/api/tester_helper.h

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ DEFINE_bool(use_analysis, true,
4242
"Running the inference program in analysis mode.");
4343

4444
DECLARE_bool(profile);
45+
DECLARE_int32(paddle_num_threads);
4546

4647
namespace paddle {
4748
namespace inference {
@@ -206,22 +207,23 @@ void TestMultiThreadPrediction(
206207
int batch_size = FLAGS_batch_size;
207208
int num_times = FLAGS_repeat;
208209
std::vector<std::thread> threads;
209-
std::vector<std::unique_ptr<PaddlePredictor>> predictors;
210-
predictors.emplace_back(CreateTestPredictor(config, use_analysis));
211-
for (int tid = 1; tid < num_threads; ++tid) {
212-
predictors.emplace_back(predictors.front()->Clone());
213-
}
210+
auto main_predictor = CreateTestPredictor(config, use_analysis);
214211

215212
size_t total_time{0};
216213
for (int tid = 0; tid < num_threads; ++tid) {
217214
threads.emplace_back([&, tid]() {
218-
#ifdef PADDLE_WITH_MKLDNN
219-
platform::set_cur_thread_id(static_cast<int>(tid) + 1);
220-
#endif
221215
// Each thread should have local inputs and outputs.
222216
// The inputs of each thread are all the same.
223217
std::vector<PaddleTensor> outputs_tid;
224-
auto &predictor = predictors[tid];
218+
// To ensure the thread binding correctly,
219+
// please clone inside the threadpool.
220+
auto predictor = main_predictor->Clone();
221+
#ifdef PADDLE_WITH_MKLDNN
222+
if (use_analysis) {
223+
static_cast<AnalysisPredictor *>(predictor.get())
224+
->SetMkldnnThreadID(static_cast<int>(tid) + 1);
225+
}
226+
#endif
225227

226228
// warmup run
227229
LOG(INFO) << "Running thread " << tid << ", warm up run...";

paddle/fluid/operators/math/fc_compute.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@ limitations under the License. */
1717
#include "paddle/fluid/operators/math/blas.h"
1818
#include "paddle/fluid/operators/math/jit_kernel.h"
1919

20-
DECLARE_int32(paddle_num_threads);
21-
2220
namespace paddle {
2321
namespace operators {
2422
namespace math {
@@ -43,7 +41,7 @@ inline void FCCompute(const BlasT<DeviceContext, T>& blas, const int M,
4341
.template Get<jitkernel::VAddKernel<T>>(N);
4442

4543
#ifdef PADDLE_WITH_MKLML
46-
#pragma omp parallel for if (FLAGS_paddle_num_threads > 1)
44+
#pragma omp parallel for
4745
#endif
4846
for (int i = 0; i < M; i++) {
4947
T* dst = Y + i * N;

0 commit comments

Comments
 (0)