Commit 18d6402

simplify inference api (#11104)
1 parent 86d8659 commit 18d6402

File tree: 3 files changed, +36 / -27 lines changed

3 files changed

+36
-27
lines changed

paddle/contrib/inference/paddle_inference_api.h

Lines changed: 23 additions & 17 deletions
@@ -40,14 +40,23 @@ struct PaddleBuf {
 struct PaddleTensor {
   std::string name;  // variable name.
   std::vector<int> shape;
+  // TODO(Superjomn) for LoD support, add a vector<vector<int>> field if needed.
   PaddleBuf data;  // blob of data.
   PaddleDType dtype;
 };
 
+enum class PaddleEngineKind {
+  kNative = 0,  // Use the native Fluid facility.
+  // TODO(Superjomn) support following engines latter.
+  // kAnakin,             // Use Anakin for inference.
+  // kTensorRT,           // Use TensorRT for inference.
+  // kAutoMixedAnakin,    // Automatically mix Fluid with Anakin.
+  // kAutoMixedTensorRT,  // Automatically mix Fluid with TensorRT.
+};
+
 /*
  * A simple Inference API for Paddle. Currently this API can be used by
  * non-sequence scenerios.
- * TODO(Superjomn) Support another API for NLP-related usages.
  */
 class PaddlePredictor {
  public:
@@ -69,15 +78,6 @@ class PaddlePredictor {
   // Destroy the Predictor.
   virtual ~PaddlePredictor() {}
 
-  enum class EngineKind {
-    kNative = -1,  // Use the native Fluid facility.
-    // TODO(Superjomn) support latter.
-    // kAnakin,             // Use Anakin for inference.
-    // kTensorRT,           // Use TensorRT for inference.
-    // kAutoMixedAnakin,    // Automatically mix Fluid with Anakin.
-    // kAutoMixedTensorRT,  // Automatically mix Fluid with TensorRT.
-  };
-
   // The common configs for all the predictors.
   struct Config {
     std::string model_dir;  // path to the model directory.
@@ -86,18 +86,24 @@
 };
 
 struct NativeConfig : public PaddlePredictor::Config {
+  // GPU related fields.
   bool use_gpu{false};
-  int device;
-  float fraction_of_gpu_memory;
+  int device{0};
+  float fraction_of_gpu_memory{-1.f};  // Negative to notify initialization.
+
   std::string prog_file;
   std::string param_file;
-  bool share_variables;
 };
 
-// A factory to help create difference predictor.
-template <
-    typename ConfigT,
-    PaddlePredictor::EngineKind engine = PaddlePredictor::EngineKind::kNative>
+// A factory to help create different predictors.
+//
+// FOR EXTENSION DEVELOPER:
+// Different predictors are designated by config type and engine kind. Similar
+// configs can be merged, but there shouldn't be a huge config containing
+// different fields for more than one kind of predictors.
+//
+// Similarly, each engine kind should map to a unique predictor implementation.
+template <typename ConfigT, PaddleEngineKind engine = PaddleEngineKind::kNative>
 std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
 
 }  // namespace paddle
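
For reference, a minimal caller-side sketch of the simplified API after this change. It is not part of the commit: the output parameter of Run(), the fields of PaddleBuf, and the PaddleDType::FLOAT32 value are assumptions based on the surrounding code rather than on lines shown in this diff, and the model path and input name are placeholders.

// Usage sketch only; assumes PaddleBuf exposes a raw `data` pointer plus a
// byte `length`, and that Run() takes the inputs and a pointer to an output
// vector of PaddleTensor.
#include <vector>

#include "paddle/contrib/inference/paddle_inference_api.h"

int main() {
  paddle::NativeConfig config;
  config.model_dir = "./my_model";       // hypothetical model directory
  config.use_gpu = true;
  config.device = 0;
  config.fraction_of_gpu_memory = 0.15;  // must now be set to (0., 1.] on GPU

  auto predictor =
      paddle::CreatePaddlePredictor<paddle::NativeConfig,
                                    paddle::PaddleEngineKind::kNative>(config);

  // Fill one input tensor; the variable name "x" is a placeholder.
  std::vector<float> input(1 * 3 * 224 * 224, 0.f);
  paddle::PaddleTensor tensor;
  tensor.name = "x";
  tensor.shape = {1, 3, 224, 224};
  tensor.data.data = input.data();                    // assumed PaddleBuf layout
  tensor.data.length = input.size() * sizeof(float);  // assumed PaddleBuf layout
  tensor.dtype = paddle::PaddleDType::FLOAT32;

  std::vector<paddle::PaddleTensor> outputs;
  if (!predictor->Run({tensor}, &outputs)) return 1;
  return 0;
}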

paddle/contrib/inference/paddle_inference_api_impl.cc

Lines changed: 13 additions & 9 deletions
@@ -57,8 +57,7 @@ std::string num2str(T a) {
 bool NativePaddlePredictor::Init() {
   VLOG(3) << "Predictor::init()";
 
-  // TODO(panyx0718): Should CPU vs GPU device be decided by id?
-  if (config_.device >= 0) {
+  if (config_.use_gpu) {
     place_ = paddle::platform::CUDAPlace(config_.device);
   } else {
     place_ = paddle::platform::CPUPlace();
@@ -85,11 +84,13 @@ bool NativePaddlePredictor::Init() {
   }
   ctx_ = executor_->Prepare(*inference_program_, 0);
 
-  // Create variables
-  // TODO(panyx0718): Why need to test share_variables here?
-  if (config_.share_variables) {
-    executor_->CreateVariables(*inference_program_, scope_.get(), 0);
-  }
+  // Create temporary variables first, so that the first batch do not need to
+  // create variables in the runtime. This is the logics of the old inference
+  // API.
+  // TODO(Superjomn) this should be modified when `Clone` is valid for
+  // multi-thread application.
+  executor_->CreateVariables(*inference_program_, scope_.get(), 0);
+
   // Get the feed_target_names and fetch_target_names
   feed_target_names_ = inference_program_->GetFeedTargetNames();
   fetch_target_names_ = inference_program_->GetFetchTargetNames();
@@ -124,7 +125,7 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
                                  scope_.get(),
                                  &feed_targets,
                                  &fetch_targets,
-                                 !config_.share_variables);
+                                 false /* don't create variable eatch time */);
   if (!GetFetch(fetchs, output_data)) {
     LOG(ERROR) << "fail to get fetchs";
     return false;
@@ -242,11 +243,14 @@ bool NativePaddlePredictor::GetFetch(
 
 template <>
 std::unique_ptr<PaddlePredictor>
-CreatePaddlePredictor<NativeConfig, PaddlePredictor::EngineKind::kNative>(
+CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(
     const NativeConfig &config) {
   VLOG(3) << "create NativePaddlePredictor";
   if (config.use_gpu) {
     // 1. GPU memeroy
+    PADDLE_ENFORCE(
+        config.fraction_of_gpu_memory > 0.f,
+        "fraction_of_gpu_memory in the config should be set to range (0., 1.]");
     std::vector<std::string> flags;
     if (config.fraction_of_gpu_memory >= 0.0f ||
         config.fraction_of_gpu_memory <= 0.95f) {
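
Taken together with the new fraction_of_gpu_memory{-1.f} default in the header, the added PADDLE_ENFORCE means a GPU config must now set the memory fraction explicitly before calling CreatePaddlePredictor. Below is a small caller-side sketch of that contract; the helper name MakeGpuConfig and the exception-based error handling are illustrative and not part of the commit.

#include <stdexcept>
#include <string>

#include "paddle/contrib/inference/paddle_inference_api.h"

// Hypothetical helper: builds a GPU NativeConfig and validates the fraction up
// front, mirroring the PADDLE_ENFORCE added in this commit. Leaving
// fraction_of_gpu_memory at its new -1.f default would otherwise trip the
// check inside CreatePaddlePredictor.
paddle::NativeConfig MakeGpuConfig(const std::string& model_dir,
                                   float fraction_of_gpu_memory) {
  if (fraction_of_gpu_memory <= 0.f || fraction_of_gpu_memory > 1.f) {
    throw std::invalid_argument(
        "fraction_of_gpu_memory should be in range (0., 1.]");
  }
  paddle::NativeConfig config;
  config.model_dir = model_dir;
  config.use_gpu = true;
  config.device = 0;
  config.fraction_of_gpu_memory = fraction_of_gpu_memory;
  return config;
}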

paddle/contrib/inference/test_paddle_inference_api_impl.cc

Lines changed: 0 additions & 1 deletion
@@ -47,7 +47,6 @@ NativeConfig GetConfig() {
   config.fraction_of_gpu_memory = 0.15;
   config.use_gpu = true;
   config.device = 0;
-  config.share_variables = true;
   return config;
 }
