Skip to content

Commit d60751f

Browse files
authored
add python inference api (#15248)
1 parent 59ab98c commit d60751f

File tree

9 files changed

+346
-9
lines changed

9 files changed

+346
-9
lines changed

paddle/fluid/API.spec

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ paddle.fluid.AsyncExecutor.save_model ArgSpec(args=['self', 'save_path'], vararg
4545
paddle.fluid.AsyncExecutor.stop ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
4646
paddle.fluid.CompiledProgram.__init__ ArgSpec(args=['self', 'program'], varargs=None, keywords=None, defaults=None)
4747
paddle.fluid.CompiledProgram.with_data_parallel ArgSpec(args=['self', 'loss_name', 'build_strategy', 'exec_strategy', 'share_vars_from'], varargs=None, keywords=None, defaults=(None, None, None, None))
48+
paddle.fluid.CompiledProgram.with_inference_optimize ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=None)
4849
paddle.fluid.ExecutionStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.ExecutionStrategy) -> None
4950
paddle.fluid.BuildStrategy.GradientScaleStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.BuildStrategy.GradientScaleStrategy, arg0: int) -> None
5051
paddle.fluid.BuildStrategy.ReduceStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.BuildStrategy.ReduceStrategy, arg0: int) -> None

paddle/fluid/inference/api/analysis_predictor.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ using contrib::AnalysisConfig;
4545
class AnalysisPredictor : public PaddlePredictor {
4646
public:
4747
explicit AnalysisPredictor(const AnalysisConfig &config) : config_(config) {}
48+
~AnalysisPredictor();
4849

4950
bool Init(const std::shared_ptr<framework::Scope> &parent_scope,
5051
const std::shared_ptr<framework::ProgramDesc> &program = nullptr);
@@ -95,7 +96,6 @@ class AnalysisPredictor : public PaddlePredictor {
9596
template <typename T>
9697
void GetFetchOne(const framework::LoDTensor &fetchs,
9798
PaddleTensor *output_data);
98-
~AnalysisPredictor();
9999

100100
// Some more detailed tests, they are made the friends of the predictor, so that
101101
// the all the details can be tested.

paddle/fluid/pybind/CMakeLists.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
set(PYBIND_DEPS pybind python proto_desc memory executor async_executor prune
22
feed_fetch_method pass_builder parallel_executor profiler layer scope_pool
3-
tracer)
3+
tracer analysis_predictor)
4+
45
if(WITH_PYTHON)
56
list(APPEND PYBIND_DEPS py_func_op)
67
endif()
7-
set(PYBIND_SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc async_executor_py.cc imperative.cc ir.cc)
8+
set(PYBIND_SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc async_executor_py.cc imperative.cc ir.cc inference_api.cc)
89

910
if(WITH_PYTHON)
1011
if(WITH_AMD_GPU)

paddle/fluid/pybind/inference_api.cc

Lines changed: 256 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,256 @@
1+
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "paddle/fluid/pybind/inference_api.h"
16+
#include <pybind11/stl.h>
17+
#include <cstring>
18+
#include <iostream>
19+
#include <string>
20+
#include <vector>
21+
#include "paddle/fluid/inference/api/analysis_predictor.h"
22+
#include "paddle/fluid/inference/api/paddle_inference_api.h"
23+
24+
namespace py = pybind11;
25+
26+
namespace paddle {
27+
namespace pybind {
28+
using paddle::PaddleDType;
29+
using paddle::PaddleBuf;
30+
using paddle::PaddleTensor;
31+
using paddle::PaddlePlace;
32+
using paddle::PaddlePredictor;
33+
using paddle::NativeConfig;
34+
using paddle::NativePaddlePredictor;
35+
using paddle::AnalysisPredictor;
36+
using paddle::contrib::AnalysisConfig;
37+
38+
static void BindPaddleDType(py::module *m);
39+
static void BindPaddleBuf(py::module *m);
40+
static void BindPaddleTensor(py::module *m);
41+
static void BindPaddlePlace(py::module *m);
42+
static void BindPaddlePredictor(py::module *m);
43+
static void BindNativeConfig(py::module *m);
44+
static void BindNativePredictor(py::module *m);
45+
static void BindAnalysisConfig(py::module *m);
46+
static void BindAnalysisPredictor(py::module *m);
47+
48+
// Registers every inference-API binding on the given pybind11 module, then
// exposes the predictor factory overloads and the dtype-size helper.
void BindInferenceApi(py::module *m) {
  // pybind11 requires base classes to be bound before their derived classes,
  // so PaddlePredictor is registered before the Native/Analysis predictors.
  BindPaddleDType(m);
  BindPaddleBuf(m);
  BindPaddleTensor(m);
  BindPaddlePlace(m);
  BindPaddlePredictor(m);
  BindNativeConfig(m);
  BindNativePredictor(m);
  BindAnalysisConfig(m);
  BindAnalysisPredictor(m);

  // Overloaded factory: pybind11 dispatches on the config type received
  // from Python (AnalysisConfig vs NativeConfig).
  m->def("create_paddle_predictor",
         &paddle::CreatePaddlePredictor<AnalysisConfig>);
  m->def("create_paddle_predictor",
         &paddle::CreatePaddlePredictor<NativeConfig>);
  m->def("paddle_dtype_size", &paddle::PaddleDtypeSize);
}
65+
66+
// Exposes PaddleDType to Python; FLOAT32 and INT64 are the only tensor
// element types the inference API currently accepts.
void BindPaddleDType(py::module *m) {
  auto dtype = py::enum_<PaddleDType>(*m, "PaddleDType");
  dtype.value("FLOAT32", PaddleDType::FLOAT32)
      .value("INT64", PaddleDType::INT64);
}
71+
72+
// Binds PaddleBuf, the raw byte buffer backing a PaddleTensor. Python code
// can construct one from a float or int64 list, resize/reset it, and read
// the contents back out as a typed list.
void BindPaddleBuf(py::module *m) {
  py::class_<PaddleBuf>(*m, "PaddleBuf")
      .def(py::init<size_t>())
      .def(py::init([](std::vector<float> &data) {
        PaddleBuf buf(data.size() * sizeof(float));
        // Guard: data.data() may be null for an empty vector, and memcpy
        // with a null pointer is UB even when the length is zero.
        if (!data.empty()) {
          std::memcpy(buf.data(), data.data(), buf.length());
        }
        return buf;  // plain return enables NRVO; std::move would pessimize
      }))
      .def(py::init([](std::vector<int64_t> &data) {
        PaddleBuf buf(data.size() * sizeof(int64_t));
        if (!data.empty()) {
          std::memcpy(buf.data(), data.data(), buf.length());
        }
        return buf;
      }))
      .def("resize", &PaddleBuf::Resize)
      // reset overwrites the buffer with the given list, resizing to fit.
      .def("reset",
           [](PaddleBuf &self, std::vector<float> &data) {
             self.Resize(data.size() * sizeof(float));
             if (!data.empty()) {
               std::memcpy(self.data(), data.data(), self.length());
             }
           })
      .def("reset",
           [](PaddleBuf &self, std::vector<int64_t> &data) {
             self.Resize(data.size() * sizeof(int64_t));
             if (!data.empty()) {
               std::memcpy(self.data(), data.data(), self.length());
             }
           })
      .def("empty", &PaddleBuf::empty)
      // The accessors below copy the raw storage into a typed vector, which
      // pybind11 converts to a Python list.
      .def("float_data",
           [](PaddleBuf &self) -> std::vector<float> {
             auto *data = static_cast<float *>(self.data());
             return {data, data + self.length() / sizeof(*data)};
           })
      .def("int64_data",
           [](PaddleBuf &self) -> std::vector<int64_t> {
             auto *data = static_cast<int64_t *>(self.data());
             return {data, data + self.length() / sizeof(*data)};
           })
      .def("length", &PaddleBuf::length);
}
109+
110+
// Binds PaddleTensor: the named, shaped, typed input/output unit of a
// predictor. All fields are plain read-write attributes from Python.
void BindPaddleTensor(py::module *m) {
  auto tensor = py::class_<PaddleTensor>(*m, "PaddleTensor");
  tensor.def(py::init<>());
  tensor.def_readwrite("name", &PaddleTensor::name)
      .def_readwrite("shape", &PaddleTensor::shape)
      .def_readwrite("data", &PaddleTensor::data)
      .def_readwrite("dtype", &PaddleTensor::dtype)
      .def_readwrite("lod", &PaddleTensor::lod);
}
119+
120+
// Exposes PaddlePlace, the device placement of a zero-copy tensor
// (unknown / CPU / GPU).
void BindPaddlePlace(py::module *m) {
  auto place = py::enum_<PaddlePlace>(*m, "PaddlePlace");
  place.value("UNK", PaddlePlace::kUNK)
      .value("CPU", PaddlePlace::kCPU)
      .value("GPU", PaddlePlace::kGPU);
}
126+
127+
// Binds the abstract PaddlePredictor base class plus its nested Config.
// Derived predictors (native/analysis) inherit these Python methods.
void BindPaddlePredictor(py::module *m) {
  auto predictor = py::class_<PaddlePredictor>(*m, "PaddlePredictor");
  predictor
      // run() wraps the out-parameter C++ API into a Python-friendly
      // "inputs in, outputs returned" call.
      .def("run",
           [](PaddlePredictor &self, const std::vector<PaddleTensor> &inputs) {
             std::vector<PaddleTensor> fetched;
             self.Run(inputs, &fetched);
             return fetched;
           })
      .def("get_input_tensor", &PaddlePredictor::GetInputTensor)
      .def("get_output_tensor", &PaddlePredictor::GetOutputTensor)
      .def("zero_copy_run", &PaddlePredictor::ZeroCopyRun)
      .def("clone", &PaddlePredictor::Clone);

  // Nested base config; only model_dir is part of the common contract.
  auto base_config =
      py::class_<PaddlePredictor::Config>(predictor, "Config");
  base_config.def(py::init<>())
      .def_readwrite("model_dir", &PaddlePredictor::Config::model_dir);
}
145+
146+
// Binds NativeConfig, the configuration for the non-optimizing native
// predictor. Derives from PaddlePredictor::Config (bound elsewhere).
void BindNativeConfig(py::module *m) {
  auto native_config =
      py::class_<NativeConfig, PaddlePredictor::Config>(*m, "NativeConfig");
  native_config.def(py::init<>())
      .def_readwrite("use_gpu", &NativeConfig::use_gpu)
      .def_readwrite("device", &NativeConfig::device)
      .def_readwrite("fraction_of_gpu_memory",
                     &NativeConfig::fraction_of_gpu_memory)
      .def_readwrite("prog_file", &NativeConfig::prog_file)
      .def_readwrite("param_file", &NativeConfig::param_file)
      .def_readwrite("specify_input_name", &NativeConfig::specify_input_name)
      .def("set_cpu_math_library_num_threads",
           &NativeConfig::SetCpuMathLibraryNumThreads)
      .def("cpu_math_library_num_threads",
           &NativeConfig::cpu_math_library_num_threads);
}
161+
162+
// Binds NativePaddlePredictor, the plain predictor driven by NativeConfig.
void BindNativePredictor(py::module *m) {
  py::class_<NativePaddlePredictor, PaddlePredictor>(*m,
                                                     "NativePaddlePredictor")
      .def(py::init<const NativeConfig &>())
      .def("init", &NativePaddlePredictor::Init)
      // Same run() adapter as the base: returns outputs instead of using an
      // out-parameter.
      .def("run",
           [](NativePaddlePredictor &self,
              const std::vector<PaddleTensor> &inputs) {
             std::vector<PaddleTensor> fetched;
             self.Run(inputs, &fetched);
             return fetched;
           })
      .def("get_input_tensor", &NativePaddlePredictor::GetInputTensor)
      .def("get_output_tensor", &NativePaddlePredictor::GetOutputTensor)
      .def("zero_copy_run", &NativePaddlePredictor::ZeroCopyRun)
      .def("clone", &NativePaddlePredictor::Clone)
      // The scope is owned by the predictor; return a borrowed reference so
      // Python never frees it.
      .def("scope", &NativePaddlePredictor::scope,
           py::return_value_policy::reference);
}
181+
182+
// Binds AnalysisConfig, the configuration of the optimizing (analysis)
// predictor: model location, GPU/CPU selection, IR passes, TensorRT and
// MKLDNN toggles.
void BindAnalysisConfig(py::module *m) {
  py::class_<AnalysisConfig>(*m, "AnalysisConfig")
      .def(py::init<const AnalysisConfig &>())
      .def(py::init<const std::string &>())
      .def(py::init<const std::string &, const std::string &>())
      // SetModel is overloaded; static_cast (not a C-style cast) selects the
      // member-function-pointer overload to bind.
      .def("set_model",
           static_cast<void (AnalysisConfig::*)(const std::string &)>(
               &AnalysisConfig::SetModel))
      .def("set_model",
           static_cast<void (AnalysisConfig::*)(const std::string &,
                                                const std::string &)>(
               &AnalysisConfig::SetModel))
      .def("set_prog_file", &AnalysisConfig::SetProgFile)
      .def("set_params_file", &AnalysisConfig::SetParamsFile)
      .def("model_dir", &AnalysisConfig::model_dir)
      .def("prog_file", &AnalysisConfig::prog_file)
      .def("params_file", &AnalysisConfig::params_file)
      // GPU settings.
      .def("enable_use_gpu", &AnalysisConfig::EnableUseGpu,
           py::arg("memory_pool_init_size_mb"), py::arg("device_id") = 0)
      .def("disable_gpu", &AnalysisConfig::DisableGpu)
      .def("use_gpu", &AnalysisConfig::use_gpu)
      .def("gpu_device_id", &AnalysisConfig::gpu_device_id)
      .def("memory_pool_init_size_mb",
           &AnalysisConfig::memory_pool_init_size_mb)
      .def("fraction_of_gpu_memory_for_pool",
           &AnalysisConfig::fraction_of_gpu_memory_for_pool)
      // IR optimization toggles; the switches default to "on".
      .def("switch_ir_optim", &AnalysisConfig::SwitchIrOptim,
           py::arg("x") = true)
      .def("ir_optim", &AnalysisConfig::ir_optim)
      .def("switch_use_feed_fetch_ops", &AnalysisConfig::SwitchUseFeedFetchOps,
           py::arg("x") = true)
      .def("use_feed_fetch_ops_enabled",
           &AnalysisConfig::use_feed_fetch_ops_enabled)
      .def("switch_specify_input_names",
           &AnalysisConfig::SwitchSpecifyInputNames, py::arg("x") = true)
      .def("specify_input_name", &AnalysisConfig::specify_input_name)
      // TensorRT subgraph engine.
      .def("enable_tensorrt_engine", &AnalysisConfig::EnableTensorRtEngine,
           py::arg("workspace_size") = 1 << 20, py::arg("max_batch_size") = 1,
           py::arg("min_subgraph_size") = 3)
      .def("tensorrt_engine_enabled", &AnalysisConfig::tensorrt_engine_enabled)
      .def("switch_ir_debug", &AnalysisConfig::SwitchIrDebug,
           py::arg("x") = true)
      // MKLDNN / CPU math settings.
      .def("enable_mkldnn", &AnalysisConfig::EnableMKLDNN)
      .def("mkldnn_enabled", &AnalysisConfig::mkldnn_enabled)
      .def("set_cpu_math_library_num_threads",
           &AnalysisConfig::SetCpuMathLibraryNumThreads)
      .def("cpu_math_library_num_threads",
           &AnalysisConfig::cpu_math_library_num_threads)
      .def("to_native_config", &AnalysisConfig::ToNativeConfig)
      .def("set_mkldnn_op", &AnalysisConfig::SetMKLDNNOp)
      .def("set_model_buffer", &AnalysisConfig::SetModelBuffer)
      .def("model_from_memory", &AnalysisConfig::model_from_memory)
      // The pass builder is owned by the config; borrow, don't own.
      .def("pass_builder", &AnalysisConfig::pass_builder,
           py::return_value_policy::reference);
}
235+
236+
// Binds AnalysisPredictor, the IR-optimizing predictor configured by
// AnalysisConfig.
void BindAnalysisPredictor(py::module *m) {
  py::class_<AnalysisPredictor, PaddlePredictor>(*m, "AnalysisPredictor")
      .def(py::init<const AnalysisConfig &>())
      .def("init", &AnalysisPredictor::Init)
      // run(): inputs in, outputs returned (wraps the out-parameter API).
      .def("run",
           [](AnalysisPredictor &self,
              const std::vector<PaddleTensor> &inputs) {
             std::vector<PaddleTensor> fetched;
             self.Run(inputs, &fetched);
             return fetched;
           })
      .def("get_input_tensor", &AnalysisPredictor::GetInputTensor)
      .def("get_output_tensor", &AnalysisPredictor::GetOutputTensor)
      .def("zero_copy_run", &AnalysisPredictor::ZeroCopyRun)
      .def("clone", &AnalysisPredictor::Clone)
      // The scope is owned by the predictor; hand Python a borrowed ref.
      .def("scope", &AnalysisPredictor::scope,
           py::return_value_policy::reference);
}
254+
255+
} // namespace pybind
256+
} // namespace paddle

paddle/fluid/pybind/inference_api.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#pragma once

#include <pybind11/pybind11.h>

namespace paddle {
namespace pybind {

// Registers all inference-API enums, classes, and factory functions on the
// given pybind11 module. Implemented in inference_api.cc.
void BindInferenceApi(pybind11::module *m);

}  // namespace pybind
}  // namespace paddle

paddle/fluid/pybind/pybind.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ limitations under the License. */
4949
#include "paddle/fluid/pybind/const_value.h"
5050
#include "paddle/fluid/pybind/exception.h"
5151
#include "paddle/fluid/pybind/imperative.h"
52+
#include "paddle/fluid/pybind/inference_api.h"
5253
#include "paddle/fluid/pybind/ir.h"
5354
#include "paddle/fluid/pybind/protobuf.h"
5455
#include "paddle/fluid/pybind/pybind.h" // NOLINT
@@ -1083,9 +1084,9 @@ All parameter, weight, gradient are variables in Paddle.
10831084

10841085
BindRecordIOWriter(&m);
10851086
BindAsyncExecutor(&m);
1086-
10871087
BindGraph(&m);
10881088
BindNode(&m);
1089+
BindInferenceApi(&m);
10891090
}
10901091
} // namespace pybind
10911092
} // namespace paddle

0 commit comments

Comments
 (0)