Commit 6640661

merge develop

2 parents a2749ad + c58af84

21 files changed: +577 −179 lines

cmake/external/anakin.cmake

Lines changed: 4 additions & 3 deletions
@@ -8,6 +8,7 @@ set(ANAKIN_INCLUDE "${ANAKIN_INSTALL_DIR}" CACHE STRING "root of Anakin header f
 set(ANAKIN_LIBRARY "${ANAKIN_INSTALL_DIR}" CACHE STRING "path of Anakin library")
 
 set(ANAKIN_COMPILE_EXTRA_FLAGS
+    -Wno-error=unused-but-set-variable -Wno-unused-but-set-variable
     -Wno-error=unused-variable -Wno-unused-variable
     -Wno-error=format-extra-args -Wno-format-extra-args
     -Wno-error=comment -Wno-comment
@@ -19,7 +20,7 @@ set(ANAKIN_COMPILE_EXTRA_FLAGS
     -Wno-reorder
     -Wno-error=cpp)
 
-set(ANAKIN_LIBRARY_URL "https://github.com/pangge/Anakin/releases/download/3.0/anakin_release_simple.tar.gz")
+set(ANAKIN_LIBRARY_URL "https://github.com/pangge/Anakin/releases/download/Version0.1.0/anakin.tar.gz")
 
 # A helper function used in Anakin, currently, to use it, one need to recursively include
 # nearly all the header files.
@@ -41,9 +42,9 @@ if (NOT EXISTS "${ANAKIN_INSTALL_DIR}")
   message(STATUS "Download Anakin library from ${ANAKIN_LIBRARY_URL}")
   execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_INSTALL_DIR}")
   execute_process(COMMAND bash -c "rm -rf ${ANAKIN_INSTALL_DIR}/*")
-  execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; wget -q ${ANAKIN_LIBRARY_URL}")
+  execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; wget --no-check-certificate -q ${ANAKIN_LIBRARY_URL}")
   execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_INSTALL_DIR}")
-  execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; tar xzf anakin_release_simple.tar.gz")
+  execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; tar xzf anakin.tar.gz")
 endif()
 
 if (WITH_ANAKIN)

paddle/fluid/inference/analysis/argument.h

Lines changed: 1 addition & 0 deletions
@@ -23,6 +23,7 @@
 
 #pragma once
 
+#include <string>
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/inference/analysis/data_flow_graph.h"

paddle/fluid/inference/analysis/data_flow_graph.h

Lines changed: 1 addition & 1 deletion
@@ -176,7 +176,7 @@ struct GraphTraits<DataFlowGraph> {
 // sub-graph is the inputs nodes and output nodes that doesn't inside the
 // sub-graph.
 std::pair<std::vector<Node *>, std::vector<Node *>>
-ExtractInputAndOutputOfSubGraph(std::vector<Node *> &graph);
+ExtractInputAndOutputOfSubGraph(std::vector<Node *> &graph);  // NOLINT
 
 } // namespace analysis
 } // namespace inference
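
The // NOLINT above addresses cpplint's runtime/references check, which flags mutable reference parameters; the marker suppresses the warning for that declaration only. A minimal sketch of the same pattern, with a hypothetical function name:

#include <vector>

// cpplint would flag the non-const reference parameter below; the trailing
// "// NOLINT" keeps the existing signature while silencing that one warning.
void SortInPlace(std::vector<int> &values);  // NOLINT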

paddle/fluid/inference/analysis/model_store_pass.cc

Lines changed: 3 additions & 1 deletion
@@ -12,11 +12,13 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/inference/analysis/model_store_pass.h"
 #include <stdio.h>
 #include <stdlib.h>
+#include <string>
+
 #include "paddle/fluid/inference/analysis/analyzer.h"
 #include "paddle/fluid/inference/analysis/argument.h"
+#include "paddle/fluid/inference/analysis/model_store_pass.h"
 
 namespace paddle {
 namespace inference {

paddle/fluid/inference/analysis/model_store_pass.h

Lines changed: 2 additions & 0 deletions
@@ -17,6 +17,8 @@
  * model in the disk, and that model can be reloaded for prediction.
  */
 
+#pragma once
+#include <string>
 #include "paddle/fluid/inference/analysis/pass.h"
 
 namespace paddle {

paddle/fluid/inference/api/CMakeLists.txt

Lines changed: 4 additions & 1 deletion
@@ -19,6 +19,7 @@ endif(APPLE)
 
 
 set(inference_deps paddle_inference_api paddle_fluid_api)
+
 if(WITH_GPU AND TENSORRT_FOUND)
   set(inference_deps ${inference_deps} paddle_inference_tensorrt_subgraph_engine)
 endif()
@@ -63,6 +64,8 @@ endif()
 if (WITH_ANAKIN) # only needed in CI
   # Due to Anakin do not have official library releases and the versions of protobuf and cuda do not match Paddle's,
   # so anakin library will not be merged to our official inference library. To use anakin prediction API, one need to
+  # compile the libinference_anakin_api.a and compile with anakin.so.
+  fetch_include_recursively(${ANAKIN_INCLUDE})
   # compile the libinference_anakin_api.a and anakin.so.
   nv_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc)
   nv_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc)
@@ -73,7 +76,7 @@ if (WITH_ANAKIN) # only needed in CI
   if (WITH_TESTING)
     cc_test(inference_anakin_test SRCS api_anakin_engine_tester.cc
             ARGS --model=${ANAKIN_INSTALL_DIR}/mobilenet_v2.anakin.bin
-            DEPS inference_anakin_api)
+            DEPS inference_anakin_api_shared)
     target_compile_options(inference_anakin_test BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})
   endif(WITH_TESTING)
 endif()

paddle/fluid/inference/api/api_anakin_engine.cc

Lines changed: 71 additions & 20 deletions
@@ -18,26 +18,36 @@
 
 namespace paddle {
 
-PaddleInferenceAnakinPredictor::PaddleInferenceAnakinPredictor(
+template <typename Target>
+PaddleInferenceAnakinPredictor<Target>::PaddleInferenceAnakinPredictor(
     const AnakinConfig &config) {
   CHECK(Init(config));
 }
 
-bool PaddleInferenceAnakinPredictor::Init(const AnakinConfig &config) {
+template <typename Target>
+bool PaddleInferenceAnakinPredictor<Target>::Init(const AnakinConfig &config) {
   if (!(graph_.load(config.model_file))) {
+    LOG(FATAL) << "fail to load graph from " << config.model_file;
     return false;
   }
-  graph_.ResetBatchSize("input_0", config.max_batch_size);
+  auto inputs = graph_.get_ins();
+  for (auto &input_str : inputs) {
+    graph_.ResetBatchSize(input_str, config.max_batch_size);
+  }
   // optimization for graph
   if (!(graph_.Optimize())) {
     return false;
   }
   // construct executer
-  executor_.init(graph_);
+  if (executor_p_ == nullptr) {
+    executor_p_ = new anakin::Net<Target, anakin::saber::AK_FLOAT,
+                                  anakin::Precision::FP32>(graph_, true);
+  }
   return true;
 }
 
-bool PaddleInferenceAnakinPredictor::Run(
+template <typename Target>
+bool PaddleInferenceAnakinPredictor<Target>::Run(
     const std::vector<PaddleTensor> &inputs,
     std::vector<PaddleTensor> *output_data, int batch_size) {
   for (const auto &input : inputs) {
@@ -46,7 +56,29 @@ bool PaddleInferenceAnakinPredictor::Run(
                  << "'s type is not float";
       return false;
     }
-    auto d_tensor_in_p = executor_.get_in(input.name);
+    auto d_tensor_in_p = executor_p_->get_in(input.name);
+    auto net_shape = d_tensor_in_p->valid_shape();
+    if (net_shape.size() != input.shape.size()) {
+      LOG(ERROR) << " input " << input.name
+                 << "'s shape size should be equal to that of net";
+      return false;
+    }
+    int sum = 1;
+    for_each(input.shape.begin(), input.shape.end(), [&](int n) { sum *= n; });
+    if (sum > net_shape.count()) {
+      graph_.Reshape(input.name, input.shape);
+      delete executor_p_;
+      executor_p_ = new anakin::Net<Target, anakin::saber::AK_FLOAT,
+                                    anakin::Precision::FP32>(graph_, true);
+      d_tensor_in_p = executor_p_->get_in(input.name);
+    }
+
+    anakin::saber::Shape tmp_shape;
+    for (auto s : input.shape) {
+      tmp_shape.push_back(s);
+    }
+    d_tensor_in_p->reshape(tmp_shape);
+
     float *d_data_p = d_tensor_in_p->mutable_data();
     if (cudaMemcpy(d_data_p, static_cast<float *>(input.data.data()),
                    d_tensor_in_p->valid_size() * sizeof(float),
@@ -56,16 +88,17 @@ bool PaddleInferenceAnakinPredictor::Run(
     }
     cudaStreamSynchronize(NULL);
   }
-
-  executor_.prediction();
+  cudaDeviceSynchronize();
+  executor_p_->prediction();
+  cudaDeviceSynchronize();
 
   if (output_data->empty()) {
     LOG(ERROR) << "At least one output should be set with tensors' names.";
     return false;
   }
   for (auto &output : *output_data) {
-    auto *tensor = executor_.get_out(output.name);
-    output.shape = tensor->shape();
+    auto *tensor = executor_p_->get_out(output.name);
+    output.shape = tensor->valid_shape();
     if (output.data.length() < tensor->valid_size() * sizeof(float)) {
       output.data.Resize(tensor->valid_size() * sizeof(float));
     }
@@ -81,19 +114,23 @@ bool PaddleInferenceAnakinPredictor::Run(
   return true;
 }
 
-anakin::Net<anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32>
-    &PaddleInferenceAnakinPredictor::get_executer() {
-  return executor_;
+template <typename Target>
+anakin::Net<Target, anakin::saber::AK_FLOAT, anakin::Precision::FP32>
+    &PaddleInferenceAnakinPredictor<Target>::get_executer() {
+  return *executor_p_;
 }
 
 // the cloned new Predictor of anakin share the same net weights from original
 // Predictor
-std::unique_ptr<PaddlePredictor> PaddleInferenceAnakinPredictor::Clone() {
+template <typename Target>
+std::unique_ptr<PaddlePredictor>
+PaddleInferenceAnakinPredictor<Target>::Clone() {
   VLOG(3) << "Anakin Predictor::clone";
-  std::unique_ptr<PaddlePredictor> cls(new PaddleInferenceAnakinPredictor());
+  std::unique_ptr<PaddlePredictor> cls(
+      new PaddleInferenceAnakinPredictor<Target>());
   // construct executer from other graph
   auto anakin_predictor_p =
-      dynamic_cast<PaddleInferenceAnakinPredictor *>(cls.get());
+      dynamic_cast<PaddleInferenceAnakinPredictor<Target> *>(cls.get());
   if (!anakin_predictor_p) {
     LOG(ERROR) << "fail to call Init";
     return nullptr;
@@ -103,14 +140,28 @@ std::unique_ptr<PaddlePredictor> PaddleInferenceAnakinPredictor::Clone() {
   return std::move(cls);
 }
 
+template class PaddleInferenceAnakinPredictor<anakin::NV>;
+template class PaddleInferenceAnakinPredictor<anakin::X86>;
+
 // A factory to help create difference predictor.
 template <>
 std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
     AnakinConfig, PaddleEngineKind::kAnakin>(const AnakinConfig &config) {
   VLOG(3) << "Anakin Predictor create.";
-  std::unique_ptr<PaddlePredictor> x(
-      new PaddleInferenceAnakinPredictor(config));
-  return x;
-}
+  if (config.target_type == AnakinConfig::NVGPU) {
+    VLOG(3) << "Anakin Predictor create on [ NVIDIA GPU ].";
+    std::unique_ptr<PaddlePredictor> x(
+        new PaddleInferenceAnakinPredictor<anakin::NV>(config));
+    return x;
+  } else if (config.target_type == AnakinConfig::X86) {
+    VLOG(3) << "Anakin Predictor create on [ Intel X86 ].";
+    std::unique_ptr<PaddlePredictor> x(
+        new PaddleInferenceAnakinPredictor<anakin::X86>(config));
+    return x;
+  } else {
+    VLOG(3) << "Anakin Predictor create on unknown platform.";
+    return nullptr;
+  }
+};
 
 } // namespace paddle
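
For reference, a minimal caller-side sketch of the new target dispatch — the config fields and enum values come from this diff, while the function name and model path are hypothetical:

#include <memory>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

std::unique_ptr<paddle::PaddlePredictor> MakeAnakinPredictor() {
  paddle::AnakinConfig config;
  // The factory specialization above dispatches on target_type, creating
  // PaddleInferenceAnakinPredictor<anakin::NV> or <anakin::X86>.
  config.target_type = paddle::AnakinConfig::NVGPU;  // or AnakinConfig::X86
  config.model_file = "/path/to/model.anakin.bin";   // hypothetical path
  config.device = 0;
  config.max_batch_size = 1;
  // Any other target_type value yields nullptr.
  return paddle::CreatePaddlePredictor<paddle::AnakinConfig,
                                       paddle::PaddleEngineKind::kAnakin>(
      config);
}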

paddle/fluid/inference/api/api_anakin_engine.h

Lines changed: 12 additions & 8 deletions
@@ -20,14 +20,16 @@ limitations under the License. */
 #pragma once
 
 #include <vector>
-#include "paddle/fluid/inference/api/paddle_inference_api.h"
 
-// from anakin
 #include "framework/core/net/net.h"
+#include "framework/graph/graph.h"
+#include "paddle/fluid/inference/api/paddle_inference_api.h"
+#include "saber/core/shape.h"
 #include "saber/saber_types.h"
 
 namespace paddle {
 
+template <typename Target>
 class PaddleInferenceAnakinPredictor : public PaddlePredictor {
  public:
   PaddleInferenceAnakinPredictor() {}
@@ -42,19 +44,21 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor {
 
   std::unique_ptr<PaddlePredictor> Clone() override;
 
-  anakin::Net<anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32>&
+  anakin::Net<Target, anakin::saber::AK_FLOAT, anakin::Precision::FP32>&
   get_executer();
 
-  ~PaddleInferenceAnakinPredictor() override{};
+  ~PaddleInferenceAnakinPredictor() override {
+    delete executor_p_;
+    executor_p_ = nullptr;
+  };
 
  private:
   bool Init(const AnakinConfig& config);
 
-  anakin::graph::Graph<anakin::NV, anakin::saber::AK_FLOAT,
-                       anakin::Precision::FP32>
+  anakin::graph::Graph<Target, anakin::saber::AK_FLOAT, anakin::Precision::FP32>
       graph_;
-  anakin::Net<anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32>
-      executor_;
+  anakin::Net<Target, anakin::saber::AK_FLOAT, anakin::Precision::FP32>*
      executor_p_{nullptr};
   AnakinConfig config_;
 };
 
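
Because the predictor's member functions are now defined in api_anakin_engine.cc rather than in this header, that .cc file has to explicitly instantiate each supported Target (the two template class lines in the previous file); otherwise callers that only include the header would hit undefined symbols at link time. A self-contained sketch of that pattern, with hypothetical names:

#include <iostream>

// Header side: template declared, member defined elsewhere.
template <typename Target>
struct Engine {
  void Run();
};

// .cc side: the definition ...
template <typename Target>
void Engine<Target>::Run() {
  std::cout << "running for this target\n";
}

// ... plus one explicit instantiation per supported target, so the linker
// can resolve Engine<GPU>::Run and Engine<CPU>::Run for header-only callers.
struct GPU {};
struct CPU {};
template struct Engine<GPU>;
template struct Engine<CPU>;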

paddle/fluid/inference/api/api_anakin_engine_tester.cc

Lines changed: 8 additions & 9 deletions
@@ -12,18 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <gflags/gflags.h>
 #include <glog/logging.h>
 #include <gtest/gtest.h>
 
+#include "gflags/gflags.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 
-DEFINE_string(model, "", "Directory of the inference model.");
+DEFINE_string(model, "", "Directory of the inference model(mobile_v2).");
 
 namespace paddle {
 
 AnakinConfig GetConfig() {
   AnakinConfig config;
+  // using AnakinConfig::X86 if you need to use cpu to do inference
+  config.target_type = AnakinConfig::NVGPU;
   config.model_file = FLAGS_model;
   config.device = 0;
   config.max_batch_size = 1;
@@ -36,30 +38,27 @@ TEST(inference, anakin) {
       CreatePaddlePredictor<AnakinConfig, PaddleEngineKind::kAnakin>(config);
 
   float data[1 * 3 * 224 * 224] = {1.0f};
-
   PaddleTensor tensor;
   tensor.name = "input_0";
   tensor.shape = std::vector<int>({1, 3, 224, 224});
   tensor.data = PaddleBuf(data, sizeof(data));
   tensor.dtype = PaddleDType::FLOAT32;
 
   // For simplicity, we set all the slots with the same data.
-  std::vector<PaddleTensor> paddle_tensor_feeds;
-  paddle_tensor_feeds.emplace_back(std::move(tensor));
+  std::vector<PaddleTensor> paddle_tensor_feeds(1, tensor);
 
   PaddleTensor tensor_out;
   tensor_out.name = "prob_out";
-  tensor_out.shape = std::vector<int>({1000, 1});
+  tensor_out.shape = std::vector<int>({});
   tensor_out.data = PaddleBuf();
   tensor_out.dtype = PaddleDType::FLOAT32;
 
-  std::vector<PaddleTensor> outputs;
-  outputs.emplace_back(std::move(tensor_out));
+  std::vector<PaddleTensor> outputs(1, tensor_out);
 
   ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs));
 
   float* data_o = static_cast<float*>(outputs[0].data.data());
-  for (size_t j = 0; j < 1000; ++j) {
+  for (size_t j = 0; j < outputs[0].data.length(); ++j) {
     LOG(INFO) << "output[" << j << "]: " << data_o[j];
   }
 }
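
One caveat when reusing this test as a template: PaddleBuf::length() appears to be a byte count (Run() in api_anakin_engine.cc resizes the buffer to valid_size() * sizeof(float)), so looping up to length() over a float* would read past the last element. A sketch of an element-wise bound, under that byte-count assumption:

float* data_o = static_cast<float*>(outputs[0].data.data());
// length() is in bytes; divide by sizeof(float) to get the element count.
size_t num_elems = outputs[0].data.length() / sizeof(float);
for (size_t j = 0; j < num_elems; ++j) {
  LOG(INFO) << "output[" << j << "]: " << data_o[j];
}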

paddle/fluid/inference/api/demo_ci/vis_demo.cc

Lines changed: 1 addition & 1 deletion
@@ -20,8 +20,8 @@ limitations under the License. */
 #include <glog/logging.h> // use glog instead of PADDLE_ENFORCE to avoid importing other paddle header files.
 #include <fstream>
 #include <iostream>
+#include "paddle/fluid/inference/demo_ci/utils.h"
 #include "paddle/fluid/platform/enforce.h"
-#include "utils.h"
 
 #ifdef PADDLE_WITH_CUDA
 DECLARE_double(fraction_of_gpu_memory_to_use);
