
Commit 9141bee

pangge authored and Superjomn committed
add Anakin api for paddle (#11228)
1 parent d48172f, commit 9141bee

File tree: 5 files changed, +123 −38 lines changed

paddle/contrib/inference/CMakeLists.txt

Lines changed: 23 additions & 17 deletions

@@ -24,31 +24,37 @@ set(ANAKIN_LIBRARY "" CACHE STRING "path of Anakin library")
 set(inference_deps paddle_inference_api paddle_fluid_api)
 
 # if anakin is set enable anakin api implementation
-if(ANAKIN_INCLUDE_DIR AND ANAKIN_LIBRARY)
+if(ANAKIN_INCLUDE AND ANAKIN_LIBRARY)
   set(ANAKIN_FOUND ON)
 else()
   set(ANAKIN_FOUND OFF)
 endif()
 
+function(fetch_include_recursively root_dir)
+  if (IS_DIRECTORY ${root_dir})
+    include_directories(${root_dir})
+  endif()
+
+  file(GLOB ALL_SUB RELATIVE ${root_dir} ${root_dir}/*)
+  foreach(sub ${ALL_SUB})
+    if (IS_DIRECTORY ${root_dir}/${sub})
+      fetch_include_recursively(${root_dir}/${sub})
+    endif()
+  endforeach()
+endfunction()
+
 if (ANAKIN_FOUND)
   # Anakin's code style doesn't follow google c style.
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=comment
-  -Wno-error=reorder
-  -Wno-error=format
-  -Wno-error=switch
-  -Wno-error=return-type
-  -Wno-error=non-virtual-dtor
-  -Wno-error=cpp")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=unused-variable -Wno-error=format-extra-args -Wno-error=comment -Wno-error=format -Wno-error=switch -Wno-error=return-type -Wno-error=non-virtual-dtor -Wno-reorder -Wno-error=cpp")
 
   message(STATUS "Anakin for inference is enabled")
   message(STATUS "Anakin is set INCLUDE:${ANAKIN_INCLUDE} LIBRARY:${ANAKIN_LIBRARY}")
-  include_directories("${ANAKIN_INCLUDE}")
-  # Anakin's source path is a mass, need to set sub-directories trivially.
-  include_directories("${ANAKIN_INCLUDE}/saber")
-  link_directories("${ANAKIN_LIBRARY}")
+  fetch_include_recursively(${ANAKIN_INCLUDE})
+
+  link_directories(${ANAKIN_LIBRARY})
 
-  nv_library(inference_anakin_api SRCS paddle_inference_api_anakin_engine.cc)
-  target_link_libraries(inference_anakin_api anakin)
+  nv_library(inference_anakin_api SHARED SRCS paddle_inference_api.cc paddle_inference_api_anakin_engine.cc)
+  target_link_libraries(inference_anakin_api anakin anakin_saber_common)
   list(APPEND inference_deps inference_anakin_api)
 endif()
 
@@ -73,7 +79,7 @@ function(inference_api_test TARGET_NAME)
 endfunction(inference_api_test)
 
 cc_library(paddle_inference_api
-    SRCS paddle_inference_api.cc paddle_inference_api_impl.cc
+    SRCS paddle_inference_api.cc paddle_inference_api_impl.cc
     DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
 
 cc_test(test_paddle_inference_api
@@ -84,8 +90,8 @@ inference_api_test(test_paddle_inference_api_impl
     ARGS test_word2vec test_image_classification)
 
 if (ANAKIN_FOUND)
-  nv_test(inference_anakin_test SRCS paddle_inference_api_anakin_engine_tester.cc
-    DEPS ${inference_deps} protobuf)
+  cc_test(inference_anakin_test SRCS paddle_inference_api_anakin_engine_tester.cc
+    DEPS ${inference_deps})
 endif()
 
 if(WITH_TESTING)
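With this change, Anakin support is toggled entirely at configure time: pointing the ANAKIN_INCLUDE and ANAKIN_LIBRARY cache variables at an Anakin checkout turns ANAKIN_FOUND on, which pulls in the headers recursively, relaxes the warning flags, and builds inference_anakin_api as a shared library. A hedged example invocation (the paths are placeholders; only the variable names come from the file): cmake .. -DANAKIN_INCLUDE=/path/to/anakin/include -DANAKIN_LIBRARY=/path/to/anakin/lib. If either variable is left empty, the Anakin targets and their test are skipped.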

paddle/contrib/inference/paddle_inference_api.h

Lines changed: 0 additions & 1 deletion

@@ -113,5 +113,4 @@ struct AnakinConfig : public PaddlePredictor::Config {
 // Similarly, each engine kind should map to a unique predictor implementation.
 template <typename ConfigT, PaddleEngineKind engine = PaddleEngineKind::kNative>
 std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
-
 } // namespace paddle
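This CreatePaddlePredictor template is the one entry point shared by all engines; the commit's kAnakin specialization (defined in the .cc diff below) plugs into it. A minimal caller sketch, with config values borrowed from the tester at the end of this commit and a placeholder model path:

#include "paddle/contrib/inference/paddle_inference_api.h"

int main() {
  paddle::AnakinConfig config;
  config.model_file = "./mobilenet_v2.anakin.bin";  // placeholder path
  config.device = 0;                                // GPU device id
  config.max_batch_size = 1;

  // The engine-kind template parameter selects the Anakin implementation.
  auto predictor =
      paddle::CreatePaddlePredictor<paddle::AnakinConfig,
                                    paddle::PaddleEngineKind::kAnakin>(config);
  return predictor != nullptr ? 0 : 1;
}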

paddle/contrib/inference/paddle_inference_api_anakin_engine.cc

Lines changed: 43 additions & 11 deletions

@@ -24,8 +24,16 @@ PaddleInferenceAnakinPredictor::PaddleInferenceAnakinPredictor(
 }
 
 bool PaddleInferenceAnakinPredictor::Init(const AnakinConfig &config) {
-  // TODO(Superjomn) Tell anakin to support return code.
-  engine_.Build(config.model_file, config.max_batch_size);
+  if (!(graph_.load(config.model_file))) {
+    return false;
+  }
+  graph_.ResetBatchSize("input_0", config.max_batch_size);
+  // optimization for graph
+  if (!(graph_.Optimize())) {
+    return false;
+  }
+  // construct executer
+  executor_.init(graph_);
   return true;
 }
 
@@ -38,24 +46,30 @@ bool PaddleInferenceAnakinPredictor::Run(
                  << "'s type is not float";
       return false;
     }
-    engine_.SetInputFromCPU(
-        input.name, static_cast<float *>(input.data.data), input.data.length);
+    auto d_tensor_in_p = executor_.get_in(input.name);
+    float *d_data_p = d_tensor_in_p->mutable_data();
+    if (cudaMemcpy(d_data_p,
+                   static_cast<float *>(input.data.data),
+                   d_tensor_in_p->valid_size() * sizeof(float),
+                   cudaMemcpyHostToDevice) != 0) {
+      LOG(ERROR) << "copy data from CPU to GPU error";
+      return false;
+    }
   }
 
-  // TODO(Superjomn) Tell anakin to support return code.
-  engine_.Execute();
+  executor_.prediction();
 
   if (output_data->empty()) {
     LOG(ERROR) << "At least one output should be set with tensors' names.";
     return false;
   }
   for (auto &output : *output_data) {
-    auto *tensor = engine_.GetOutputInGPU(output.name);
+    auto *tensor = executor_.get_out(output.name);
     output.shape = tensor->shape();
     // Copy data from GPU -> CPU
     if (cudaMemcpy(output.data.data,
-                   tensor->data(),
-                   tensor->size(),
+                   tensor->mutable_data(),
+                   tensor->valid_size() * sizeof(float),
                    cudaMemcpyDeviceToHost) != 0) {
       LOG(ERROR) << "copy data from GPU to CPU error";
       return false;
@@ -64,16 +78,34 @@ bool PaddleInferenceAnakinPredictor::Run(
   return true;
 }
 
-// TODO(Superjomn) To implement latter.
+anakin::Net<anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32>
+    &PaddleInferenceAnakinPredictor::get_executer() {
+  return executor_;
+}
+
+// the cloned new Predictor of anakin share the same net weights from original
+// Predictor
 std::unique_ptr<PaddlePredictor> PaddleInferenceAnakinPredictor::Clone() {
-  return nullptr;
+  VLOG(3) << "Anakin Predictor::clone";
+  std::unique_ptr<PaddlePredictor> cls(new PaddleInferenceAnakinPredictor());
+  // construct executer from other graph
+  auto anakin_predictor_p =
+      dynamic_cast<PaddleInferenceAnakinPredictor *>(cls.get());
+  if (!anakin_predictor_p) {
+    LOG(ERROR) << "fail to call Init";
+    return nullptr;
+  }
+  anakin_predictor_p->get_executer().init(graph_);
+
+  return std::move(cls);
 }
 
 // A factory to help create difference predictor.
 template <>
 std::unique_ptr<PaddlePredictor>
 CreatePaddlePredictor<AnakinConfig, PaddleEngineKind::kAnakin>(
     const AnakinConfig &config) {
+  VLOG(3) << "Anakin Predictor create.";
   std::unique_ptr<PaddlePredictor> x(
       new PaddleInferenceAnakinPredictor(config));
   return x;
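Clone() is no longer a stub: the clone calls get_executer().init(graph_) against the original predictor's graph, so the two predictors share network weights while each owns a separate Net executor. A short usage sketch, assuming separate executors may safely serve separate request streams (the diff itself makes no thread-safety promise):

// `config` is an AnakinConfig as in the tester below.
auto original =
    paddle::CreatePaddlePredictor<paddle::AnakinConfig,
                                  paddle::PaddleEngineKind::kAnakin>(config);

// Weights are shared via graph_; only the executor is re-initialized.
auto clone = original->Clone();

// `original` and `clone` can now each run their own inference requests.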

paddle/contrib/inference/paddle_inference_api_anakin_engine.h

Lines changed: 16 additions & 6 deletions

@@ -20,32 +20,42 @@ limitations under the License. */
 #pragma once
 
 // NOTE This header file do not have namespace.
-// TODO(Superjomn) Tell Anakin to provide better APIs.
-#include <test/framework/net/paddle_api.h>
+//#include <test/framework/net/paddle_api.h>
 #include "paddle/contrib/inference/paddle_inference_api.h"
 
+#include "framework/core/net/net.h"
+#include "saber/saber_types.h"
+
 namespace paddle {
 
 class PaddleInferenceAnakinPredictor : public PaddlePredictor {
  public:
+  PaddleInferenceAnakinPredictor() {}
+
   PaddleInferenceAnakinPredictor(const AnakinConfig& config);
 
   // NOTE Unlike the native engine, the buffers of anakin engine's output_data
   // should be allocated first.
-  // TODO(Superjomn) should unify all the behaviors of output_data accross all
-  // the engines.
   bool Run(const std::vector<PaddleTensor>& inputs,
            std::vector<PaddleTensor>* output_data) override;
 
   std::unique_ptr<PaddlePredictor> Clone() override;
 
+  anakin::Net<anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32>&
+  get_executer();
+
+  ~PaddleInferenceAnakinPredictor() override{};
+
  private:
   bool Init(const AnakinConfig& config);
 
-  anakin::AnakinEngine<anakin::NV,
+  anakin::graph::Graph<anakin::NV,
                        anakin::saber::AK_FLOAT,
                        anakin::Precision::FP32>
-      engine_;
+      graph_;
+  anakin::Net<anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32>
+      executor_;
+  AnakinConfig config_;
 };
 
 } // namespace paddle
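The template triple <anakin::NV, anakin::saber::AK_FLOAT, anakin::Precision::FP32> (NVIDIA target, float data type, FP32 precision) is now spelled out three times in this header. A purely hypothetical readability tweak, not part of the commit, would be to alias the two instantiations once:

// Hypothetical aliases for the Anakin instantiations used by the predictor.
using AnakinGraph = anakin::graph::Graph<anakin::NV,
                                         anakin::saber::AK_FLOAT,
                                         anakin::Precision::FP32>;
using AnakinNet = anakin::Net<anakin::NV,
                              anakin::saber::AK_FLOAT,
                              anakin::Precision::FP32>;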

paddle/contrib/inference/paddle_inference_api_anakin_engine_tester.cc

Lines changed: 41 additions & 3 deletions

@@ -12,16 +12,54 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/contrib/inference/paddle_inference_api.h"
+#include <glog/logging.h>
 #include <gtest/gtest.h>
 
+#include "gflags/gflags.h"
+#include "paddle/contrib/inference/paddle_inference_api.h"
+
 namespace paddle {
 
-TEST(inference, anakin) {
+AnakinConfig GetConfig() {
   AnakinConfig config;
+  config.model_file = "./mobilenet_v2.anakin.bin";
+  config.device = 0;
+  config.max_batch_size = 1;
+  return config;
+}
 
-  auto engine =
+TEST(inference, anakin) {
+  AnakinConfig config = GetConfig();
+  auto predictor =
       CreatePaddlePredictor<AnakinConfig, PaddleEngineKind::kAnakin>(config);
+
+  float data[1 * 3 * 224 * 224] = {1.0f};
+
+  PaddleBuf buf{.data = data, .length = sizeof(data)};
+  PaddleTensor tensor{.name = "input_0",
+                      .shape = std::vector<int>({1, 3, 224, 224}),
+                      .data = buf,
+                      .dtype = PaddleDType::FLOAT32};
+
+  // For simplicity, we set all the slots with the same data.
+  std::vector<PaddleTensor> paddle_tensor_feeds(1, tensor);
+
+  float data_out[1000];
+
+  PaddleBuf buf_out{.data = data_out, .length = sizeof(data)};
+  PaddleTensor tensor_out{.name = "prob_out",
+                          .shape = std::vector<int>({1000, 1}),
+                          .data = buf_out,
+                          .dtype = PaddleDType::FLOAT32};
+
+  std::vector<PaddleTensor> outputs(1, tensor_out);
+
+  ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs));
+
+  float* data_o = static_cast<float*>(outputs[0].data.data);
+  for (size_t j = 0; j < 1000; ++j) {
+    LOG(INFO) << "output[" << j << "]: " << data_o[j];
+  }
 }
 
 } // namespace paddle
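Note that the test has an external data dependency: GetConfig() hard-codes ./mobilenet_v2.anakin.bin, so that model file must sit in the working directory when the binary runs. With Anakin enabled at configure time, the test builds as inference_anakin_test (see the CMakeLists change above) and should be runnable through ctest -R inference_anakin_test; that invocation is an assumption based on the cc_test name, not something stated by the commit.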
