Commit decda73

fea/anakin compile with demo (#12772)
* anakin support x86
* fix code style
* add anakin ditu cnn demo
* add timer
* add rnn
* fix inference_anakin_cnn/rnn_test compile error
* make anakin_rnn_tester run
* add anakin_enable_op_time option
* update api/CMakeLists.txt
* enlarge the max_batch_size in anakin.config
* update with comments
1 parent bcaa1d5 commit decda73

File tree

7 files changed, +463 -42 lines changed


cmake/external/anakin.cmake

Lines changed: 23 additions & 6 deletions
@@ -2,6 +2,11 @@ if (NOT WITH_ANAKIN)
     return()
 endif()
 
+option(ANAKIN_ENABLE_OP_TIMER "Get more detailed information with Anakin op time" OFF)
+if(ANAKIN_ENABLE_OP_TIMER)
+    add_definitions(-DPADDLE_ANAKIN_ENABLE_OP_TIMER)
+endif()
+
 INCLUDE(ExternalProject)
 set(ANAKIN_SOURCE_DIR ${THIRD_PARTY_PATH}/anakin)
 # the anakin install dir is only default one now
@@ -11,23 +16,34 @@ set(ANAKIN_LIBRARY ${ANAKIN_INSTALL_DIR})
 set(ANAKIN_SHARED_LIB ${ANAKIN_LIBRARY}/libanakin.so)
 set(ANAKIN_SABER_LIB ${ANAKIN_LIBRARY}/libanakin_saber_common.so)
 
-# TODO(luotao): ANAKIN_MODLE_URL will move to demo ci later.
-set(ANAKIN_MODLE_URL "http://paddle-inference-dist.bj.bcebos.com/mobilenet_v2.anakin.bin")
+# TODO(luotao): ANAKIN_MODLE_URL etc will move to demo ci later.
+set(INFERENCE_URL "http://paddle-inference-dist.bj.bcebos.com")
+set(ANAKIN_MODLE_URL "${INFERENCE_URL}/mobilenet_v2.anakin.bin")
+set(ANAKIN_RNN_MODLE_URL "${INFERENCE_URL}/anakin_test%2Fditu_rnn.anakin2.model.bin")
+set(ANAKIN_RNN_DATA_URL "${INFERENCE_URL}/anakin_test%2Fditu_rnn_data.txt")
 execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_SOURCE_DIR}")
-execute_process(COMMAND bash -c "cd ${ANAKIN_SOURCE_DIR}; wget -q --no-check-certificate ${ANAKIN_MODLE_URL}")
+execute_process(COMMAND bash -c "cd ${ANAKIN_SOURCE_DIR}; wget -q --no-check-certificate ${ANAKIN_MODLE_URL} -N")
+execute_process(COMMAND bash -c "cd ${ANAKIN_SOURCE_DIR}; wget -q --no-check-certificate ${ANAKIN_RNN_MODLE_URL} -N")
+execute_process(COMMAND bash -c "cd ${ANAKIN_SOURCE_DIR}; wget -q --no-check-certificate ${ANAKIN_RNN_DATA_URL} -N")
 
 include_directories(${ANAKIN_INCLUDE})
 include_directories(${ANAKIN_INCLUDE}/saber/)
+include_directories(${ANAKIN_INCLUDE}/saber/core/)
+include_directories(${ANAKIN_INCLUDE}/saber/funcs/impl/x86/)
+include_directories(${ANAKIN_INCLUDE}/saber/funcs/impl/cuda/base/cuda_c/)
 
 set(ANAKIN_COMPILE_EXTRA_FLAGS
     -Wno-error=unused-but-set-variable -Wno-unused-but-set-variable
     -Wno-error=unused-variable -Wno-unused-variable
     -Wno-error=format-extra-args -Wno-format-extra-args
-    -Wno-error=comment -Wno-comment
-    -Wno-error=format -Wno-format
+    -Wno-error=comment -Wno-comment
+    -Wno-error=format -Wno-format
+    -Wno-error=maybe-uninitialized -Wno-maybe-uninitialized
     -Wno-error=switch -Wno-switch
     -Wno-error=return-type -Wno-return-type
     -Wno-error=non-virtual-dtor -Wno-non-virtual-dtor
+    -Wno-error=ignored-qualifiers
+    -Wno-ignored-qualifiers
     -Wno-sign-compare
     -Wno-reorder
     -Wno-error=cpp)
@@ -38,7 +54,7 @@ ExternalProject_Add(
     DEPENDS ${MKLML_PROJECT}
     # Anakin codes error on Intel(R) Xeon(R) Gold 5117 CPU, temporary do not compile avx512 related code.
    GIT_REPOSITORY "https://github.com/luotao1/Anakin"
-    GIT_TAG "bcf17aabe7921ceb7bce591244b4f9dce7dba5c8"
+    GIT_TAG "211d1fc5d813d70c0c14072f9083cf25f40940ea"
     PREFIX ${ANAKIN_SOURCE_DIR}
     UPDATE_COMMAND ""
     CMAKE_ARGS -DUSE_GPU_PLACE=YES
@@ -48,6 +64,7 @@ ExternalProject_Add(
                -DMKLML_ROOT=${THIRD_PARTY_PATH}/install/mklml
                -DCUDNN_ROOT=${CUDNN_ROOT}
                -DCUDNN_INCLUDE_DIR=${CUDNN_INCLUDE_DIR}
+               -DENABLE_OP_TIMER=${ANAKIN_ENABLE_OP_TIMER}
                ${EXTERNAL_OPTIONAL_ARGS}
     CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${ANAKIN_INSTALL_DIR}
 )
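The new `ANAKIN_ENABLE_OP_TIMER` option is OFF by default; configuring with `-DWITH_ANAKIN=ON -DANAKIN_ENABLE_OP_TIMER=ON` defines `PADDLE_ANAKIN_ENABLE_OP_TIMER` for Paddle's inference sources and forwards `-DENABLE_OP_TIMER` to the Anakin external project, which together enable the per-op timing report added in `api_anakin_engine.cc` below.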

paddle/fluid/inference/api/CMakeLists.txt

Lines changed: 7 additions & 4 deletions
@@ -65,17 +65,20 @@ endif()
 
 if (WITH_ANAKIN AND WITH_GPU) # only needed in CI
   # compile the libinference_anakin_api.a and anakin.so.
-  cc_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber)
+  cc_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber mklml)
   cc_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber)
   function(anakin_target target_name)
     target_compile_options(${target_name} BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})
   endfunction()
   anakin_target(inference_anakin_api)
   anakin_target(inference_anakin_api_shared)
   if (WITH_TESTING)
-    cc_test(inference_anakin_test SRCS api_anakin_engine_tester.cc
+    cc_test(api_anakin_engine_tester SRCS api_anakin_engine_tester.cc
            ARGS --model=${ANAKIN_SOURCE_DIR}/mobilenet_v2.anakin.bin
-            DEPS inference_anakin_api dynload_cuda SERIAL)
-    target_compile_options(inference_anakin_test BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})
+            DEPS inference_anakin_api_shared dynload_cuda SERIAL)
+    cc_test(api_anakin_engine_rnn_tester SRCS api_anakin_engine_rnn_tester.cc
+            ARGS --model=${ANAKIN_SOURCE_DIR}/anakin_test%2Fditu_rnn.anakin2.model.bin
+                 --datapath=${ANAKIN_SOURCE_DIR}/anakin_test%2Fditu_rnn_data.txt
+            DEPS inference_anakin_api_shared dynload_cuda SERIAL)
   endif(WITH_TESTING)
 endif()
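The mobilenet test is renamed from `inference_anakin_test` to `api_anakin_engine_tester`, and a new `api_anakin_engine_rnn_tester` runs the ditu RNN model and data file downloaded by `anakin.cmake`. Both tests now link against `inference_anakin_api_shared`, which already receives `${ANAKIN_COMPILE_EXTRA_FLAGS}` through `anakin_target()`, so the per-test `target_compile_options` line is dropped.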

paddle/fluid/inference/api/api.cc

Lines changed: 0 additions & 3 deletions
@@ -1,11 +1,8 @@
 /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
-
 http://www.apache.org/licenses/LICENSE-2.0
-
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

paddle/fluid/inference/api/api_anakin_engine.cc

Lines changed: 114 additions & 23 deletions
@@ -13,26 +13,47 @@
 // limitations under the License.
 
 #include "paddle/fluid/inference/api/api_anakin_engine.h"
+
+#ifdef PADDLE_WITH_CUDA
 #include <cuda.h>
+#endif
+
+#include <mkl_service.h>
+#include <omp.h>
+#include <map>
+#include <string>
+#include <utility>
 #include <vector>
 
+#include "framework/core/net/net.h"
+#include "framework/operators/ops.h"
+#include "saber/funcs/timer.h"
+
 namespace paddle {
 
 template <typename Target>
 PaddleInferenceAnakinPredictor<Target>::PaddleInferenceAnakinPredictor(
     const AnakinConfig &config) {
   CHECK(Init(config));
 }
-
+template <>
+PaddleInferenceAnakinPredictor<anakin::X86>::PaddleInferenceAnakinPredictor(
+    const AnakinConfig &config) {
+  omp_set_dynamic(0);
+  omp_set_num_threads(1);
+  mkl_set_num_threads(1);
+  CHECK(Init(config));
+}
 template <typename Target>
 bool PaddleInferenceAnakinPredictor<Target>::Init(const AnakinConfig &config) {
   if (!(graph_.load(config.model_file))) {
-    LOG(FATAL) << "fail to load graph from " << config.model_file;
+    VLOG(3) << "fail to load graph from " << config.model_file;
     return false;
   }
   auto inputs = graph_.get_ins();
   for (auto &input_str : inputs) {
     graph_.ResetBatchSize(input_str, config.max_batch_size);
+    max_batch_size_ = config.max_batch_size;
   }
   // optimization for graph
   if (!(graph_.Optimize())) {
@@ -52,15 +73,15 @@ bool PaddleInferenceAnakinPredictor<Target>::Run(
     std::vector<PaddleTensor> *output_data, int batch_size) {
   for (const auto &input : inputs) {
     if (input.dtype != PaddleDType::FLOAT32) {
-      LOG(ERROR) << "Only support float type inputs. " << input.name
-                 << "'s type is not float";
+      VLOG(3) << "Only support float type inputs. " << input.name
+              << "'s type is not float";
       return false;
     }
     auto d_tensor_in_p = executor_p_->get_in(input.name);
-    auto net_shape = d_tensor_in_p->valid_shape();
+    auto net_shape = d_tensor_in_p->shape();
     if (net_shape.size() != input.shape.size()) {
-      LOG(ERROR) << " input " << input.name
-                 << "'s shape size should be equal to that of net";
+      VLOG(3) << " input " << input.name
+              << "'s shape size should be equal to that of net";
       return false;
     }
     int sum = 1;
@@ -79,21 +100,45 @@ bool PaddleInferenceAnakinPredictor<Target>::Run(
     }
     d_tensor_in_p->reshape(tmp_shape);
 
+    if (input.lod.size() > 0) {
+      if (input.lod.size() > 1) {
+        VLOG(3) << " input lod first dim should <=1, but you set "
+                << input.lod.size();
+        return false;
+      }
+      std::vector<int> offset(input.lod[0].begin(), input.lod[0].end());
+      d_tensor_in_p->set_seq_offset(offset);
+      VLOG(3) << "offset.size(): " << offset.size();
+      for (int i = 0; i < offset.size(); i++) {
+        VLOG(3) << offset[i];
+      }
+    }
+
     float *d_data_p = d_tensor_in_p->mutable_data();
-    if (cudaMemcpy(d_data_p, static_cast<float *>(input.data.data()),
-                   d_tensor_in_p->valid_size() * sizeof(float),
-                   cudaMemcpyHostToDevice) != 0) {
-      LOG(ERROR) << "copy data from CPU to GPU error";
-      return false;
+
+#ifdef PADDLE_WITH_CUDA
+    if (std::is_same<anakin::NV, Target>::value) {
+      if (cudaMemcpy(d_data_p, static_cast<float *>(input.data.data()),
+                     d_tensor_in_p->valid_size() * sizeof(float),
+                     cudaMemcpyHostToDevice) != 0) {
+        VLOG(3) << "copy data from CPU to GPU error";
+        return false;
+      }
+    }
+#endif
+    if (std::is_same<anakin::X86, Target>::value) {
+      memcpy(d_data_p, static_cast<float *>(input.data.data()),
+             d_tensor_in_p->valid_size() * sizeof(float));
     }
-    cudaStreamSynchronize(NULL);
   }
+#ifdef PADDLE_WITH_CUDA
   cudaDeviceSynchronize();
   executor_p_->prediction();
   cudaDeviceSynchronize();
+#endif
 
   if (output_data->empty()) {
-    LOG(ERROR) << "At least one output should be set with tensors' names.";
+    VLOG(3) << "At least one output should be set with tensors' names.";
     return false;
   }
   for (auto &output : *output_data) {
@@ -102,14 +147,22 @@ bool PaddleInferenceAnakinPredictor<Target>::Run(
     if (output.data.length() < tensor->valid_size() * sizeof(float)) {
       output.data.Resize(tensor->valid_size() * sizeof(float));
     }
-    // Copy data from GPU -> CPU
-    if (cudaMemcpy(output.data.data(), tensor->mutable_data(),
-                   tensor->valid_size() * sizeof(float),
-                   cudaMemcpyDeviceToHost) != 0) {
-      LOG(ERROR) << "copy data from GPU to CPU error";
-      return false;
+
+#if PADDLE_WITH_CUDA
+    if (std::is_same<anakin::NV, Target>::value) {
+      // Copy data from GPU -> CPU
+      if (cudaMemcpy(output.data.data(), tensor->mutable_data(),
+                     tensor->valid_size() * sizeof(float),
+                     cudaMemcpyDeviceToHost) != 0) {
+        VLOG(3) << "copy data from GPU to CPU error";
+        return false;
+      }
+    }
+#endif
+    if (std::is_same<anakin::X86, Target>::value) {
+      memcpy(output.data.data(), tensor->mutable_data(),
+             tensor->valid_size() * sizeof(float));
     }
-    cudaStreamSynchronize(NULL);
   }
   return true;
 }
@@ -132,7 +185,7 @@ PaddleInferenceAnakinPredictor<Target>::Clone() {
   auto anakin_predictor_p =
      dynamic_cast<PaddleInferenceAnakinPredictor<Target> *>(cls.get());
   if (!anakin_predictor_p) {
-    LOG(ERROR) << "fail to call Init";
+    VLOG(3) << "fail to call Init";
     return nullptr;
   }
   anakin_predictor_p->get_executer().init(graph_);
@@ -162,6 +215,44 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
   VLOG(3) << "Anakin Predictor create on unknown platform.";
   return nullptr;
 }
-};
+}
+
+#ifdef PADDLE_ANAKIN_ENABLE_OP_TIMER
+template <typename Target>
+using executor_t =
+    anakin::Net<Target, anakin::saber::AK_FLOAT, anakin::Precision::FP32>;
+
+template <typename Target>
+void DisplayOpTimer(executor_t<Target> *net_executor, int epoch) {
+  std::vector<float> op_time = net_executor->get_op_time();
+  auto exec_funcs = net_executor->get_exec_funcs();
+  auto op_param = net_executor->get_op_param();
+  for (int i = 0; i < op_time.size(); i++) {
+    LOG(INFO) << "name: " << exec_funcs[i].name
+              << " op_type: " << exec_funcs[i].op_name
+              << " op_param: " << op_param[i] << " time " << op_time[i] / epoch;
+  }
+  std::map<std::string, float> op_map;
+  for (int i = 0; i < op_time.size(); i++) {
+    auto it = op_map.find(op_param[i]);
+    if (it != op_map.end())
+      op_map[op_param[i]] += op_time[i];
+    else
+      op_map.insert(std::pair<std::string, float>(op_param[i], op_time[i]));
+  }
+  for (auto it = op_map.begin(); it != op_map.end(); ++it) {
+    LOG(INFO) << it->first << " " << (it->second) / epoch << " ms";
+  }
+}
+#endif
+
+template <typename Target>
+PaddleInferenceAnakinPredictor<Target>::~PaddleInferenceAnakinPredictor() {
+#ifdef PADDLE_ANAKIN_ENABLE_OP_TIMER
+  DisplayOpTimer<Target>(executor_p_, max_batch_size_);
+#endif
+  delete executor_p_;
+  executor_p_ = nullptr;
+}
 
 }  // namespace paddle
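Taken together, these changes let one `Run()` implementation serve both the NV target and the new X86 target: the CUDA copies and device synchronization are compiled only under `PADDLE_WITH_CUDA` and taken only when `Target` is `anakin::NV`, the X86 path uses plain `memcpy`, and a one-level lod on an input tensor is forwarded to Anakin as sequence offsets. Below is a minimal sketch of how a caller might drive the new CPU path; the model path, tensor names, shapes, and feature width are placeholders rather than part of this commit, and the config fields and `AnakinConfig::X86` enum value are assumed to match `paddle_inference_api.h` at this revision.

```cpp
#include <vector>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  paddle::AnakinConfig config;
  config.model_file = "path/to/ditu_rnn.anakin2.model.bin";  // placeholder path
  config.max_batch_size = 1000;  // batch ceiling, applied via ResetBatchSize()
  config.target_type = paddle::AnakinConfig::X86;  // assumed enum for the x86 path

  auto predictor =
      paddle::CreatePaddlePredictor<paddle::AnakinConfig,
                                    paddle::PaddleEngineKind::kAnakin>(config);
  if (!predictor) return -1;

  // Two sequences packed into one batch: rows [0, 3) and [3, 7).
  std::vector<float> buf(7 * 33, 0.f);  // 7 steps x hypothetical feature width 33
  paddle::PaddleTensor input;
  input.name = "input_0";  // placeholder input name
  input.dtype = paddle::PaddleDType::FLOAT32;
  input.shape = {7, 33, 1, 1};
  input.data = paddle::PaddleBuf(buf.data(), buf.size() * sizeof(float));
  input.lod = {{0, 3, 7}};  // one-level lod, forwarded via set_seq_offset()

  std::vector<paddle::PaddleTensor> outputs(1);
  outputs[0].name = "final_output.tmp_1";  // placeholder output name
  return predictor->Run({input}, &outputs) ? 0 : -1;
}
```

On the X86 specialization: the constructor pins OpenMP and MKL to a single thread (`omp_set_num_threads(1)`, `mkl_set_num_threads(1)`), so single-stream latency stays deterministic instead of depending on ambient thread settings.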

paddle/fluid/inference/api/api_anakin_engine.h

Lines changed: 2 additions & 4 deletions
@@ -47,10 +47,7 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor {
   anakin::Net<Target, anakin::saber::AK_FLOAT, anakin::Precision::FP32>&
   get_executer();
 
-  ~PaddleInferenceAnakinPredictor() override {
-    delete executor_p_;
-    executor_p_ = nullptr;
-  };
+  ~PaddleInferenceAnakinPredictor() override;
 
  private:
   bool Init(const AnakinConfig& config);
@@ -60,6 +57,7 @@ class PaddleInferenceAnakinPredictor : public PaddlePredictor {
   anakin::Net<Target, anakin::saber::AK_FLOAT, anakin::Precision::FP32>*
       executor_p_{nullptr};
   AnakinConfig config_;
+  int max_batch_size_{0};
 };
 
 }  // namespace paddle
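Moving the destructor body out of the header lets the timing report run at teardown: when `PADDLE_ANAKIN_ENABLE_OP_TIMER` is defined, `DisplayOpTimer()` in `api_anakin_engine.cc` prints per-op times before the executor is deleted. The new `max_batch_size_` member records the batch ceiling applied in `Init()` and doubles as the averaging divisor for that report.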
