Skip to content

Commit 7ad182e

Browse files
committed
Cherry-pick from 16662: Anakin subgraph CPU support
1 parent 8643dbc commit 7ad182e

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

72 files changed

+1149
-680
lines changed

cmake/anakin_subgraph.cmake

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,9 @@ endif()
2525

2626
if(ANAKIN_FOUND)
2727
message(STATUS "Current ANAKIN header is ${ANAKIN_INCLUDE_DIR}/anakin_config.h. ")
28+
include_directories(${ANAKIN_ROOT})
2829
include_directories(${ANAKIN_ROOT}/include)
29-
include_directories(${ANAKIN_ROOT}/include/saber)
30+
include_directories(${ANAKIN_ROOT}/saber)
3031
link_directories(${ANAKIN_ROOT})
3132
add_definitions(-DPADDLE_WITH_ANAKIN)
3233
endif()

paddle/fluid/inference/anakin/convert/activation.cc

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,41 +16,45 @@
1616
#include <algorithm>
1717
#include <map>
1818

19-
using anakin::graph::GraphGlobalMem;
20-
using anakin::AK_FLOAT;
21-
using anakin::saber::NV;
22-
using anakin::saber::Shape;
23-
2419
namespace paddle {
2520
namespace inference {
2621
namespace anakin {
2722

28-
ActivationOpConverter::ActivationOpConverter(const std::string &op_type)
23+
template <typename TargetT>
24+
ActivationOpConverter<TargetT>::ActivationOpConverter(
25+
const std::string &op_type)
2926
: op_type_(op_type) {
3027
auto it = anakin_op_types_.find(op_type_);
3128
PADDLE_ENFORCE(it != anakin_op_types_.end(),
3229
"activation op type is not support");
3330
anakin_op_type_ = it->second;
3431
}
3532

36-
void ActivationOpConverter::operator()(const framework::proto::OpDesc &op,
37-
const framework::BlockDesc &block_desc,
38-
const framework::Scope &scope,
39-
bool test_mode) {
33+
template <typename TargetT>
34+
void ActivationOpConverter<TargetT>::operator()(
35+
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
36+
const framework::Scope &scope, bool test_mode) {
4037
framework::OpDesc op_desc(op, nullptr);
4138
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
4239
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
4340

4441
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
4542
auto input_name = op_desc.Input("X").front();
4643
auto output_name = op_desc.Output("Out").front();
47-
engine_->AddOp(op_name, "Activation", {input_name}, {output_name});
48-
engine_->AddOpAttr(op_name, "type", anakin_op_type_);
44+
this->engine_->AddOp(op_name, "Activation", {input_name}, {output_name});
45+
this->engine_->AddOpAttr(op_name, "type", anakin_op_type_);
4946
}
5047

5148
} // namespace anakin
5249
} // namespace inference
5350
} // namespace paddle
5451

55-
REGISTER_ANAKIN_OP_CONVERTER(sigmoid, SigmoidOpConverter);
56-
REGISTER_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter);
52+
#ifdef PADDLE_WITH_CUDA
53+
REGISTER_CUDA_ANAKIN_OP_CONVERTER(sigmoid,
54+
SigmoidOpConverter<::anakin::saber::NV>);
55+
REGISTER_CUDA_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter<::anakin::saber::NV>);
56+
#endif
57+
58+
REGISTER_CPU_ANAKIN_OP_CONVERTER(sigmoid,
59+
SigmoidOpConverter<::anakin::saber::X86>);
60+
REGISTER_CPU_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter<::anakin::saber::X86>);

paddle/fluid/inference/anakin/convert/activation.h

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ namespace paddle {
2222
namespace inference {
2323
namespace anakin {
2424

25-
class ActivationOpConverter : public AnakinOpConverter {
25+
template <typename TargetT>
26+
class ActivationOpConverter : public AnakinOpConverter<TargetT> {
2627
public:
2728
explicit ActivationOpConverter(const std::string &op_type);
2829

@@ -39,14 +40,16 @@ class ActivationOpConverter : public AnakinOpConverter {
3940
{"sigmoid", "Sigmoid"}};
4041
};
4142

42-
class TanhOpConverter : public ActivationOpConverter {
43+
template <typename TargetT>
44+
class TanhOpConverter : public ActivationOpConverter<TargetT> {
4345
public:
44-
TanhOpConverter() : ActivationOpConverter("tanh") {}
46+
TanhOpConverter() : ActivationOpConverter<TargetT>("tanh") {}
4547
};
4648

47-
class SigmoidOpConverter : public ActivationOpConverter {
49+
template <typename TargetT>
50+
class SigmoidOpConverter : public ActivationOpConverter<TargetT> {
4851
public:
49-
SigmoidOpConverter() : ActivationOpConverter("sigmoid") {}
52+
SigmoidOpConverter() : ActivationOpConverter<TargetT>("sigmoid") {}
5053
};
5154
} // namespace anakin
5255
} // namespace inference

paddle/fluid/inference/anakin/convert/affine_channel.cc

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,19 +18,16 @@
1818
#include <vector>
1919

2020
using anakin::graph::GraphGlobalMem;
21+
using anakin::PTuple;
2122
using anakin::AK_FLOAT;
22-
using anakin::Precision;
23-
using anakin::saber::NV;
24-
using anakin::saber::X86;
2523
using anakin::saber::Shape;
26-
using anakin::PBlock;
27-
using anakin::PTuple;
2824

2925
namespace paddle {
3026
namespace inference {
3127
namespace anakin {
3228

33-
void AffineChannelOpConverter::operator()(
29+
template <typename TargetT>
30+
void AffineChannelOpConverter<TargetT>::operator()(
3431
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
3532
const framework::Scope &scope, bool test_mode) {
3633
framework::OpDesc op_desc(op, nullptr);
@@ -59,42 +56,49 @@ void AffineChannelOpConverter::operator()(
5956
bias_tensor->Resize(bias_t->dims());
6057
TensorCopySync((*bias_t), platform::CPUPlace(), bias_tensor.get());
6158

62-
engine_->AddOp(op_name, "AffineChannel", {input_name}, {output_name});
59+
this->engine_->AddOp(op_name, "AffineChannel", {input_name}, {output_name});
6360

6461
// Generate the Scale parameter of Anakin.
6562
auto scale_shape = framework::vectorize2int(scale_t->dims());
6663
while (scale_shape.size() < 4) {
6764
scale_shape.insert(scale_shape.begin(), 1);
6865
}
6966
Shape anakin_scale_shape(scale_shape);
70-
auto *weight1 = GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(
71-
anakin_scale_shape);
67+
auto *weight1 =
68+
GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
69+
anakin_scale_shape);
7270
float *scale_cpu_data =
7371
static_cast<float *>(weight1->h_tensor().mutable_data());
7472
std::copy_n(scale_tensor->data<float>(), scale_tensor->numel(),
7573
scale_cpu_data);
7674
weight1->d_tensor().set_shape(anakin_scale_shape);
7775
weight1->d_tensor().copy_from(weight1->h_tensor());
78-
engine_->AddOpAttr(op_name, "weight_1", *weight1);
76+
this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
7977

8078
// Generate the Bias parameter of Anakin.
8179
auto bias_shape = framework::vectorize2int(bias_t->dims());
8280
while (bias_shape.size() < 4) {
8381
bias_shape.insert(bias_shape.begin(), 1);
8482
}
8583
Shape anakin_bias_shape(bias_shape);
86-
auto *weight2 = GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(
87-
anakin_bias_shape);
84+
auto *weight2 =
85+
GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
86+
anakin_bias_shape);
8887
float *bias_cpu_data =
8988
static_cast<float *>(weight2->h_tensor().mutable_data());
9089
std::copy_n(bias_tensor->data<float>(), bias_tensor->numel(), bias_cpu_data);
9190
weight2->d_tensor().set_shape(anakin_bias_shape);
9291
weight2->d_tensor().copy_from(weight2->h_tensor());
93-
engine_->AddOpAttr(op_name, "weight_2", *weight2);
92+
this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
9493
}
9594

9695
} // namespace anakin
9796
} // namespace inference
9897
} // namespace paddle
9998

100-
REGISTER_ANAKIN_OP_CONVERTER(affine_channel, AffineChannelOpConverter);
99+
#ifdef PADDLE_WITH_CUDA
100+
REGISTER_CUDA_ANAKIN_OP_CONVERTER(
101+
affine_channel, AffineChannelOpConverter<::anakin::saber::NV>);
102+
#endif
103+
REGISTER_CPU_ANAKIN_OP_CONVERTER(
104+
affine_channel, AffineChannelOpConverter<::anakin::saber::X86>);

paddle/fluid/inference/anakin/convert/affine_channel.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ namespace paddle {
2121
namespace inference {
2222
namespace anakin {
2323

24-
class AffineChannelOpConverter : public AnakinOpConverter {
24+
template <typename TargetT>
25+
class AffineChannelOpConverter : public AnakinOpConverter<TargetT> {
2526
public:
2627
AffineChannelOpConverter() = default;
2728

paddle/fluid/inference/anakin/convert/batch_norm.cc

Lines changed: 30 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -21,17 +21,16 @@
2121

2222
using anakin::graph::GraphGlobalMem;
2323
using anakin::AK_FLOAT;
24-
using anakin::saber::NV;
2524
using anakin::saber::Shape;
2625

2726
namespace paddle {
2827
namespace inference {
2928
namespace anakin {
3029

31-
void BatchNormOpConverter::operator()(const framework::proto::OpDesc &op,
32-
const framework::BlockDesc &block_desc,
33-
const framework::Scope &scope,
34-
bool test_mode) {
30+
template <typename TargetT>
31+
void BatchNormOpConverter<TargetT>::operator()(
32+
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
33+
const framework::Scope &scope, bool test_mode) {
3534
framework::OpDesc op_desc(op, nullptr);
3635
PADDLE_ENFORCE_EQ(op_desc.Output("Y").size(), 1);
3736
std::map<std::string, std::string> inputs;
@@ -48,9 +47,9 @@ void BatchNormOpConverter::operator()(const framework::proto::OpDesc &op,
4847

4948
auto bn_op_name = op_name + ":bn";
5049
auto bn_output = bn_op_name + "_output";
51-
engine_->AddOp(bn_op_name, "BatchNorm", {inputs["X"]}, {bn_output});
52-
engine_->AddOpAttr(bn_op_name, "epsilon", epsilon);
53-
engine_->AddOpAttr(bn_op_name, "momentum", static_cast<float>(1.0));
50+
this->engine_->AddOp(bn_op_name, "BatchNorm", {inputs["X"]}, {bn_output});
51+
this->engine_->AddOpAttr(bn_op_name, "epsilon", epsilon);
52+
this->engine_->AddOpAttr(bn_op_name, "momentum", static_cast<float>(1.0));
5453

5554
auto scale_op_name = op_name + ":scale";
5655
auto get_lod_tensor = [this, &scope, &op_name](const std::string &var_name,
@@ -81,48 +80,54 @@ void BatchNormOpConverter::operator()(const framework::proto::OpDesc &op,
8180
Shape shape1(fill_shape(4, framework::vectorize2int(mean_t.dims())));
8281
Shape shape2(fill_shape(4, framework::vectorize2int(variance_t.dims())));
8382
auto *weight1 =
84-
GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(shape1);
83+
GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(shape1);
8584
auto *mean_data = static_cast<float *>(weight1->h_tensor().mutable_data());
8685
std::copy_n(mean_t.data<float>(), mean_t.numel(), mean_data);
87-
engine_->AddOpAttr(bn_op_name, "weight_1", *weight1);
86+
this->engine_->AddOpAttr(bn_op_name, "weight_1", *weight1);
8887

8988
auto *weight2 =
90-
GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(shape2);
89+
GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(shape2);
9190
auto *variance_data =
9291
static_cast<float *>(weight2->h_tensor().mutable_data());
9392
std::copy_n(variance_t.data<float>(), variance_t.numel(), variance_data);
94-
engine_->AddOpAttr(bn_op_name, "weight_2", *weight2);
93+
this->engine_->AddOpAttr(bn_op_name, "weight_2", *weight2);
9594

9695
Shape shape3(std::vector<int>({1, 1, 1, 1}));
9796
auto *weight3 =
98-
GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(shape3);
97+
GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(shape3);
9998
auto *alpha_data = static_cast<float *>(weight3->h_tensor().mutable_data());
10099
float weight3_data[] = {1};
101100
std::copy(std::begin(weight3_data), std::end(weight3_data), alpha_data);
102-
engine_->AddOpAttr(bn_op_name, "weight_3", *weight3);
101+
this->engine_->AddOpAttr(bn_op_name, "weight_3", *weight3);
103102

104103
Shape scale_shape(fill_shape(4, framework::vectorize2int(scale_t.dims())));
105-
auto *scale =
106-
GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(scale_shape);
104+
auto *scale = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
105+
scale_shape);
107106
auto *scale_data = static_cast<float *>(scale->h_tensor().mutable_data());
108107
std::copy_n(scale_t.data<float>(), scale_t.numel(), scale_data);
109108

110109
Shape bias_shape(fill_shape(4, framework::vectorize2int(bias_t.dims())));
111-
auto *bias =
112-
GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(bias_shape);
110+
auto *bias = GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
111+
bias_shape);
113112
auto *bias_data = static_cast<float *>(bias->h_tensor().mutable_data());
114113
std::copy_n(bias_t.data<float>(), bias_t.numel(), bias_data);
115114

116-
engine_->AddOp(scale_op_name, "Scale", {bn_output}, {output});
117-
engine_->AddOpAttr(scale_op_name, "axis", 1);
118-
engine_->AddOpAttr(scale_op_name, "num_axes", 1);
119-
engine_->AddOpAttr(scale_op_name, "bias_term", true);
120-
engine_->AddOpAttr(scale_op_name, "weight_1", *scale);
121-
engine_->AddOpAttr(scale_op_name, "weight_2", *bias);
115+
this->engine_->AddOp(scale_op_name, "Scale", {bn_output}, {output});
116+
this->engine_->AddOpAttr(scale_op_name, "axis", 1);
117+
this->engine_->AddOpAttr(scale_op_name, "num_axes", 1);
118+
this->engine_->AddOpAttr(scale_op_name, "bias_term", true);
119+
this->engine_->AddOpAttr(scale_op_name, "weight_1", *scale);
120+
this->engine_->AddOpAttr(scale_op_name, "weight_2", *bias);
122121
}
123122

124123
} // namespace anakin
125124
} // namespace inference
126125
} // namespace paddle
127126

128-
REGISTER_ANAKIN_OP_CONVERTER(batch_norm, BatchNormOpConverter);
127+
#ifdef PADDLE_WITH_CUDA
128+
REGISTER_CUDA_ANAKIN_OP_CONVERTER(batch_norm,
129+
BatchNormOpConverter<::anakin::saber::NV>);
130+
#endif
131+
132+
REGISTER_CPU_ANAKIN_OP_CONVERTER(batch_norm,
133+
BatchNormOpConverter<::anakin::saber::X86>);

paddle/fluid/inference/anakin/convert/batch_norm.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ namespace paddle {
2020
namespace inference {
2121
namespace anakin {
2222

23-
class BatchNormOpConverter : public AnakinOpConverter {
23+
template <typename TargetT>
24+
class BatchNormOpConverter : public AnakinOpConverter<TargetT> {
2425
public:
2526
BatchNormOpConverter() = default;
2627

paddle/fluid/inference/anakin/convert/concat.cc

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -15,38 +15,32 @@
1515
#include "paddle/fluid/inference/anakin/convert/concat.h"
1616
#include <algorithm>
1717

18-
using anakin::graph::GraphGlobalMem;
19-
using anakin::AK_FLOAT;
20-
using anakin::Precision;
21-
using anakin::saber::NV;
22-
using anakin::saber::X86;
23-
using anakin::saber::Shape;
24-
using anakin::PBlock;
25-
using anakin::PTuple;
26-
2718
namespace paddle {
2819
namespace inference {
2920
namespace anakin {
3021

31-
void ConcatOpConverter::operator()(const framework::proto::OpDesc &op,
32-
const framework::BlockDesc &block_desc,
33-
const framework::Scope &scope,
34-
bool test_mode) {
22+
template <typename TargetT>
23+
void ConcatOpConverter<TargetT>::operator()(
24+
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
25+
const framework::Scope &scope, bool test_mode) {
3526
framework::OpDesc op_desc(op, nullptr);
3627
int axis = boost::get<int>(op_desc.GetAttr("axis"));
3728
auto input_names = op_desc.Input("X");
38-
// PADDLE_ENFORCE(axis > 0,
39-
// "The axis attr of Concat op should be large than 0 for trt");
4029

4130
auto y_name = op_desc.Output("Out").front();
4231
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
4332

44-
engine_->AddOp(op_name, "Concat", input_names, {y_name});
45-
engine_->AddOpAttr(op_name, "axis", axis);
33+
this->engine_->AddOp(op_name, "Concat", input_names, {y_name});
34+
this->engine_->AddOpAttr(op_name, "axis", axis);
4635
}
4736

4837
} // namespace anakin
4938
} // namespace inference
5039
} // namespace paddle
5140

52-
REGISTER_ANAKIN_OP_CONVERTER(concat, ConcatOpConverter);
41+
#ifdef PADDLE_WITH_CUDA
42+
REGISTER_CUDA_ANAKIN_OP_CONVERTER(concat,
43+
ConcatOpConverter<::anakin::saber::NV>);
44+
#endif
45+
REGISTER_CPU_ANAKIN_OP_CONVERTER(concat,
46+
ConcatOpConverter<::anakin::saber::X86>);

paddle/fluid/inference/anakin/convert/concat.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ namespace paddle {
2020
namespace inference {
2121
namespace anakin {
2222

23-
class ConcatOpConverter : public AnakinOpConverter {
23+
template <typename TargetT>
24+
class ConcatOpConverter : public AnakinOpConverter<TargetT> {
2425
public:
2526
ConcatOpConverter() = default;
2627

0 commit comments

Comments
 (0)