
Commit e14ab18 (parent: 7ad182e)

Cherry-pick from 1662, 16797.. : add anakin int8 support


81 files changed: +1103, -589 lines (only part of the diff is shown below).

paddle/fluid/framework/ir/fc_fuse_pass.cc

Lines changed: 2 additions & 1 deletion
@@ -48,8 +48,9 @@ void FCFusePass::ApplyImpl(ir::Graph* graph) const {
   GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern);
   GET_IR_NODE_FROM_SUBGRAPH(mul_out, mul_out, fc_pattern);
 
+  auto base_op_desc = *mul->Op()->Proto();
   // Create an FC Node.
-  OpDesc desc;
+  OpDesc desc(base_op_desc, nullptr);
   std::string fc_x_in = subgraph.at(x)->Name();
   std::string fc_Y_in = w->Name();
   std::string fc_bias_in = fc_bias->Name();
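
Note: the fused fc op is now copy-constructed from the matched mul op's proto instead of starting from an empty OpDesc, so attributes already attached to mul (for example the int8 scales written by the quant/dequant fuse pass below) carry over to the fused op. A minimal sketch of the idea; the SetType/SetInput calls are illustrative stand-ins for the unchanged remainder of the pass:

    // Inherit mul's attributes (e.g. quantization scales) in the new fc op
    // rather than default-constructing an empty desc.
    auto base_op_desc = *mul->Op()->Proto();
    OpDesc desc(base_op_desc, nullptr);
    desc.SetType("fc");                 // illustrative: re-type the copied desc
    desc.SetInput("Input", {fc_x_in});  // illustrative: re-wire inputs/outputs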

paddle/fluid/framework/ir/graph_pattern_detector.cc

Lines changed: 12 additions & 13 deletions
@@ -1640,32 +1640,31 @@ PDNode *patterns::FillConstantElementWiseMulFuse::operator()(
 void patterns::QuantDequantOpFuse::operator()(PDNode *quant_op_input,
                                               const std::string &op_type,
                                               const std::string &weight_name,
-                                              int times) {
+                                              int times,
+                                              const std::string &quant_type) {
   const int kNumFields = 5;
   const int kQuantizedWeightOffset = 0;
   const int kQuantizedOpOffset = 1;
   const int kQuantizedOpOutOffset = 2;
   const int kDequantOpOffset = 3;
   const int kDequantOpOutOffset = 4;
   // the quant op always be one.
-  auto quant_op_in_scale =
-      pattern->NewNode(GetNodeName("quant_op_in_scale"))
-          ->assert_is_op_input("fake_quantize_range_abs_max", "InScale")
-          ->AsInput();
-  auto quant_op = pattern->NewNode(GetNodeName("quant_op"))
-                      ->assert_is_op("fake_quantize_range_abs_max");
+  auto quant_op_in_scale = pattern->NewNode(GetNodeName("quant_op_in_scale"))
+                               ->assert_is_op_input(quant_type, "InScale")
+                               ->AsInput();
+  auto quant_op =
+      pattern->NewNode(GetNodeName("quant_op"))->assert_is_op(quant_type);
 
   auto quant_op_out_scale =
       pattern->NewNode(GetNodeName("quant_op_out_scale"))
-          ->assert_is_op_output("fake_quantize_range_abs_max", "OutScale")
+          ->assert_is_op_output(quant_type, "OutScale")
           ->assert_is_op_input("fake_dequantize_max_abs", "Scale")
           ->AsIntermediate();
 
-  auto quant_op_out =
-      pattern->NewNode(GetNodeName("quant_op_out"))
-          ->assert_is_op_output("fake_quantize_range_abs_max", "Out")
-          ->assert_is_op_input(op_type)
-          ->AsIntermediate();
+  auto quant_op_out = pattern->NewNode(GetNodeName("quant_op_out"))
+                          ->assert_is_op_output(quant_type, "Out")
+                          ->assert_is_op_input(op_type)
+                          ->AsIntermediate();
 
   // there are 'times' quantized and dequant op
   std::vector<PDNode *> nodes;
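
Note: threading quant_type through the functor lets one pattern description match both supported fake-quantize ops; the default argument on times was also dropped in the header, so every call site now passes all five arguments explicitly. A minimal sketch of a call site, mirroring RunQuantDequant in quant_conv2d_dequant_fuse_pass.cc below (variable names are illustrative):

    GraphPatternDetector gpd;
    auto *x = gpd.mutable_pattern()
                  ->NewNode("x")
                  ->assert_is_op_input("fake_quantize_moving_average_abs_max", "X")
                  ->AsInput();
    patterns::QuantDequantOpFuse pattern(gpd.mutable_pattern(), "quant_dequant_fuse");
    pattern(x, /*op_type=*/"depthwise_conv2d", /*weight_name=*/"Filter",
            /*times=*/1, /*quant_type=*/"fake_quantize_moving_average_abs_max");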

paddle/fluid/framework/ir/graph_pattern_detector.h

Lines changed: 2 additions & 1 deletion
@@ -880,7 +880,8 @@ struct QuantDequantOpFuse : public PatternBase {
       : PatternBase(pattern, name_scope, "quant_dequant_fuse") {}
 
   void operator()(PDNode* quant_op_input, const std::string& op_name,
-                  const std::string& weight_name, int times = 1);
+                  const std::string& weight_name, int times,
+                  const std::string& quant_type);
 
   std::string GetNodeName(const std::string& op_type) {
     return PDNodeName(name_scope_, repr_, id_, op_type);

paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc

Lines changed: 19 additions & 9 deletions
@@ -25,7 +25,8 @@ namespace framework {
 namespace ir {
 
 void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
-                     std::string op_type) {
+                     const std::string& op_type,
+                     const std::string& quant_type) {
   const std::string pattern_name = "quant_dequant_fuse";
   // FusePassBase::Init(pattern_name, graph);
   const int kNumFields = 5;
@@ -38,14 +39,17 @@ void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
   GraphPatternDetector gpd;
   auto* x = gpd.mutable_pattern()
                 ->NewNode("x")
-                ->assert_is_op_input("fake_quantize_range_abs_max", "X")
+                ->assert_is_op_input(quant_type, "X")
                 ->AsInput();
 
   std::string quantized_op_type = "";
   std::string weight_name = "";
   if (op_type == "conv2d") {
     quantized_op_type = "conv2d";
     weight_name = "Filter";
+  } else if (op_type == "depthwise_conv2d") {
+    quantized_op_type = "depthwise_conv2d";
+    weight_name = "Filter";
   } else if (op_type == "conv2d_fusion") {
     quantized_op_type = "conv2d_fusion";
     weight_name = "Filter";
@@ -62,7 +66,7 @@ void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
   }
 
   patterns::QuantDequantOpFuse pattern(gpd.mutable_pattern(), pattern_name);
-  pattern(x, quantized_op_type, weight_name, times);
+  pattern(x, quantized_op_type, weight_name, times, quant_type);
 
   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                      Graph* g) {
@@ -103,7 +107,6 @@ void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
   std::unordered_set<const Node*> delete_nodes;
 
   for (int i = 0; i < times; i++) {
-    // max_range = (range * range) / weight_scale
     float max_range = boost::get<float>(
         nodes[i * kNumFields + kDequantOpOffset]->Op()->GetAttr("max_range"));
     float weight_scale = (range * range) / max_range;
@@ -118,7 +121,8 @@ void RunQuantDequant(ir::Graph* graph, Scope* scope, int times,
     new_op_desc.SetType(quantized_op_type);
 
     if (quantized_op_type == "conv2d" ||
-        quantized_op_type == "conv2d_fusion") {
+        quantized_op_type == "conv2d_fusion" ||
+        quantized_op_type == "depthwise_conv2d") {
       new_op_desc.SetInput("Input", {new_input});
       new_op_desc.SetOutput("Output", {new_output});
     } else if (quantized_op_type == "fc") {
@@ -156,11 +160,17 @@ void QuantDequantFusePass::ApplyImpl(ir::Graph* graph) const {
   const std::string pattern_name = "quant_dequant_fuse";
   FusePassBase::Init(pattern_name, graph);
 
-  std::unordered_set<std::string> quantized_op_types = {"conv2d", "mul"};
+  std::unordered_set<std::string> quant_types = {
+      "fake_quantize_range_abs_max", "fake_quantize_moving_average_abs_max"};
+
+  std::unordered_set<std::string> quantized_op_types = {"conv2d", "mul",
+                                                        "depthwise_conv2d"};
   auto* scope = param_scope();
-  for (auto& op_type : quantized_op_types) {
-    for (int i = 1; i <= 6; i++) {
-      RunQuantDequant(graph, scope, i, op_type);
+  for (auto& quant_type : quant_types) {
+    for (auto& op_type : quantized_op_types) {
+      for (int i = 6; i >= 1; i--) {
+        RunQuantDequant(graph, scope, i, op_type, quant_type);
+      }
     }
   }
 }
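
Note: the deleted comment documented the identity the code still relies on: fake_dequantize_max_abs stores max_range = range^2 / weight_scale, so the pass inverts it as weight_scale = range^2 / max_range. A worked example with hypothetical numbers, assuming 8-bit quantization where range would be (1 << (8 - 1)) - 1 = 127:

    const float range = 127.0f;       // (1 << (bit_length - 1)) - 1 for bit_length = 8
    const float max_range = 3225.8f;  // hypothetical "max_range" attr on the dequant op
    // weight_scale = 127 * 127 / 3225.8 = 16129 / 3225.8 ~= 5.0
    const float weight_scale = (range * range) / max_range;

The iteration in ApplyImpl also now runs times from 6 down to 1, presumably so that subgraphs with more quant/dequant branches are fused before their smaller sub-patterns can match.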

paddle/fluid/inference/anakin/convert/CMakeLists.txt

Lines changed: 6 additions & 1 deletion
@@ -1,4 +1,9 @@
-cc_library(anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc detection_out.cc scale.cc dropout.cc im2sequence.cc sum.cc affine_channel.cc roi_align.cc DEPS anakin_engine framework_proto scope op_registry)
+cc_library(anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc
+elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc
+batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc
+detection_out.cc scale.cc dropout.cc im2sequence.cc sum.cc affine_channel.cc
+roi_align.cc helper.cc DEPS anakin_engine framework_proto scope op_registry
+gtest)
 
 cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op SERIAL)
 cc_test(test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv SERIAL)

paddle/fluid/inference/anakin/convert/activation.cc

Lines changed: 39 additions & 10 deletions
@@ -20,8 +20,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-template <typename TargetT>
-ActivationOpConverter<TargetT>::ActivationOpConverter(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+ActivationOpConverter<TargetT, PrecisionT>::ActivationOpConverter(
     const std::string &op_type)
     : op_type_(op_type) {
   auto it = anakin_op_types_.find(op_type_);
@@ -30,8 +30,8 @@ ActivationOpConverter<TargetT>::ActivationOpConverter(
   anakin_op_type_ = it->second;
 }
 
-template <typename TargetT>
-void ActivationOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void ActivationOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
@@ -50,11 +50,40 @@ void ActivationOpConverter<TargetT>::operator()(
 }  // namespace paddle
 
 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(sigmoid,
-                                  SigmoidOpConverter<::anakin::saber::NV>);
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter<::anakin::saber::NV>);
+using sigmoid_nv_fp32 =
+    ::paddle::inference::anakin::SigmoidOpConverter<::anakin::saber::NV,
+                                                    ::anakin::Precision::FP32>;
+using sigmoid_nv_int8 =
+    ::paddle::inference::anakin::SigmoidOpConverter<::anakin::saber::NV,
+                                                    ::anakin::Precision::INT8>;
+using tanh_nv_fp32 =
+    ::paddle::inference::anakin::TanhOpConverter<::anakin::saber::NV,
+                                                 ::anakin::Precision::FP32>;
+using tanh_nv_int8 =
+    ::paddle::inference::anakin::TanhOpConverter<::anakin::saber::NV,
+                                                 ::anakin::Precision::INT8>;
+
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(sigmoid, sigmoid_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(sigmoid, sigmoid_nv_int8);
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(tanh, tanh_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(tanh, tanh_nv_int8);
 #endif
 
-REGISTER_CPU_ANAKIN_OP_CONVERTER(sigmoid,
-                                 SigmoidOpConverter<::anakin::saber::X86>);
-REGISTER_CPU_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter<::anakin::saber::X86>);
+using sigmoid_cpu_fp32 =
+    ::paddle::inference::anakin::SigmoidOpConverter<::anakin::saber::X86,
+                                                    ::anakin::Precision::FP32>;
+using sigmoid_cpu_int8 =
+    ::paddle::inference::anakin::SigmoidOpConverter<::anakin::saber::X86,
+                                                    ::anakin::Precision::INT8>;
+using tanh_cpu_fp32 =
+    ::paddle::inference::anakin::TanhOpConverter<::anakin::saber::X86,
+                                                 ::anakin::Precision::FP32>;
+using tanh_cpu_int8 =
+    ::paddle::inference::anakin::TanhOpConverter<::anakin::saber::X86,
+                                                 ::anakin::Precision::INT8>;
+
+REGISTER_CPU_ANAKIN_OP_CONVERTER(sigmoid, sigmoid_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(sigmoid, sigmoid_cpu_int8);
+
+REGISTER_CPU_ANAKIN_OP_CONVERTER(tanh, tanh_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(tanh, tanh_cpu_int8);
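
Note: the registration recipe is now mechanical: one using-alias per (target, precision) pair, registered under the macro that encodes both. The aliases are needed because the comma inside the template argument list would otherwise be split into separate macro arguments. A hedged sketch of the same recipe for some other converter; FooOpConverter and the foo op name are placeholders, not part of this commit:

    using foo_nv_fp32 =
        ::paddle::inference::anakin::FooOpConverter<::anakin::saber::NV,
                                                    ::anakin::Precision::FP32>;
    using foo_nv_int8 =
        ::paddle::inference::anakin::FooOpConverter<::anakin::saber::NV,
                                                    ::anakin::Precision::INT8>;
    REGISTER_CUDA_ANAKIN_OP_CONVERTER(foo, foo_nv_fp32);
    REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(foo, foo_nv_int8);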

paddle/fluid/inference/anakin/convert/activation.h

Lines changed: 9 additions & 8 deletions
@@ -22,8 +22,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-template <typename TargetT>
-class ActivationOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class ActivationOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   explicit ActivationOpConverter(const std::string &op_type);
 
@@ -40,16 +40,17 @@ class ActivationOpConverter : public AnakinOpConverter<TargetT> {
       {"sigmoid", "Sigmoid"}};
 };
 
-template <typename TargetT>
-class TanhOpConverter : public ActivationOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class TanhOpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
  public:
-  TanhOpConverter() : ActivationOpConverter<TargetT>("tanh") {}
+  TanhOpConverter() : ActivationOpConverter<TargetT, PrecisionT>("tanh") {}
 };
 
-template <typename TargetT>
-class SigmoidOpConverter : public ActivationOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class SigmoidOpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
  public:
-  SigmoidOpConverter() : ActivationOpConverter<TargetT>("sigmoid") {}
+  SigmoidOpConverter()
+      : ActivationOpConverter<TargetT, PrecisionT>("sigmoid") {}
 };
 }  // namespace anakin
 }  // namespace inference
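
Note: the concrete converters only forward a Paddle op name to the shared base, which looks the name up in anakin_op_types_. Supporting another activation would therefore be a one-line subclass plus a map entry. A hypothetical sketch (swish is illustrative only, not part of this commit):

    template <typename TargetT, ::anakin::Precision PrecisionT>
    class SwishOpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
     public:
      // Requires a {"swish", "Swish"} entry in anakin_op_types_.
      SwishOpConverter() : ActivationOpConverter<TargetT, PrecisionT>("swish") {}
    };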

paddle/fluid/inference/anakin/convert/affine_channel.cc

Lines changed: 24 additions & 55 deletions
@@ -16,79 +16,35 @@
 #include <algorithm>
 #include <string>
 #include <vector>
-
-using anakin::graph::GraphGlobalMem;
-using anakin::PTuple;
-using anakin::AK_FLOAT;
-using anakin::saber::Shape;
+#include "paddle/fluid/inference/anakin/convert/helper.h"
 
 namespace paddle {
 namespace inference {
 namespace anakin {
 
-template <typename TargetT>
-void AffineChannelOpConverter<TargetT>::operator()(
+template <typename TargetT, ::anakin::Precision PrecisionT>
+void AffineChannelOpConverter<TargetT, PrecisionT>::operator()(
     const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
     const framework::Scope &scope, bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
   PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
 
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
-
   auto input_name = op_desc.Input("X").front();
   auto output_name = op_desc.Output("Out").front();
+  this->engine_->AddOp(op_name, "AffineChannel", {input_name}, {output_name});
 
   // Copy the Scale to CPUPlace and get the pointer.
   auto *scale_v = scope.FindVar(op_desc.Input("Scale").front());
   PADDLE_ENFORCE_NOT_NULL(scale_v);
-  auto *scale_t = scale_v->GetMutable<framework::LoDTensor>();
-  std::unique_ptr<framework::LoDTensor> scale_tensor(
-      new framework::LoDTensor());
-  scale_tensor->Resize(scale_t->dims());
-  TensorCopySync((*scale_t), platform::CPUPlace(), scale_tensor.get());
+  auto weight1 = pblock_from_var<TargetT>(*scale_v);
+  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
 
   // Copy the Bias to CPUPlace and get the pointer.
   auto *bias_v = scope.FindVar(op_desc.Input("Bias").front());
   PADDLE_ENFORCE_NOT_NULL(bias_v);
-  auto *bias_t = bias_v->GetMutable<framework::LoDTensor>();
-  std::unique_ptr<framework::LoDTensor> bias_tensor(new framework::LoDTensor());
-  bias_tensor->Resize(bias_t->dims());
-  TensorCopySync((*bias_t), platform::CPUPlace(), bias_tensor.get());
-
-  this->engine_->AddOp(op_name, "AffineChannel", {input_name}, {output_name});
-
-  // Generate the Scale parameter of Anakin.
-  auto scale_shape = framework::vectorize2int(scale_t->dims());
-  while (scale_shape.size() < 4) {
-    scale_shape.insert(scale_shape.begin(), 1);
-  }
-  Shape anakin_scale_shape(scale_shape);
-  auto *weight1 =
-      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
-          anakin_scale_shape);
-  float *scale_cpu_data =
-      static_cast<float *>(weight1->h_tensor().mutable_data());
-  std::copy_n(scale_tensor->data<float>(), scale_tensor->numel(),
-              scale_cpu_data);
-  weight1->d_tensor().set_shape(anakin_scale_shape);
-  weight1->d_tensor().copy_from(weight1->h_tensor());
-  this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
-
-  // Generate the Bias parameter of Anakin.
-  auto bias_shape = framework::vectorize2int(bias_t->dims());
-  while (bias_shape.size() < 4) {
-    bias_shape.insert(bias_shape.begin(), 1);
-  }
-  Shape anakin_bias_shape(bias_shape);
-  auto *weight2 =
-      GraphGlobalMem<TargetT>::Global().template new_block<AK_FLOAT>(
-          anakin_bias_shape);
-  float *bias_cpu_data =
-      static_cast<float *>(weight2->h_tensor().mutable_data());
-  std::copy_n(bias_tensor->data<float>(), bias_tensor->numel(), bias_cpu_data);
-  weight2->d_tensor().set_shape(anakin_bias_shape);
-  weight2->d_tensor().copy_from(weight2->h_tensor());
+  auto weight2 = pblock_from_var<TargetT>(*bias_v);
   this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
 }
 
@@ -97,8 +53,21 @@ void AffineChannelOpConverter<TargetT>::operator()(
 }  // namespace paddle
 
 #ifdef PADDLE_WITH_CUDA
-REGISTER_CUDA_ANAKIN_OP_CONVERTER(
-    affine_channel, AffineChannelOpConverter<::anakin::saber::NV>);
+using affine_channel_nv_fp32 =
+    ::paddle::inference::anakin::AffineChannelOpConverter<
+        ::anakin::saber::NV, ::anakin::Precision::FP32>;
+using affine_channel_nv_int8 =
+    ::paddle::inference::anakin::AffineChannelOpConverter<
+        ::anakin::saber::NV, ::anakin::Precision::INT8>;
+REGISTER_CUDA_ANAKIN_OP_CONVERTER(affine_channel, affine_channel_nv_fp32);
+REGISTER_CUDA_INT8_ANAKIN_OP_CONVERTER(affine_channel, affine_channel_nv_int8);
 #endif
-REGISTER_CPU_ANAKIN_OP_CONVERTER(
-    affine_channel, AffineChannelOpConverter<::anakin::saber::X86>);
+
+using affine_channel_cpu_fp32 =
+    ::paddle::inference::anakin::AffineChannelOpConverter<
+        ::anakin::saber::X86, ::anakin::Precision::FP32>;
+using affine_channel_cpu_int8 =
+    ::paddle::inference::anakin::AffineChannelOpConverter<
+        ::anakin::saber::X86, ::anakin::Precision::INT8>;
+REGISTER_CPU_ANAKIN_OP_CONVERTER(affine_channel, affine_channel_cpu_fp32);
+REGISTER_CPU_INT8_ANAKIN_OP_CONVERTER(affine_channel, affine_channel_cpu_int8);
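
Note: the per-weight boilerplate collapses into pblock_from_var from the new helper.h (added to the converter library in the CMake change above). The helper itself is not shown in this excerpt; below is a plausible reconstruction from the code it replaces. The return type is written here as ::anakin::PBlock<TargetT>*, and the real signature in helper.h may differ:

    template <typename TargetT>
    ::anakin::PBlock<TargetT> *pblock_from_var(const framework::Variable &var) {
      // Stage the tensor's data on the host.
      auto &tensor = var.Get<framework::LoDTensor>();
      framework::LoDTensor cpu_tensor;
      cpu_tensor.Resize(tensor.dims());
      TensorCopySync(tensor, platform::CPUPlace(), &cpu_tensor);

      // Anakin blocks are 4-D; left-pad the shape with 1s.
      auto dims = framework::vectorize2int(tensor.dims());
      while (dims.size() < 4) dims.insert(dims.begin(), 1);
      ::anakin::saber::Shape shape(dims);

      // Allocate a block in Anakin's global graph memory, fill the host
      // tensor, then sync it to the device tensor.
      auto *block = ::anakin::graph::GraphGlobalMem<TargetT>::Global()
                        .template new_block<::anakin::AK_FLOAT>(shape);
      std::copy_n(cpu_tensor.data<float>(), cpu_tensor.numel(),
                  static_cast<float *>(block->h_tensor().mutable_data()));
      block->d_tensor().set_shape(shape);
      block->d_tensor().copy_from(block->h_tensor());
      return block;
    }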

paddle/fluid/inference/anakin/convert/affine_channel.h

Lines changed: 2 additions & 2 deletions
@@ -21,8 +21,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-template <typename TargetT>
-class AffineChannelOpConverter : public AnakinOpConverter<TargetT> {
+template <typename TargetT, ::anakin::Precision PrecisionT>
+class AffineChannelOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
  public:
   AffineChannelOpConverter() = default;