Skip to content

Commit 4b9fa42

Browse files
committed
Cherry-pick from #16813: change singleton to graph RegistBlock
test=release/1.4
1 parent e14ab18 commit 4b9fa42

File tree

12 files changed

+86
-52
lines changed

12 files changed

+86
-52
lines changed

paddle/fluid/framework/ir/fc_fuse_pass.cc

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,18 +48,37 @@ void FCFusePass::ApplyImpl(ir::Graph* graph) const {
4848
GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern);
4949
GET_IR_NODE_FROM_SUBGRAPH(mul_out, mul_out, fc_pattern);
5050

51-
auto base_op_desc = *mul->Op()->Proto();
51+
auto base_op_desc = mul->Op();
5252
// Create an FC Node.
53-
OpDesc desc(base_op_desc, nullptr);
53+
// OpDesc desc(base_op_desc, nullptr);
54+
OpDesc desc;
5455
std::string fc_x_in = subgraph.at(x)->Name();
5556
std::string fc_Y_in = w->Name();
5657
std::string fc_bias_in = fc_bias->Name();
5758
std::string fc_out_out = fc_out->Name();
59+
5860
desc.SetInput("Input", std::vector<std::string>({fc_x_in}));
5961
desc.SetInput("W", std::vector<std::string>({fc_Y_in}));
6062
desc.SetInput("Bias", std::vector<std::string>({fc_bias_in}));
6163
desc.SetOutput("Out", std::vector<std::string>({fc_out_out}));
6264
desc.SetAttr("in_num_col_dims", mul->Op()->GetAttr("x_num_col_dims"));
65+
66+
// For anakin subgraph int8
67+
// When in anakin subgraph int8 mode, the pattern like "fake_quant + mul +
68+
// fake_dequant"
69+
// can be detected by the quant_dequant_fuse_pass. This pass will add
70+
// "input_scale",
71+
// "weight_scale" which are extracted from fake_quant op and fake_dequant op
72+
// to mul op,
73+
// and then delete the fake_quant op and fake_dequant op in the graph. If
74+
// the mul op
75+
// has the scale info, we should add those to the fused fc.
76+
if (base_op_desc->HasAttr("enable_int8")) {
77+
desc.SetAttr("enable_int8", base_op_desc->GetAttr("enable_int8"));
78+
desc.SetAttr("input_scale", base_op_desc->GetAttr("input_scale"));
79+
desc.SetAttr("weight_scale", base_op_desc->GetAttr("weight_scale"));
80+
}
81+
6382
desc.SetType("fc");
6483
auto fc_node = g->CreateOpNode(&desc); // OpDesc will be copied.
6584
GraphSafeRemoveNodes(graph, {mul, elementwise_add, mul_out});

paddle/fluid/inference/anakin/convert/affine_channel.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,13 @@ void AffineChannelOpConverter<TargetT, PrecisionT>::operator()(
3838
// Copy the Scale to CPUPlace and get the pointer.
3939
auto *scale_v = scope.FindVar(op_desc.Input("Scale").front());
4040
PADDLE_ENFORCE_NOT_NULL(scale_v);
41-
auto weight1 = pblock_from_var<TargetT>(*scale_v);
41+
auto weight1 = pblock_from_var<TargetT, PrecisionT>(*scale_v, this->engine_);
4242
this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
4343

4444
// Copy the Bias to CPUPlace and get the pointer.
4545
auto *bias_v = scope.FindVar(op_desc.Input("Bias").front());
4646
PADDLE_ENFORCE_NOT_NULL(bias_v);
47-
auto weight2 = pblock_from_var<TargetT>(*bias_v);
47+
auto weight2 = pblock_from_var<TargetT, PrecisionT>(*bias_v, this->engine_);
4848
this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
4949
}
5050

paddle/fluid/inference/anakin/convert/batch_norm.cc

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -54,25 +54,27 @@ void BatchNormOpConverter<TargetT, PrecisionT>::operator()(
5454

5555
auto *mean_v = scope.FindVar(op_desc.Input("Mean").front());
5656
PADDLE_ENFORCE_NOT_NULL(mean_v);
57-
auto weight1 = pblock_from_var<TargetT>(*mean_v);
57+
auto weight1 = pblock_from_var<TargetT, PrecisionT>(*mean_v, this->engine_);
5858
this->engine_->AddOpAttr(bn_op_name, "weight_1", *weight1);
5959

6060
auto *variance_v = scope.FindVar(op_desc.Input("Variance").front());
6161
PADDLE_ENFORCE_NOT_NULL(variance_v);
62-
auto weight2 = pblock_from_var<TargetT>(*variance_v);
62+
auto weight2 =
63+
pblock_from_var<TargetT, PrecisionT>(*variance_v, this->engine_);
6364
this->engine_->AddOpAttr(bn_op_name, "weight_2", *weight2);
6465

65-
auto *weight3 = pblock_from_vector<TargetT>(std::vector<float>({1}));
66+
auto *weight3 = pblock_from_vector<TargetT, PrecisionT>(
67+
std::vector<float>({1}), this->engine_);
6668
this->engine_->AddOpAttr(bn_op_name, "weight_3", *weight3);
6769

6870
auto *scale_v = scope.FindVar(op_desc.Input("Scale").front());
6971
PADDLE_ENFORCE_NOT_NULL(scale_v);
70-
auto scale = pblock_from_var<TargetT>(*scale_v);
72+
auto scale = pblock_from_var<TargetT, PrecisionT>(*scale_v, this->engine_);
7173
this->engine_->AddOpAttr(scale_op_name, "weight_1", *scale);
7274

7375
auto *bias_v = scope.FindVar(op_desc.Input("Bias").front());
7476
PADDLE_ENFORCE_NOT_NULL(bias_v);
75-
auto bias = pblock_from_var<TargetT>(*bias_v);
77+
auto bias = pblock_from_var<TargetT, PrecisionT>(*bias_v, this->engine_);
7678
this->engine_->AddOpAttr(scale_op_name, "weight_2", *bias);
7779
}
7880

paddle/fluid/inference/anakin/convert/conv2d.cc

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,9 @@ void Conv2dOpConverter<TargetT, PrecisionT>::operator()(
7171
const float int8_range = 127.;
7272
float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
7373
float weight_scale = boost::get<float>(op_desc.GetAttr("weight_scale"));
74-
auto *weight1 = ::anakin::graph::GraphGlobalMem<TargetT>::Global()
75-
.template new_block<::anakin::AK_INT8>(anakin_shape);
74+
PBlock<TargetT> *weight1 =
75+
new PBlock<TargetT>(anakin_shape, ::anakin::AK_INT8);
76+
this->engine_->RegistBlock(weight1);
7677
float *weight_data = weight_tensor->data<float>();
7778
std::vector<char> weight_int8;
7879
int weight_num = weight_tensor->numel();
@@ -94,7 +95,8 @@ void Conv2dOpConverter<TargetT, PrecisionT>::operator()(
9495
{weight_scale / int8_range}, false);
9596
this->engine_->AddTensorScale(input_name, in_scale / int8_range);
9697
} else {
97-
auto *weight1 = pblock_from_tensor<TargetT>(*weight_tensor, weight_shape);
98+
auto *weight1 = pblock_from_tensor<TargetT, PrecisionT>(
99+
*weight_tensor, weight_shape, this->engine_);
98100
this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
99101
}
100102
}

paddle/fluid/inference/anakin/convert/conv2d_fusion.cc

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,9 @@ void Conv2dFusionOpConverter<TargetT, PrecisionT>::operator()(
7373
const float int8_range = 127.;
7474
float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
7575
float weight_scale = boost::get<float>(op_desc.GetAttr("weight_scale"));
76-
auto *weight1 = ::anakin::graph::GraphGlobalMem<TargetT>::Global()
77-
.template new_block<::anakin::AK_INT8>(anakin_shape);
76+
PBlock<TargetT> *weight1 =
77+
new PBlock<TargetT>(anakin_shape, ::anakin::AK_INT8);
78+
this->engine_->RegistBlock(weight1);
7879
float *weight_data = weight_tensor->data<float>();
7980
std::vector<char> weight_int8;
8081
int weight_num = weight_tensor->numel();
@@ -98,9 +99,10 @@ void Conv2dFusionOpConverter<TargetT, PrecisionT>::operator()(
9899
} else {
99100
auto weight_tensor = tensor_from_var(*filter_v, platform::CPUPlace());
100101
auto weight_shape = framework::vectorize2int(weight_tensor->dims());
101-
auto *weight1 = pblock_from_tensor<TargetT>(*weight_tensor, weight_shape);
102+
auto *weight1 = pblock_from_tensor<TargetT, PrecisionT>(
103+
*weight_tensor, weight_shape, this->engine_);
102104
this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
103-
auto weight2 = pblock_from_var<TargetT>(*b_v);
105+
auto weight2 = pblock_from_var<TargetT, PrecisionT>(*b_v, this->engine_);
104106
this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
105107
}
106108
}

paddle/fluid/inference/anakin/convert/dropout.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ void DropoutOpConverter<TargetT, PrecisionT>::operator()(
3939

4040
auto dropout_prob = boost::get<float>(op_desc.GetAttr("dropout_prob"));
4141
auto factor = 1 - dropout_prob;
42-
auto *weight1 = pblock_from_vector<TargetT>(std::vector<float>({factor}));
42+
auto *weight1 = pblock_from_vector<TargetT, PrecisionT>(
43+
std::vector<float>({factor}), this->engine_);
4344

4445
this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
4546
this->engine_->AddOpAttr(op_name, "axis", 0);

paddle/fluid/inference/anakin/convert/fc.cc

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,9 @@ void FcBaseOpConverter<TargetT, PrecisionT>::operator()(
7777
const float int8_range = 127.;
7878
float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
7979
float weight_scale = boost::get<float>(op_desc.GetAttr("weight_scale"));
80-
auto *weight1 = ::anakin::graph::GraphGlobalMem<TargetT>::Global()
81-
.template new_block<::anakin::AK_INT8>(anakin_shape);
80+
PBlock<TargetT> *weight1 =
81+
new PBlock<TargetT>(anakin_shape, ::anakin::AK_INT8);
82+
this->engine_->RegistBlock(weight1);
8283
std::vector<char> weight_int8;
8384
for (int i = 0; i < weight_num; i++) {
8485
bool is_valid_int8 =
@@ -98,15 +99,16 @@ void FcBaseOpConverter<TargetT, PrecisionT>::operator()(
9899
{weight_scale / int8_range}, false);
99100
this->engine_->AddTensorScale(input_name, in_scale / int8_range);
100101
} else {
101-
auto *weight1 = pblock_from_vector<TargetT>(trans_weight_data);
102+
auto *weight1 = pblock_from_vector<TargetT, PrecisionT>(trans_weight_data,
103+
this->engine_);
102104
this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
103105
}
104106

105107
// get bias
106108
if (with_bias) {
107109
auto *b_v = scope.FindVar(op_desc.Input("Bias").front());
108110
PADDLE_ENFORCE_NOT_NULL(b_v);
109-
auto weight2 = pblock_from_var<TargetT>(*b_v);
111+
auto weight2 = pblock_from_var<TargetT, PrecisionT>(*b_v, this->engine_);
110112
this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
111113
}
112114
}

paddle/fluid/inference/anakin/convert/helper.h

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
#include "paddle/fluid/framework/lod_tensor.h"
2222
#include "paddle/fluid/framework/variable.h"
23+
#include "paddle/fluid/inference/anakin/engine.h"
2324

2425
#include "framework/core/net/net.h"
2526
#include "framework/core/types.h"
@@ -29,58 +30,64 @@
2930

3031
using anakin::saber::Shape;
3132
using anakin::AK_FLOAT;
33+
using anakin::AK_INT8;
3234
using anakin::PBlock;
33-
using anakin::graph::GraphGlobalMem;
3435

3536
namespace paddle {
3637
namespace inference {
3738
namespace anakin {
3839

3940
std::unique_ptr<framework::LoDTensor> tensor_from_var(
4041
const framework::Variable& var, const platform::Place& place);
41-
template <typename T>
42-
PBlock<T>* pblock_from_tensor(const framework::LoDTensor& tensor,
43-
std::vector<int> shape) {
44-
while (shape.size() < 4) {
45-
shape.insert(shape.begin(), 1);
42+
43+
template <typename TargetT, ::anakin::Precision PrecisionT>
44+
PBlock<TargetT>* pblock_from_tensor(const framework::LoDTensor& tensor,
45+
std::vector<int> shape_vec,
46+
AnakinEngine<TargetT, PrecisionT>* engine) {
47+
while (shape_vec.size() < 4) {
48+
shape_vec.insert(shape_vec.begin(), 1);
4649
}
47-
Shape anakin_shape(shape);
48-
auto* weight =
49-
GraphGlobalMem<T>::Global().template new_block<AK_FLOAT>(anakin_shape);
50+
Shape shape(shape_vec);
51+
PBlock<TargetT>* weight = new PBlock<TargetT>(shape, AK_FLOAT);
52+
engine->RegistBlock(weight);
5053
float* cpu_data = static_cast<float*>(weight->h_tensor().mutable_data());
5154
std::copy_n(tensor.data<float>(), tensor.numel(), cpu_data);
52-
weight->d_tensor().set_shape(anakin_shape);
55+
weight->d_tensor().set_shape(shape);
5356
weight->d_tensor().copy_from(weight->h_tensor());
5457
return weight;
5558
}
5659

57-
template <typename T>
58-
PBlock<T>* pblock_from_vector(const std::vector<float>& vec,
59-
std::vector<int> shape_vec) {
60+
template <typename TargetT, ::anakin::Precision PrecisionT>
61+
PBlock<TargetT>* pblock_from_vector(const std::vector<float>& vec,
62+
std::vector<int> shape_vec,
63+
AnakinEngine<TargetT, PrecisionT>* engine) {
6064
while (shape_vec.size() < 4) {
6165
shape_vec.insert(shape_vec.begin(), 1);
6266
}
6367
Shape shape(shape_vec);
64-
auto* weight =
65-
GraphGlobalMem<T>::Global().template new_block<AK_FLOAT>(shape);
68+
PBlock<TargetT>* weight = new PBlock<TargetT>(shape, AK_FLOAT);
69+
engine->RegistBlock(weight);
6670
auto* weight_data = static_cast<float*>(weight->h_tensor().mutable_data());
6771
std::copy(std::begin(vec), std::end(vec), weight_data);
6872
weight->d_tensor().set_shape(shape);
6973
weight->d_tensor().copy_from(weight->h_tensor());
7074
return weight;
7175
}
7276

73-
template <typename T>
74-
PBlock<T>* pblock_from_vector(const std::vector<float>& vec) {
77+
template <typename TargetT, ::anakin::Precision PrecisionT>
78+
PBlock<TargetT>* pblock_from_vector(const std::vector<float>& vec,
79+
AnakinEngine<TargetT, PrecisionT>* engine) {
7580
int size = vec.size();
76-
return pblock_from_vector<T>(vec, std::vector<int>({1, 1, 1, size}));
81+
return pblock_from_vector<TargetT, PrecisionT>(
82+
vec, std::vector<int>({1, 1, 1, size}), engine);
7783
}
7884

79-
template <typename T>
80-
PBlock<T>* pblock_from_var(const framework::Variable& var) {
85+
template <typename TargetT, ::anakin::Precision PrecisionT>
86+
PBlock<TargetT>* pblock_from_var(const framework::Variable& var,
87+
AnakinEngine<TargetT, PrecisionT>* engine) {
8188
auto tensor = tensor_from_var(var, platform::CPUPlace());
8289
auto shape = framework::vectorize2int(tensor->dims());
83-
return pblock_from_tensor<T>(*tensor, shape);
90+
return pblock_from_tensor<TargetT, PrecisionT>(*tensor, shape, engine);
8491
}
8592

8693
} // namespace anakin

paddle/fluid/inference/anakin/engine.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,12 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Optimize() {
162162
PADDLE_ENFORCE(graph_->Optimize(), "Graph optimization.");
163163
}
164164

165+
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
166+
void AnakinEngine<TargetT, PrecisionType, RunType>::RegistBlock(
167+
::anakin::PBlock<TargetT> *block_p) {
168+
PADDLE_ENFORCE(graph_->RegistBlock(block_p), "Block register.");
169+
}
170+
165171
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
166172
std::unique_ptr<AnakinEngine<TargetT, PrecisionType, RunType>>
167173
AnakinEngine<TargetT, PrecisionType, RunType>::Clone() {

paddle/fluid/inference/anakin/engine.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ class AnakinEngine {
9090
int GetMaxBatchSize() { return max_batch_size_; }
9191
void Freeze();
9292
void Optimize();
93+
void RegistBlock(::anakin::PBlock<TargetT> *block_p);
9394
void Save(std::string path) { graph_->save(path); }
9495
bool IsInit() { return initialized_; }
9596
int GetDevice() { return device_; }

0 commit comments

Comments (0)