
Commit 8643dbc

cherry-pick from 16691: Anakin subgraph support yolo_v3 and faster-rcnn
1 parent 463f88a commit 8643dbc

20 files changed: +382 additions, -35 deletions

paddle/fluid/inference/anakin/convert/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
@@ -1,4 +1,4 @@
-cc_library(anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc detection_out.cc scale.cc dropout.cc im2sequence.cc sum.cc DEPS anakin_engine framework_proto scope op_registry)
+cc_library(anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc detection_out.cc scale.cc dropout.cc im2sequence.cc sum.cc affine_channel.cc roi_align.cc DEPS anakin_engine framework_proto scope op_registry)
 
 cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op SERIAL)
 cc_test(test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv SERIAL)
@@ -14,5 +14,5 @@ cc_test(test_anakin_flatten SRCS test_flatten_op.cc DEPS anakin_op_converter fla
 cc_test(test_anakin_transpose SRCS test_transpose_op.cc DEPS anakin_op_converter transpose_op SERIAL)
 cc_test(test_anakin_batch_norm SRCS test_batch_norm_op.cc DEPS anakin_op_converter batch_norm_op SERIAL)
 cc_test(test_anakin_dropout SRCS test_dropout_op.cc DEPS anakin_op_converter dropout_op SERIAL)
-#cc_test(test_anakin_im2sequence SRCS test_im2sequence_op.cc DEPS anakin_op_converter im2sequence_op im2col)
 cc_test(test_anakin_sum SRCS test_sum_op.cc DEPS anakin_op_converter sum_op selected_rows_functor SERIAL)
+cc_test(test_anakin_affine_channel SRCS test_affine_channel_op.cc DEPS anakin_op_converter affine_channel_op SERIAL)

paddle/fluid/inference/anakin/convert/affine_channel.cc

Lines changed: 100 additions & 0 deletions
@@ -0,0 +1,100 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/inference/anakin/convert/affine_channel.h"
#include <algorithm>
#include <string>
#include <vector>

using anakin::graph::GraphGlobalMem;
using anakin::AK_FLOAT;
using anakin::Precision;
using anakin::saber::NV;
using anakin::saber::X86;
using anakin::saber::Shape;
using anakin::PBlock;
using anakin::PTuple;

namespace paddle {
namespace inference {
namespace anakin {

void AffineChannelOpConverter::operator()(
    const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
    const framework::Scope &scope, bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);

  auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();

  auto input_name = op_desc.Input("X").front();
  auto output_name = op_desc.Output("Out").front();

  // Copy the Scale to CPUPlace and get the pointer.
  auto *scale_v = scope.FindVar(op_desc.Input("Scale").front());
  PADDLE_ENFORCE_NOT_NULL(scale_v);
  auto *scale_t = scale_v->GetMutable<framework::LoDTensor>();
  std::unique_ptr<framework::LoDTensor> scale_tensor(
      new framework::LoDTensor());
  scale_tensor->Resize(scale_t->dims());
  TensorCopySync((*scale_t), platform::CPUPlace(), scale_tensor.get());

  // Copy the Bias to CPUPlace and get the pointer.
  auto *bias_v = scope.FindVar(op_desc.Input("Bias").front());
  PADDLE_ENFORCE_NOT_NULL(bias_v);
  auto *bias_t = bias_v->GetMutable<framework::LoDTensor>();
  std::unique_ptr<framework::LoDTensor> bias_tensor(new framework::LoDTensor());
  bias_tensor->Resize(bias_t->dims());
  TensorCopySync((*bias_t), platform::CPUPlace(), bias_tensor.get());

  engine_->AddOp(op_name, "AffineChannel", {input_name}, {output_name});

  // Generate the Scale parameter of Anakin.
  auto scale_shape = framework::vectorize2int(scale_t->dims());
  while (scale_shape.size() < 4) {
    scale_shape.insert(scale_shape.begin(), 1);
  }
  Shape anakin_scale_shape(scale_shape);
  auto *weight1 = GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(
      anakin_scale_shape);
  float *scale_cpu_data =
      static_cast<float *>(weight1->h_tensor().mutable_data());
  std::copy_n(scale_tensor->data<float>(), scale_tensor->numel(),
              scale_cpu_data);
  weight1->d_tensor().set_shape(anakin_scale_shape);
  weight1->d_tensor().copy_from(weight1->h_tensor());
  engine_->AddOpAttr(op_name, "weight_1", *weight1);

  // Generate the Bias parameter of Anakin.
  auto bias_shape = framework::vectorize2int(bias_t->dims());
  while (bias_shape.size() < 4) {
    bias_shape.insert(bias_shape.begin(), 1);
  }
  Shape anakin_bias_shape(bias_shape);
  auto *weight2 = GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(
      anakin_bias_shape);
  float *bias_cpu_data =
      static_cast<float *>(weight2->h_tensor().mutable_data());
  std::copy_n(bias_tensor->data<float>(), bias_tensor->numel(), bias_cpu_data);
  weight2->d_tensor().set_shape(anakin_bias_shape);
  weight2->d_tensor().copy_from(weight2->h_tensor());
  engine_->AddOpAttr(op_name, "weight_2", *weight2);
}

}  // namespace anakin
}  // namespace inference
}  // namespace paddle

REGISTER_ANAKIN_OP_CONVERTER(affine_channel, AffineChannelOpConverter);
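
For context (not part of the commit): affine_channel applies a per-channel affine transform to an NCHW tensor, out[n][c][h][w] = Scale[c] * x[n][c][h][w] + Bias[c], so the 1-D Scale and Bias of length C are the only weights the converter has to ship to Anakin, left-padded with 1s to the 4-D shape (1, 1, 1, C) by the while-loops above. A minimal reference sketch of that semantics:

#include <vector>

// Reference semantics of affine_channel on an NCHW tensor:
//   out[n][c][h][w] = scale[c] * x[n][c][h][w] + bias[c]
std::vector<float> AffineChannelRef(const std::vector<float> &x,
                                    const std::vector<float> &scale,
                                    const std::vector<float> &bias,
                                    int n, int c, int h, int w) {
  std::vector<float> out(x.size());
  for (int in = 0; in < n; ++in) {
    for (int ic = 0; ic < c; ++ic) {
      for (int i = 0; i < h * w; ++i) {
        int idx = (in * c + ic) * h * w + i;
        out[idx] = scale[ic] * x[idx] + bias[ic];
      }
    }
  }
  return out;
}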

paddle/fluid/inference/anakin/convert/affine_channel.h

Lines changed: 39 additions & 0 deletions
@@ -0,0 +1,39 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"

namespace paddle {
namespace inference {
namespace anakin {

class AffineChannelOpConverter : public AnakinOpConverter {
 public:
  AffineChannelOpConverter() = default;

  virtual void operator()(const framework::proto::OpDesc &op,
                          const framework::BlockDesc &block_desc,
                          const framework::Scope &scope,
                          bool test_mode) override;
  virtual ~AffineChannelOpConverter() {}

 private:
};

}  // namespace anakin
}  // namespace inference
}  // namespace paddle

paddle/fluid/inference/anakin/convert/op_converter.h

Lines changed: 8 additions & 8 deletions
@@ -81,7 +81,6 @@ class AnakinOpConverter {
       const std::unordered_set<std::string> &parameters,
       const std::vector<std::string> &outputs, AnakinNvEngine *engine) {
     ConvertBlock(block_desc, parameters, *scope, engine);
-    engine->Freeze();
     // if the max_batch size
     int max_batch_size = engine->GetMaxBatchSize();
     PADDLE_ENFORCE(max_batch_size > 0,
@@ -91,15 +90,20 @@
     // the block_desc.
     auto max_input_shape = engine->GetMaxInputShape();
     std::map<std::string, std::vector<int>> temp_max_input_shape;
-
+    // Register outputs with anakin using the RegistVar interface before Freeze.
+    // Note that RegistVar's parameters can only be outputs, not inputs.
+    for (auto &output : outputs) {
+      engine->Graph()->RegistVar(output);
+    }
+    engine->Freeze();
     for (auto &input : inputs) {
       if (parameters.count(input)) continue;
       std::vector<int> input_shape;
       input_shape.resize(4);
       input_shape[0] = max_batch_size;
       if (max_input_shape.count(input)) {
         PADDLE_ENFORCE(max_input_shape[input].size() == 4,
-                       "the dimensions of max_input_shape setted from "
+                       "the dimensions of max_input_shape setted from "
                        "config->EnableAnakinEngine must be 4");
         for (int i = 1; i < 4; i++) {
           input_shape[i] = max_input_shape[input][i];
@@ -118,14 +122,10 @@ class AnakinOpConverter {
       }
       temp_max_input_shape[input] = input_shape;
       engine->SetInputShape(input, input_shape);
-      engine->Graph()->RegistVar(input);  // For share from data.
     }
     engine->SetMaxInputShape(temp_max_input_shape);
     engine->Optimize();
-
-    // For anakin share with fluid tensor.
-    engine->AllocTmpMem();
-    engine->InitGraph();
+    engine->InitNet();
   }
 
   void SetEngine(AnakinNvEngine *engine) { engine_ = engine; }
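
The net effect of this diff is a stricter engine build order: outputs are registered via RegistVar before Freeze (RegistVar accepts only outputs, as the new comment notes), the per-input RegistVar call and AllocTmpMem are gone, and InitNet replaces InitGraph. A sketch of the resulting call sequence; the Engine type below is a hypothetical stand-in (the real class is AnakinNvEngine), and only the ordering is taken from the diff:

#include <string>
#include <vector>

// Hypothetical stand-in for the engine; only the call order matters here.
struct Engine {
  void RegistVar(const std::string &name) {}  // mark a graph output
  void Freeze() {}                            // lock the graph topology
  void SetInputShape(const std::string &name, const std::vector<int> &shape) {}
  void Optimize() {}                          // graph-level passes
  void InitNet() {}                           // build the executable net
};

void BuildEngine(Engine *engine, const std::vector<std::string> &inputs,
                 const std::vector<std::string> &outputs) {
  for (const auto &out : outputs) engine->RegistVar(out);  // outputs first
  engine->Freeze();                                        // then freeze
  for (const auto &in : inputs) {
    engine->SetInputShape(in, {1, 3, 608, 608});  // example max shape
  }
  engine->Optimize();
  engine->InitNet();  // replaces the old InitGraph()/AllocTmpMem() pair
}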

paddle/fluid/inference/anakin/convert/relu.cc

Lines changed: 18 additions & 0 deletions
@@ -41,8 +41,26 @@ void ReluOpConverter::operator()(const framework::proto::OpDesc &op,
   engine_->AddOpAttr(op_name, "alpha", 0);
 }
 
+void LeakyReluOpConverter::operator()(const framework::proto::OpDesc &op,
+                                      const framework::BlockDesc &block_desc,
+                                      const framework::Scope &scope,
+                                      bool test_mode) {
+  framework::OpDesc op_desc(op, nullptr);
+  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
+  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
+
+  auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
+  auto input_name = op_desc.Input("X").front();
+  auto output_name = op_desc.Output("Out").front();
+
+  float alpha = boost::get<float>(op_desc.GetAttr("alpha"));
+  engine_->AddOp(op_name, "ReLU", {input_name}, {output_name});
+  engine_->AddOpAttr(op_name, "alpha", alpha);
+}
+
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle
 
 REGISTER_ANAKIN_OP_CONVERTER(relu, ReluOpConverter);
+REGISTER_ANAKIN_OP_CONVERTER(leaky_relu, LeakyReluOpConverter);
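
Both converters emit the same Anakin "ReLU" op; only the alpha attribute differs (the plain relu converter above hard-codes alpha = 0, while leaky_relu forwards the op's alpha attribute). For reference, the activation the shared op computes (illustration only, not commit code):

// One slope parameter covers both activations:
//   alpha == 0.0f  ->  relu:        f(x) = max(x, 0)
//   alpha != 0.0f  ->  leaky_relu:  f(x) = x > 0 ? x : alpha * x
inline float LeakyRelu(float x, float alpha) {
  return x > 0.0f ? x : alpha * x;
}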

paddle/fluid/inference/anakin/convert/relu.h

Lines changed: 11 additions & 0 deletions
@@ -33,6 +33,17 @@ class ReluOpConverter : public AnakinOpConverter {
   virtual ~ReluOpConverter() {}
 };
 
+class LeakyReluOpConverter : public AnakinOpConverter {
+ public:
+  LeakyReluOpConverter() = default;
+
+  virtual void operator()(const framework::proto::OpDesc &op,
+                          const framework::BlockDesc &block_desc,
+                          const framework::Scope &scope,
+                          bool test_mode) override;
+  virtual ~LeakyReluOpConverter() {}
+};
+
 }  // namespace anakin
 }  // namespace inference
 }  // namespace paddle

paddle/fluid/inference/anakin/convert/roi_align.cc

Lines changed: 59 additions & 0 deletions
@@ -0,0 +1,59 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/inference/anakin/convert/roi_align.h"
#include <algorithm>
#include <map>

using anakin::graph::GraphGlobalMem;
using anakin::AK_FLOAT;
using anakin::saber::NV;
using anakin::saber::Shape;

namespace paddle {
namespace inference {
namespace anakin {

void RoiAlignOpConverter::operator()(const framework::proto::OpDesc &op,
                                     const framework::BlockDesc &block_desc,
                                     const framework::Scope &scope,
                                     bool test_mode) {
  framework::OpDesc op_desc(op, nullptr);
  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
  PADDLE_ENFORCE_EQ(op_desc.Input("ROIs").size(), 1);
  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);

  auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
  auto input_x_name = op_desc.Input("X").front();
  auto input_rois_name = op_desc.Input("ROIs").front();
  auto output_name = op_desc.Output("Out").front();

  auto spatial_scale = boost::get<float>(op_desc.GetAttr("spatial_scale"));
  auto pooled_height = boost::get<int>(op_desc.GetAttr("pooled_height"));
  auto pooled_width = boost::get<int>(op_desc.GetAttr("pooled_width"));
  auto sampling_ratio = boost::get<int>(op_desc.GetAttr("sampling_ratio"));

  engine_->AddOp(op_name, "RoiAlign", {input_x_name, input_rois_name},
                 {output_name});
  engine_->AddOpAttr(op_name, "spatial_scale", spatial_scale);
  engine_->AddOpAttr(op_name, "pooled_height", pooled_height);
  engine_->AddOpAttr(op_name, "pooled_width", pooled_width);
  engine_->AddOpAttr(op_name, "sampling_ratio", sampling_ratio);
}

}  // namespace anakin
}  // namespace inference
}  // namespace paddle

REGISTER_ANAKIN_OP_CONVERTER(roi_align, RoiAlignOpConverter);
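
For context (not part of the commit): roi_align maps each ROI onto the feature map by scaling its coordinates by spatial_scale, divides it into a pooled_height x pooled_width grid, and averages bilinearly interpolated samples inside each bin (sampling_ratio samples per axis when positive). The converter only forwards these four attributes; a small sketch of the shape arithmetic they imply, assuming the usual NCHW input and [R, 4] ROI layout:

#include <array>

// Output of roi_align for an [N, C, H, W] feature map and R rois:
//   [R, C, pooled_height, pooled_width]
std::array<int, 4> RoiAlignOutShape(int num_rois, int channels,
                                    int pooled_height, int pooled_width) {
  return {num_rois, channels, pooled_height, pooled_width};
}

// Extent of one pooling bin on the feature map, for a roi that is
// roi_width pixels wide in original-image coordinates.
float BinWidth(float roi_width, float spatial_scale, int pooled_width) {
  return roi_width * spatial_scale / static_cast<float>(pooled_width);
}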

paddle/fluid/inference/anakin/convert/roi_align.h

Lines changed: 38 additions & 0 deletions
@@ -0,0 +1,38 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <map>
#include <string>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"

namespace paddle {
namespace inference {
namespace anakin {

class RoiAlignOpConverter : public AnakinOpConverter {
 public:
  RoiAlignOpConverter() = default;

  virtual void operator()(const framework::proto::OpDesc &op,
                          const framework::BlockDesc &block_desc,
                          const framework::Scope &scope,
                          bool test_mode) override;
  virtual ~RoiAlignOpConverter() {}
};

}  // namespace anakin
}  // namespace inference
}  // namespace paddle
