Commit efc5392
Merge pull request #12676 from tensor-tang/refine/op/fc
refine fc op
2 parents 5d2834f + eee3846

File tree: 5 files changed, +182 -31 lines


paddle/fluid/operators/CMakeLists.txt

Lines changed: 3 additions & 6 deletions
@@ -170,6 +170,9 @@ function(op_library TARGET)
     file(APPEND ${pybind_file} "USE_OP(fake_dequantize_max_abs);\n")
   elseif(${TARGET} STREQUAL "tensorrt_engine_op")
     message(STATUS "Pybind skips [tensorrt_engine_op], for this OP is only used in inference")
+  elseif(${TARGET} STREQUAL "fc")
+    # HACK: fc has only MKLDNN and CPU kernels, which would mismatch the cpu-only condition
+    file(APPEND ${pybind_file} "USE_CPU_ONLY_OP(${TARGET});\n")
   else()
     file(APPEND ${pybind_file} "USE_OP(${TARGET});\n")
   endif()
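Note on the HACK above: USE_OP references every device-kernel registrar of an operator at link time, while fc registers only CPU-side kernels (plain and MKLDNN), so the generic branch would leave an unresolved CUDA-kernel symbol. A simplified sketch of what such registration macros might expand to — hypothetical names, not Paddle's actual op_registry.h:

// Hypothetical simplification of the kernel-use macros (illustration only).
// USE_OP touches the registrar symbol of every device kernel, so it fails to
// link for an op that never registers a CUDA kernel; USE_CPU_ONLY_OP touches
// the CPU registrar alone.
#define USE_OP_DEVICE_KERNEL(op, DEVICE)               \
  extern int TouchOpKernelRegistrar_##op##_##DEVICE(); \
  static int use_kernel_##op##_##DEVICE =              \
      TouchOpKernelRegistrar_##op##_##DEVICE()

#define USE_OP(op)               \
  USE_OP_DEVICE_KERNEL(op, CPU); \
  USE_OP_DEVICE_KERNEL(op, CUDA)  // unresolved symbol for a CPU-only op

#define USE_CPU_ONLY_OP(op) USE_OP_DEVICE_KERNEL(op, CPU)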
@@ -300,12 +303,6 @@ op_library(channel_recv_op DEPS concurrency)
 
 list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS})
 
-# The fully connected layer is deleted when the WITH_MKLDNN flag is OFF
-# Because the fully connected layer has only one MKLDNN's operator
-if(NOT WITH_MKLDNN)
-    list(REMOVE_ITEM GENERAL_OPS fc_op)
-endif(NOT WITH_MKLDNN)
-
 foreach(src ${GENERAL_OPS})
     op_library(${src})
 endforeach()

paddle/fluid/operators/fc_mkldnn_op.cc

Lines changed: 7 additions & 2 deletions
@@ -125,13 +125,16 @@ class FCMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
 
     auto input = ctx.Input<Tensor>("Input");
     auto w = ctx.Input<Tensor>("W");
+    auto bias = ctx.Input<Tensor>("Bias");
 
     PADDLE_ENFORCE(input->dims().size() == 2 || input->dims().size() == 4,
                    "Input must be with 2 or 4 dimensions, i.e. NCHW");
+    // TODO(intel friends): the native weight format is io,
+    // but the mkldnn weight format is oihw, which may need to be transposed.
     PADDLE_ENFORCE(w->dims().size() == 2 || w->dims().size() == 4,
                    "Weights must be with 2 or 4 dimensions, i.e. OI or OIHW");
 
-    bool with_bias = ctx.Attr<bool>("bias_attr");
+    bool with_bias = bias != nullptr;
     MKLDNNMD<Tensor> md(input, w, with_bias);
 
     std::shared_ptr<mkldnn::inner_product_forward::primitive_desc> pd =
@@ -154,6 +157,7 @@ class FCMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     auto dst_memory = mem.dst(output_data);
     auto src_memory = mem.src(input_data);
     auto weights_memory = mem.weights(w_data);
+    // TODO(intel friends): bias memory should also be obtained from bias->data()
     auto bias_memory = mem.bias();
 
     auto forward = with_bias ? mkldnn::inner_product_forward(
@@ -216,7 +220,8 @@ class FCMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
     const Tensor* out_grad = ctx.Input<Tensor>(framework::GradVarName("Out"));
     const T* out_grad_data = out_grad->data<T>();
 
-    bool with_bias = ctx.Attr<bool>("bias_attr");
+    auto bias = ctx.Input<Tensor>("Bias");
+    bool with_bias = bias != nullptr;
 
     MKLDNNMD<Tensor> md(input, w, with_bias);
     MKLDNNMemory mem(&md, mkldnn_engine);
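On the weight-format TODO above: the native fc weight is stored io (row-major, I x O), while MKLDNN's inner product expects oi/oihw (output-major), so a repack may be needed. A minimal sketch of that transpose as a hypothetical standalone helper — not something this patch adds:

#include <cstddef>
#include <vector>

// Hypothetical helper (not part of this patch): repack a row-major (I, O)
// "io" fc weight into row-major (O, I) "oi", the 2-D layout that MKLDNN's
// inner_product primitive expects for weights.
template <typename T>
std::vector<T> TransposeIOToOI(const T* w_io, size_t I, size_t O) {
  std::vector<T> w_oi(I * O);
  for (size_t i = 0; i < I; ++i) {
    for (size_t o = 0; o < O; ++o) {
      w_oi[o * I + i] = w_io[i * O + o];  // element (i, o) -> (o, i)
    }
  }
  return w_oi;
}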

paddle/fluid/operators/fc_op.cc

Lines changed: 80 additions & 16 deletions
@@ -14,6 +14,9 @@ limitations under the License. */
 
 #include "paddle/fluid/operators/fc_op.h"
 #include <vector>
+#include "paddle/fluid/operators/math/blas.h"
+
+DECLARE_int32(paddle_num_threads);
 
 namespace paddle {
 namespace operators {
@@ -25,26 +28,37 @@ void FCOp::InferShape(framework::InferShapeContext* ctx) const {
                  "Out(Output) of Fully Connected should not be null.");
   PADDLE_ENFORCE(ctx->HasInput("W"),
                  "W(Input) of Fully Connected should not be null.");
-
+  // NCHW
   auto in_dims = ctx->GetInputDim("Input");
+  // IO, I=C*H*W
   auto w_dims = ctx->GetInputDim("W");
   std::vector<int64_t> output_shape({in_dims[0], w_dims[1]});
 
+  if (ctx->HasInput("Bias")) {
+    auto bias_dims = ctx->GetInputDim("Bias");
+    PADDLE_ENFORCE_EQ(bias_dims[0], 1, "The shape of Bias must be [1, dim].");
+    PADDLE_ENFORCE_EQ(bias_dims[1], w_dims[1],
+                      "The shape of Bias must be [1, dim].");
+  }
   PADDLE_ENFORCE(in_dims.size() == 2 || in_dims.size() == 4,
                  "Fully Connected input should be 2-D or 4-D tensor.");
-
-  PADDLE_ENFORCE(w_dims.size() == 2 || w_dims.size() == 4,
-                 "Fully Connected input should be 2-D or 4-D tensor.");
+  PADDLE_ENFORCE_EQ(w_dims.size(), 2UL,
+                    "Fully Connected input should be 2-D tensor.");
+  PADDLE_ENFORCE_EQ(framework::product(in_dims) / in_dims[0], w_dims[0],
+                    "Fully Connected input and weight size do not match.");
 
   ctx->SetOutputDim("Out", framework::make_ddim(output_shape));
   ctx->ShareLoD("Input", "Out");
 }
 
 framework::OpKernelType FCOp::GetExpectedKernelType(
     const framework::ExecutionContext& ctx) const {
-  framework::LibraryType library{framework::LibraryType::kMKLDNN};
-  framework::DataLayout layout{framework::DataLayout::kMKLDNN};
-
+  framework::LibraryType library = framework::LibraryType::kPlain;
+  framework::DataLayout layout = framework::DataLayout::kAnyLayout;
+  if (ctx.Attr<bool>("use_mkldnn")) {
+    library = framework::LibraryType::kMKLDNN;
+    layout = framework::DataLayout::kMKLDNN;
+  }
   return framework::OpKernelType(
       framework::ToDataType(ctx.Input<Tensor>("Input")->type()), ctx.GetPlace(),
       layout, library);
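To make the tightened checks concrete, with assumed example dimensions: an NCHW input of shape (2, 3, 4, 5) flattens to I = C*H*W = 60 columns, so W must be (60, O), Bias (if present) must be (1, O), and Out becomes (N, O). A minimal standalone rendering of that arithmetic:

#include <cassert>
#include <cstdint>

// Standalone rendering of the InferShape arithmetic, on assumed example
// dimensions: Input is NCHW, W is (I, O) with I = C*H*W, Bias is (1, O).
int main() {
  const int64_t in_dims[4] = {2, 3, 4, 5};                 // N, C, H, W
  const int64_t I = in_dims[1] * in_dims[2] * in_dims[3];  // 60
  const int64_t O = 15;
  const int64_t w_dims[2] = {I, O};
  const int64_t bias_dims[2] = {1, O};
  assert(I == w_dims[0]);  // "input and weight size do not match" otherwise
  assert(bias_dims[0] == 1 && bias_dims[1] == w_dims[1]);  // Bias is [1, dim]
  const int64_t out_dims[2] = {in_dims[0], w_dims[1]};     // Out = (2, 15)
  return (out_dims[0] == 2 && out_dims[1] == 15) ? 0 : 1;
}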
@@ -60,27 +74,39 @@ void FCOpGrad::InferShape(framework::InferShapeContext* ctx) const {
   if (ctx->HasOutput(framework::GradVarName("W"))) {
     ctx->SetOutputDim(framework::GradVarName("W"), w_dims);
   }
+
+  if (ctx->HasInput("Bias")) {
+    PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("Bias")),
+                   "Should have bias grad");
+    auto bias_dims = ctx->GetInputDim("Bias");
+    ctx->SetOutputDim(framework::GradVarName("Bias"), bias_dims);
+  }
 }
 
 framework::OpKernelType FCOpGrad::GetExpectedKernelType(
     const framework::ExecutionContext& ctx) const {
-  framework::LibraryType library{framework::LibraryType::kMKLDNN};
-  framework::DataLayout layout{framework::DataLayout::kMKLDNN};
-
+  framework::LibraryType library = framework::LibraryType::kPlain;
+  framework::DataLayout layout = framework::DataLayout::kAnyLayout;
+  if (ctx.Attr<bool>("use_mkldnn")) {
+    library = framework::LibraryType::kMKLDNN;
+    layout = framework::DataLayout::kMKLDNN;
+  }
   return framework::OpKernelType(
       framework::ToDataType(ctx.Input<Tensor>("Input")->type()), ctx.GetPlace(),
       layout, library);
 }
 
 void FCOpMaker::Make() {
-  AddInput("Input", "(Tensor) The input tensor of fully connected operator. ");
-  AddInput("W", "(Tensor), The second input tensor of fc op.");
+  AddInput("Input",
+           "(Tensor), The input tensor of fully connected operator with format "
+           "(NCHW).");
+  AddInput("W", "(Tensor), The weight of fc op with shape (I, O).");
+  AddInput("Bias", "(Tensor, optional) Bias vector with shape (1 x O).")
+      .AsDispensable();
   AddOutput("Out", "(Tensor) The output tensor of fully connected operator. ");
   AddAttr<bool>("use_mkldnn",
                 "(bool, default false) Only used in mkldnn kernel")
       .SetDefault(false);
-  AddAttr<bool>("bias_attr", "(bool, default false) Only used in mkldnn kernel")
-      .SetDefault(false);
   AddComment(R"DOC(
 Fully Connected Operator.
@@ -94,9 +120,47 @@ void FCOpMaker::Make() {
 )DOC");
 }
 
+template <typename T>
+class FCOpKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const paddle::framework::ExecutionContext& ctx) const override {
+    PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()),
+                   "It must use CPUPlace.");
+    auto input = ctx.Input<Tensor>("Input");
+    auto w = ctx.Input<Tensor>("W");
+    auto bias = ctx.Input<Tensor>("Bias");
+    auto output = ctx.Output<Tensor>("Out");
+    auto in_dims = input->dims();
+    auto w_dims = w->dims();
+
+    auto& dev_ctx = ctx.template device_context<platform::CPUDeviceContext>();
+    auto blas = math::GetBlas<platform::CPUDeviceContext, T>(dev_ctx);
+    const T* input_data = input->data<T>();
+    const T* w_data = w->data<T>();
+    T* output_data = output->mutable_data<T>(ctx.GetPlace());
+
+    blas.GEMM(CblasNoTrans, CblasNoTrans, in_dims[0], w_dims[1], w_dims[0],
+              static_cast<T>(1), input_data, w_data, static_cast<T>(0),
+              output_data);
+
+    if (bias) {
+      const T* bias_data = bias->data<T>();
+#ifdef PADDLE_WITH_MKLML
+#pragma omp parallel for if (FLAGS_paddle_num_threads > 1)
+#endif
+      for (int bs = 0; bs < in_dims[0]; bs++) {
+        blas.AXPY(w_dims[1], static_cast<T>(1), bias_data,
+                  output_data + bs * w_dims[1]);
+      }
+    }
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
 
-REGISTER_OPERATOR(fc, paddle::operators::FCOp, paddle::operators::FCOpMaker,
+namespace ops = paddle::operators;
+REGISTER_OPERATOR(fc, ops::FCOp, ops::FCOpMaker,
                   paddle::framework::DefaultGradOpDescMaker<true>);
-REGISTER_OPERATOR(fc_grad, paddle::operators::FCOpGrad);
+REGISTER_OPERATOR(fc_grad, ops::FCOpGrad);
+REGISTER_OP_CPU_KERNEL(fc, ops::FCOpKernel<float>, ops::FCOpKernel<double>);
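In plain terms, the new CPU kernel computes Out = X_flat * W with one GEMM, then adds the (1, O) bias row to each of the N output rows via per-row AXPY. A dependency-free sketch of the same math, with naive loops standing in for the BLAS calls (illustration only, row-major shapes assumed):

#include <vector>

// Sketch of the FCOpKernel math above: Out = X_flat * W, then the (1, O)
// bias row is added to every output row — the same effect as the
// blas.GEMM + per-row blas.AXPY calls in the kernel.
template <typename T>
std::vector<T> FCForwardRef(const std::vector<T>& x,     // (N, I) row-major
                            const std::vector<T>& w,     // (I, O) row-major
                            const std::vector<T>& bias,  // (1, O); may be empty
                            int N, int I, int O) {
  std::vector<T> out(static_cast<size_t>(N) * O, T(0));
  for (int n = 0; n < N; ++n) {
    for (int i = 0; i < I; ++i) {
      const T xv = x[n * I + i];
      for (int o = 0; o < O; ++o) out[n * O + o] += xv * w[i * O + o];
    }
    if (!bias.empty()) {
      for (int o = 0; o < O; ++o) out[n * O + o] += bias[o];  // AXPY per row
    }
  }
  return out;
}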

python/paddle/fluid/tests/unittests/test_fc_mkldnn_op.py

Lines changed: 2 additions & 7 deletions
@@ -22,6 +22,7 @@ def fully_connected_naive(input, weights, bias_data=None):
     w_h, w_c = weights.shape
 
     x_data = np.reshape(input, [in_n, in_c * in_h * in_w])
+    # this transpose should be implemented in C code
     w_data = np.transpose(np.reshape(weights, (w_c, in_c * in_h * in_w)))
     result = None
 
@@ -43,15 +44,11 @@ class TestFCMKLDNNOp(OpTest):
     def setUp(self):
         self.op_type = "fc"
         self.use_mkldnn = True
-        self.with_bias = True
         self.matrix = MatrixGenerate(1, 10, 15, 3, 3)
 
         self.inputs = {'Input': self.matrix.input, 'W': self.matrix.weights}
 
-        self.attrs = {
-            'use_mkldnn': self.use_mkldnn,
-            'with_bias': self.with_bias
-        }
+        self.attrs = {'use_mkldnn': self.use_mkldnn, }
 
         self.outputs = {
             'Out': fully_connected_naive(self.matrix.input, self.matrix.weights)
@@ -85,13 +82,11 @@ def init_op_type(self):
 
 class TestFCMKLDNNOp4(TestFCMKLDNNOp):
     def init_op_type(self):
-        self.with_bias = False
         self.matrix = MatrixGenerate(2, 32, 48, 2, 2)
 
 
 class TestFCMKLDNNOp4(TestFCMKLDNNOp):
     def init_op_type(self):
-        self.with_bias = False
         self.matrix = MatrixGenerate(2, 32, 1000, 6, 6)

python/paddle/fluid/tests/unittests/test_fc_op.py (new file)

Lines changed: 90 additions & 0 deletions
@@ -0,0 +1,90 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import numpy as np
+from op_test import OpTest
+
+
+def fc_refer(matrix, with_bias):
+    in_n, in_c, in_h, in_w = matrix.input.shape
+    w_i, w_o = matrix.weights.shape
+
+    x_data = np.reshape(matrix.input, [in_n, in_c * in_h * in_w])
+    w_data = np.reshape(matrix.weights, [w_i, w_o])
+    b_data = np.reshape(matrix.bias, [1, w_o])
+    result = None
+
+    if with_bias:
+        result = np.dot(x_data, w_data) + b_data
+    else:
+        result = np.dot(x_data, w_data)
+
+    return result
+
+
+class MatrixGenerate:
+    def __init__(self, mb, ic, oc, h, w):
+        self.input = np.random.random((mb, ic, h, w)).astype("float32")
+        self.weights = np.random.random((ic * h * w, oc)).astype("float32")
+        self.bias = np.random.random((1, oc)).astype("float32")
+
+
+class TestFCOp(OpTest):
+    def setUp(self):
+        self.op_type = "fc"
+        self.matrix = MatrixGenerate(1, 10, 15, 3, 3)
+
+        self.with_bias = True
+        if self.with_bias:
+            self.inputs = {
+                'Input': self.matrix.input,
+                'W': self.matrix.weights,
+                'Bias': self.matrix.bias
+            }
+        else:
+            self.inputs = {'Input': self.matrix.input, 'W': self.matrix.weights}
+
+        self.attrs = {'use_mkldnn': False}
+
+        self.outputs = {'Out': fc_refer(self.matrix, self.with_bias)}
+
+    def test_check_output(self):
+        self.check_output()
+
+
+class TestFCOpBiasBoth(TestFCOp):
+    def init_shapes(self, mb, ic, oc, h, w):
+        for with_bias in {True, False}:
+            self.with_bias = with_bias
+            self.matrix = MatrixGenerate(mb, ic, oc, h, w)
+
+
+class TestFCOp1(TestFCOpBiasBoth):
+    def init_op_type(self):
+        self.init_shapes(2, 8, 10, 1, 1)
+
+
+class TestFCOp2(TestFCOpBiasBoth):
+    def init_op_type(self):
+        self.init_shapes(4, 5, 6, 2, 2)
+
+
+class TestFCOp4(TestFCOpBiasBoth):
+    def init_op_type(self):
+        self.init_shapes(1, 32, 64, 3, 3)
+
+
+if __name__ == "__main__":
+    unittest.main()
