Commit 642cf6c

Merge pull request #13418 from sneaxiy/dam_save_memory
Modify some ops to save memory
2 parents aa79bcc + fd3e32e commit 642cf6c

9 files changed (+88, -11 lines)

paddle/fluid/API.spec

Lines changed: 1 addition & 1 deletion
@@ -124,7 +124,7 @@ paddle.fluid.layers.split ArgSpec(args=['input', 'num_or_sections', 'dim', 'name
 paddle.fluid.layers.ctc_greedy_decoder ArgSpec(args=['input', 'blank', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.edit_distance ArgSpec(args=['input', 'label', 'normalized', 'ignored_tokens'], varargs=None, keywords=None, defaults=(True, None))
 paddle.fluid.layers.l2_normalize ArgSpec(args=['x', 'axis', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(1e-12, None))
-paddle.fluid.layers.matmul ArgSpec(args=['x', 'y', 'transpose_x', 'transpose_y', 'name'], varargs=None, keywords=None, defaults=(False, False, None))
+paddle.fluid.layers.matmul ArgSpec(args=['x', 'y', 'transpose_x', 'transpose_y', 'alpha', 'name'], varargs=None, keywords=None, defaults=(False, False, 1.0, None))
 paddle.fluid.layers.topk ArgSpec(args=['input', 'k', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.warpctc ArgSpec(args=['input', 'label', 'blank', 'norm_by_times'], varargs=None, keywords=None, defaults=(0, False))
 paddle.fluid.layers.sequence_reshape ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None)

paddle/fluid/framework/grad_op_desc_maker.h

Lines changed: 3 additions & 0 deletions
@@ -129,6 +129,9 @@ class GradOpDescMakerBase {
 
   std::string ForwardOpType() const { return this->fwd_op_.Type(); }
 
+ protected:
+  const OpDesc& ForwardOp() const { return fwd_op_; }
+
  private:
   const OpDesc& fwd_op_;
   const std::unordered_set<std::string>& no_grad_set_;

paddle/fluid/operators/elementwise_mul_op.cc

Lines changed: 37 additions & 1 deletion
@@ -13,9 +13,45 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/elementwise_mul_op.h"
+#include <string>
 #include "paddle/fluid/operators/elementwise_op.h"
+
+namespace paddle {
+namespace operators {
+
+class ElementwiseMulOpGradDescMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
+    op->SetType("elementwise_mul_grad");
+    op->SetInput("X", Input("X"));
+    op->SetInput("Y", Input("Y"));
+    op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
+    op->SetAttrMap(Attrs());
+    op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
+    op->SetOutput(framework::GradVarName("Y"), InputGrad("Y"));
+    return op;
+  }
+};
+
+class ElementwiseMulOpMaker : public ElementwiseOpMaker {
+ protected:
+  virtual std::string GetName() const { return "Mul"; }
+  virtual std::string GetEquation() const { return "Out = X \\odot Y"; }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
 namespace ops = paddle::operators;
-REGISTER_ELEMWISE_OP(elementwise_mul, "Mul", "Out = X \\odot Y");
+REGISTER_OPERATOR(elementwise_mul, ops::ElementwiseOp,
+                  ops::ElementwiseMulOpMaker, ops::ElementwiseOpInferVarType,
+                  ops::ElementwiseMulOpGradDescMaker);
+REGISTER_OPERATOR(elementwise_mul_grad, ops::ElementwiseOpGrad);
+
 REGISTER_OP_CPU_KERNEL(
     elementwise_mul,
     ops::ElementwiseMulKernel<paddle::platform::CPUDeviceContext, float>,

paddle/fluid/operators/elementwise_mul_op.h

Lines changed: 1 addition & 1 deletion
@@ -93,8 +93,8 @@ class ElementwiseMulGradKernel : public ElemwiseGradKernel<T> {
 
     auto* x = ctx.Input<Tensor>("X");
     auto* y = ctx.Input<Tensor>("Y");
-    auto* out = ctx.Input<Tensor>("Out");
     auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
+    auto* out = dout;  // out is not necessary
     auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
     auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
    int axis = ctx.Attr<int>("axis");
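This rewrite is safe because the backward pass of elementwise multiplication never reads the forward output: for Out = X ⊙ Y, the gradients are dX = dOut ⊙ Y and dY = dOut ⊙ X. ElementwiseMulOpGradDescMaker above accordingly declares only X, Y and dOut as inputs of elementwise_mul_grad (the protected ForwardOp() accessor added in grad_op_desc_maker.h presumably exists to support custom makers like this one), so the framework can free Out as soon as the forward op finishes. A minimal NumPy sketch of the same math, illustrative only:

```python
import numpy as np

x = np.random.rand(3, 4)
y = np.random.rand(3, 4)
dout = np.ones((3, 4))  # upstream gradient dLoss/dOut

# Backward of Out = X * Y needs only X, Y and dOut:
dx = dout * y  # dLoss/dX
dy = dout * x  # dLoss/dY
# Out itself is never read, which is why the kernel can alias it to dout
# and the grad desc maker can drop it from the op's inputs.
```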

paddle/fluid/operators/matmul_op.cc

Lines changed: 5 additions & 2 deletions
@@ -59,7 +59,8 @@ class MatMulKernel : public framework::OpKernel<T> {
         RowMatrixFromVector(x.dims()), 0, context.Attr<bool>("transpose_X"));
     auto mat_dim_b = math::CreateMatrixDescriptor(
         ColumnMatrixFromVector(y.dims()), 0, context.Attr<bool>("transpose_Y"));
-    blas.MatMul(x, mat_dim_a, y, mat_dim_b, T(1), out, T(0));
+    auto scale = static_cast<T>(context.Attr<float>("alpha"));
+    blas.MatMul(x, mat_dim_a, y, mat_dim_b, scale, out, T(0));
   }
 };
 
@@ -185,7 +186,8 @@ class MatMulGradKernel : public framework::OpKernel<T> {
     auto blas = math::GetBlas<DeviceContext, T>(context);
     auto mat_dim_a = math::CreateMatrixDescriptor(a.dims(), 0, trans_a);
     auto mat_dim_b = math::CreateMatrixDescriptor(b.dims(), 0, trans_b);
-    blas.MatMul(a, mat_dim_a, b, mat_dim_b, T(1), out, T(0));
+    blas.MatMul(a, mat_dim_a, b, mat_dim_b,
+                static_cast<T>(context.Attr<float>("alpha")), out, T(0));
   }
 
   void CalcInputGrad(const framework::ExecutionContext &context,
@@ -334,6 +336,7 @@ class MatMulOpMaker : public framework::OpProtoAndCheckerMaker {
       R"DOC(If true, use the transpose of `Y`.
 )DOC")
         .SetDefault(false);
+    AddAttr<float>("alpha", "The scale of Out").SetDefault(1.0f);
     AddComment(R"DOC(
 MatMul Operator.
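Folding alpha into the GEMM avoids materializing a separately scaled copy of the output, and because Out = alpha · op(X) · op(Y), the input gradients carry the same factor; that is why MatMulGradKernel passes alpha into blas.MatMul as well. A NumPy sketch of the intended semantics (illustrative, not the actual kernel):

```python
import numpy as np

alpha = 0.5
x = np.random.rand(2, 3)
y = np.random.rand(3, 4)

# Forward: the scale rides along inside the single GEMM call.
out = alpha * (x @ y)

# Backward: d(alpha * X @ Y)/dX = alpha * dOut @ Y^T (and similarly
# for Y), so the grad kernel reuses the same alpha attribute.
dout = np.ones_like(out)
dx = alpha * (dout @ y.T)
dy = alpha * (x.T @ dout)
```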

paddle/fluid/operators/mul_op.cc

Lines changed: 19 additions & 2 deletions
@@ -156,12 +156,29 @@ class MulGradOp : public framework::OperatorWithKernel {
   }
 };
 
+class MulOpGradMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    std::unique_ptr<framework::OpDesc> retv(new framework::OpDesc());
+    retv->SetType("mul_grad");
+    retv->SetInput("X", Input("X"));
+    retv->SetInput("Y", Input("Y"));
+    retv->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
+    retv->SetOutput(framework::GradVarName("X"), InputGrad("X"));
+    retv->SetOutput(framework::GradVarName("Y"), InputGrad("Y"));
+    retv->SetAttrMap(Attrs());
+    return retv;
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
 
 namespace ops = paddle::operators;
-REGISTER_OPERATOR(mul, ops::MulOp, ops::MulOpMaker,
-                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(mul, ops::MulOp, ops::MulOpMaker, ops::MulOpGradMaker);
 REGISTER_OPERATOR(mul_grad, ops::MulGradOp);
 REGISTER_OP_CPU_KERNEL(
     mul, ops::MulKernel<paddle::platform::CPUDeviceContext, float>,
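Previously, DefaultGradOpDescMaker<true> forwarded every input and output of the forward op into mul_grad, so Out had to stay resident until backward ran. MulOpGradMaker lists only what the gradient math actually uses: for Out = X·Y, dX = dOut·Yᵀ and dY = Xᵀ·dOut, with no reference to Out. A sketch of the effective grad-op wiring (the input sets below are illustrative):

```python
# Before: the default maker wires all forward inputs and outputs into
# the grad op, forcing the runtime to keep Out alive for backward.
mul_grad_inputs_before = {"X", "Y", "Out", "Out@GRAD"}

# After: MulOpGradMaker declares only the tensors the math needs
# (dX = dOut @ Y^T, dY = X^T @ dOut), so Out can be freed early.
mul_grad_inputs_after = {"X", "Y", "Out@GRAD"}
```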

paddle/fluid/operators/scale_op.cc

Lines changed: 8 additions & 0 deletions
@@ -52,6 +52,12 @@ Multiply the input tensor with a float scalar to scale the input tensor.
 )DOC");
     AddAttr<float>("scale", "The scaling factor of the scale operator.")
         .SetDefault(1.0);
+    AddAttr<float>("bias", "The bias of the scale operator.").SetDefault(0.0);
+    AddAttr<bool>(
+        "bias_after_scale",
+        "Apply bias addition after or before scaling. It is useful for "
+        "numeric stability in some circumstances.")
+        .SetDefault(true);
   }
 };
 
@@ -80,6 +86,8 @@ class ScaleGradMaker : public framework::SingleGradOpDescMaker {
     grad_op->SetInput("X", OutputGrad("Out"));
     grad_op->SetOutput("Out", InputGrad("X"));
     grad_op->SetAttr("scale", GetAttr("scale"));
+    grad_op->SetAttr("bias", 0.0f);
+    grad_op->SetAttr("bias_after_scale", true);
     return std::unique_ptr<framework::OpDesc>(grad_op);
   }
 };

paddle/fluid/operators/scale_op.h

Lines changed: 7 additions & 1 deletion
@@ -34,6 +34,8 @@ class ScaleKernel : public framework::OpKernel<T> {
                       "in and out should have the same dim");
 
     auto scale = static_cast<T>(ctx.Attr<float>("scale"));
+    auto bias = static_cast<T>(ctx.Attr<float>("bias"));
+    auto bias_after_scale = ctx.Attr<bool>("bias_after_scale");
 
     if (in_var->IsType<framework::SelectedRows>() && in_var != out_var) {
       auto& in_slr = in_var->Get<framework::SelectedRows>();
@@ -45,7 +47,11 @@ class ScaleKernel : public framework::OpKernel<T> {
     auto eigen_out = framework::EigenVector<T>::Flatten(*out);
     auto eigen_in = framework::EigenVector<T>::Flatten(*in);
     auto& dev = *ctx.template device_context<DeviceContext>().eigen_device();
-    eigen_out.device(dev) = scale * eigen_in;
+    if (bias_after_scale) {
+      eigen_out.device(dev) = scale * eigen_in + bias;
+    } else {
+      eigen_out.device(dev) = scale * (eigen_in + bias);
+    }
   }
 };
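The fused kernel computes either scale·x + bias or scale·(x + bias) in one pass instead of chaining separate scale and add ops, each of which would allocate an intermediate tensor. Since bias is a constant, the gradient with respect to x is just scale in both orderings, which is why ScaleGradMaker reuses the scale op with bias fixed to 0. A NumPy sketch of the two orderings (illustrative):

```python
import numpy as np

x = np.random.rand(8)
scale, bias = 2.0, 1.5

out_after = scale * x + bias     # bias_after_scale=True (the default)
out_before = scale * (x + bias)  # bias_after_scale=False

# d(out)/d(x) == scale either way, since bias is constant; hence the
# backward pass is simply another scale op with bias = 0.
dout = np.ones_like(x)
dx = scale * dout
```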

python/paddle/fluid/layers/nn.py

Lines changed: 7 additions & 3 deletions
@@ -3499,7 +3499,7 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None):
     return out
 
 
-def matmul(x, y, transpose_x=False, transpose_y=False, name=None):
+def matmul(x, y, transpose_x=False, transpose_y=False, alpha=1.0, name=None):
     """
     Applies matrix multiplication to two tensors.
 
@@ -3533,6 +3533,7 @@ def matmul(x, y, transpose_x=False, transpose_y=False, name=None):
         y (Variable): The input variable which is a Tensor or LoDTensor.
         transpose_x (bool): Whether to transpose :math:`x` before multiplication.
         transpose_y (bool): Whether to transpose :math:`y` before multiplication.
+        alpha (float): The scale of output. Default 1.0.
         name(str|None): A name for this layer(optional). If set None, the layer
             will be named automatically.
 
@@ -3600,8 +3601,11 @@ def __check_input(x, y):
         inputs={'X': x,
                 'Y': y},
         outputs={'Out': out},
-        attrs={'transpose_X': transpose_x,
-               'transpose_Y': transpose_y})
+        attrs={
+            'transpose_X': transpose_x,
+            'transpose_Y': transpose_y,
+            'alpha': alpha,
+        })
     return out
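A short usage sketch of the extended Python layer; the data-layer setup and variable names are illustrative:

```python
import paddle.fluid as fluid

x = fluid.layers.data(name='x', shape=[3, 4], dtype='float32')
y = fluid.layers.data(name='y', shape=[4, 5], dtype='float32')

# Out = 0.5 * (x @ y): the scale is fused into the matmul op itself,
# so no separate scale op or extra intermediate tensor is created.
out = fluid.layers.matmul(x, y, alpha=0.5)
```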
