
Commit 0718113

modification

1 parent d9942cd · commit 0718113
6 files changed: +20 -21 lines


paddle/fluid/API.spec

Lines changed: 1 addition & 1 deletion
@@ -130,7 +130,7 @@ paddle.fluid.layers.split ArgSpec(args=['input', 'num_or_sections', 'dim', 'name
 paddle.fluid.layers.ctc_greedy_decoder ArgSpec(args=['input', 'blank', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.edit_distance ArgSpec(args=['input', 'label', 'normalized', 'ignored_tokens'], varargs=None, keywords=None, defaults=(True, None))
 paddle.fluid.layers.l2_normalize ArgSpec(args=['x', 'axis', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(1e-12, None))
-paddle.fluid.layers.matmul ArgSpec(args=['x', 'y', 'transpose_x', 'transpose_y', 'scale', 'bias', 'name'], varargs=None, keywords=None, defaults=(False, False, 1.0, 0.0, None))
+paddle.fluid.layers.matmul ArgSpec(args=['x', 'y', 'transpose_x', 'transpose_y', 'alpha', 'name'], varargs=None, keywords=None, defaults=(False, False, 1.0, None))
 paddle.fluid.layers.topk ArgSpec(args=['input', 'k', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.warpctc ArgSpec(args=['input', 'label', 'blank', 'norm_by_times'], varargs=None, keywords=None, defaults=(0, False))
 paddle.fluid.layers.sequence_reshape ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None)
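
The spec change removes `scale` and `bias` from the layer's signature; only the multiplicative `alpha` survives. A hedged migration sketch for old call sites, assuming `fluid.layers.scale` is available to supply the additive term via the scale op touched later in this commit:

```python
import paddle.fluid as fluid  # illustrative; assumes the fluid-era API

x = fluid.layers.data(name='x', shape=[4, 8], dtype='float32')
y = fluid.layers.data(name='y', shape=[8, 4], dtype='float32')

# Before this commit: out = 2.0 * (x @ y) + 1.0 in a single call
# out = fluid.layers.matmul(x, y, scale=2.0, bias=1.0)

# After: `alpha` replaces `scale`; add the bias with a separate scale op
out = fluid.layers.matmul(x, y, alpha=2.0)
out = fluid.layers.scale(out, scale=1.0, bias=1.0)
```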

paddle/fluid/operators/elementwise_mul_op.h

Lines changed: 0 additions & 1 deletion
@@ -93,7 +93,6 @@ class ElementwiseMulGradKernel : public ElemwiseGradKernel<T> {

     auto* x = ctx.Input<Tensor>("X");
     auto* y = ctx.Input<Tensor>("Y");
-    // auto* out = ctx.Input<Tensor>("Out");
     auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
     auto* out = dout;  // out is not necessary
     auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
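
Deleting the commented-out input fetch is safe because the multiply gradient never reads the forward output: for Out = X ⊙ Y, dX = dOut ⊙ Y and dY = dOut ⊙ X, which is why the kernel can alias `out` to `dout` as a placeholder. A minimal NumPy sketch of that identity:

```python
import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal((2, 3))
y = rng.standard_normal((2, 3))
dout = rng.standard_normal((2, 3))  # incoming gradient of Out

# Out = x * y is never read by its own backward pass:
dx = dout * y  # gradient w.r.t. X
dy = dout * x  # gradient w.r.t. Y
```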

paddle/fluid/operators/matmul_op.cc

Lines changed: 4 additions & 6 deletions
@@ -59,9 +59,8 @@ class MatMulKernel : public framework::OpKernel<T> {
         RowMatrixFromVector(x.dims()), 0, context.Attr<bool>("transpose_X"));
     auto mat_dim_b = math::CreateMatrixDescriptor(
         ColumnMatrixFromVector(y.dims()), 0, context.Attr<bool>("transpose_Y"));
-    auto scale = static_cast<T>(context.Attr<float>("scale"));
-    auto bias = static_cast<T>(context.Attr<float>("bias"));
-    blas.MatMul(x, mat_dim_a, y, mat_dim_b, scale, out, bias);
+    auto scale = static_cast<T>(context.Attr<float>("alpha"));
+    blas.MatMul(x, mat_dim_a, y, mat_dim_b, scale, out, T(0));
   }
 };

@@ -188,7 +187,7 @@ class MatMulGradKernel : public framework::OpKernel<T> {
     auto mat_dim_a = math::CreateMatrixDescriptor(a.dims(), 0, trans_a);
     auto mat_dim_b = math::CreateMatrixDescriptor(b.dims(), 0, trans_b);
     blas.MatMul(a, mat_dim_a, b, mat_dim_b,
-                static_cast<T>(context.Attr<float>("scale")), out, T(0));
+                static_cast<T>(context.Attr<float>("alpha")), out, T(0));
   }

   void CalcInputGrad(const framework::ExecutionContext &context,
@@ -337,8 +336,7 @@ class MatMulOpMaker : public framework::OpProtoAndCheckerMaker {
                   R"DOC(If true, use the transpose of `Y`.
 )DOC")
         .SetDefault(false);
-    AddAttr<float>("scale", "Scale").SetDefault(1.0f);
-    AddAttr<float>("bias", "Bias").SetDefault(0.0f);
+    AddAttr<float>("alpha", "The scale of Out").SetDefault(1.0f);
     AddComment(R"DOC(
 MatMul Operator.

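With `bias` gone, the forward kernel is a plain scaled GEMM, Out = alpha · op(X) op(Y), with the GEMM beta term pinned to `T(0)`. A NumPy sketch of the semantics the kernel now implements (the function name is illustrative):

```python
import numpy as np

def matmul_forward(x, y, transpose_x=False, transpose_y=False, alpha=1.0):
    """Semantics of the updated MatMulKernel; GEMM beta is fixed at 0."""
    a = x.T if transpose_x else x
    b = y.T if transpose_y else y
    return alpha * (a @ b)  # blas.MatMul(x, ..., scale, out, T(0))

x = np.arange(6.0).reshape(2, 3)
y = np.arange(12.0).reshape(3, 4)
out = matmul_forward(x, y, alpha=2.0)  # 2.0 * (x @ y)
```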
paddle/fluid/operators/scale_op.cc

Lines changed: 6 additions & 0 deletions
@@ -53,6 +53,11 @@ Multiply the input tensor with a float scalar to scale the input tensor.
     AddAttr<float>("scale", "The scaling factor of the scale operator.")
         .SetDefault(1.0);
     AddAttr<float>("bias", "The bias of the scale operator.").SetDefault(0.0);
+    AddAttr<bool>(
+        "bias_after_scale",
+        "Apply bias addition after or before scaling. It is useful for "
+        "numeric stability in some circumstances.")
+        .SetDefault(true);
   }
 };

@@ -82,6 +87,7 @@ class ScaleGradMaker : public framework::SingleGradOpDescMaker {
     grad_op->SetOutput("Out", InputGrad("X"));
     grad_op->SetAttr("scale", GetAttr("scale"));
     grad_op->SetAttr("bias", 0.0f);
+    grad_op->SetAttr("bias_after_scale", true);
     return std::unique_ptr<framework::OpDesc>(grad_op);
   }
 };
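
The grad maker can hard-code `bias` to 0.0f and `bias_after_scale` to true because both forward variants, scale·x + bias and scale·(x + bias), have the same derivative scale with respect to x, so the backward pass is just another scale op without a bias. A quick finite-difference check of that claim:

```python
def scale_forward(x, scale=3.0, bias=0.5, bias_after_scale=True):
    return scale * x + bias if bias_after_scale else scale * (x + bias)

x, eps = 1.7, 1e-6
for mode in (True, False):
    fd = (scale_forward(x + eps, bias_after_scale=mode)
          - scale_forward(x - eps, bias_after_scale=mode)) / (2 * eps)
    assert abs(fd - 3.0) < 1e-4  # d(out)/d(x) == scale in both modes
```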

paddle/fluid/operators/scale_op.h

Lines changed: 6 additions & 2 deletions
@@ -35,6 +35,7 @@ class ScaleKernel : public framework::OpKernel<T> {

     auto scale = static_cast<T>(ctx.Attr<float>("scale"));
     auto bias = static_cast<T>(ctx.Attr<float>("bias"));
+    auto bias_after_scale = ctx.Attr<bool>("bias_after_scale");

     if (in_var->IsType<framework::SelectedRows>() && in_var != out_var) {
       auto& in_slr = in_var->Get<framework::SelectedRows>();
@@ -46,8 +47,11 @@ class ScaleKernel : public framework::OpKernel<T> {
     auto eigen_out = framework::EigenVector<T>::Flatten(*out);
     auto eigen_in = framework::EigenVector<T>::Flatten(*in);
     auto& dev = *ctx.template device_context<DeviceContext>().eigen_device();
-    eigen_out.device(dev) =
-        static_cast<T>(scale) * eigen_in + static_cast<T>(bias);
+    if (bias_after_scale) {
+      eigen_out.device(dev) = scale * eigen_in + bias;
+    } else {
+      eigen_out.device(dev) = scale * (eigen_in + bias);
+    }
   }
 };

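On the dense path the kernel flattens the tensor to a 1-D Eigen vector and applies one of the two expressions elementwise, so the input's shape never affects the result. A NumPy mirror of the updated kernel body (a sketch, not the Eigen code):

```python
import numpy as np

def scale_kernel(x, scale, bias, bias_after_scale=True):
    flat = x.reshape(-1)  # mirrors EigenVector<T>::Flatten(*in)
    out = scale * flat + bias if bias_after_scale else scale * (flat + bias)
    return out.reshape(x.shape)

x = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
scale_kernel(x, 10.0, 0.5, True)   # 10*x + 0.5  -> [[10.5, 20.5], ...]
scale_kernel(x, 10.0, 0.5, False)  # 10*(x+0.5)  -> [[15.0, 25.0], ...]
```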
python/paddle/fluid/layers/nn.py

Lines changed: 3 additions & 11 deletions
@@ -3388,13 +3388,7 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None):
     return out


-def matmul(x,
-           y,
-           transpose_x=False,
-           transpose_y=False,
-           scale=1.0,
-           bias=0.0,
-           name=None):
+def matmul(x, y, transpose_x=False, transpose_y=False, alpha=1.0, name=None):
     """
     Applies matrix multiplication to two tensors.

@@ -3428,8 +3422,7 @@ def matmul(x,
         y (Variable): The input variable which is a Tensor or LoDTensor.
         transpose_x (bool): Whether to transpose :math:`x` before multiplication.
         transpose_y (bool): Whether to transpose :math:`y` before multiplication.
-        scale (float): The scale of output. Default 1.0.
-        bias (float): The bias added to output. Default 0.0.
+        alpha (float): The scale of output. Default 1.0.
         name(str|None): A name for this layer(optional). If set None, the layer
             will be named automatically.

@@ -3500,8 +3493,7 @@ def __check_input(x, y):
         attrs={
             'transpose_X': transpose_x,
             'transpose_Y': transpose_y,
-            'scale': scale,
-            'bias': bias
+            'alpha': alpha,
         })
     return out
