Commit 2c7a8b9
Merge branch 'develop' into mac/fix_unittest_279_395
2 parents: e8e762f + dffc457

26 files changed: +839 / -85 lines

doc/fluid/api/initializer.rst

Lines changed: 9 additions & 0 deletions
@@ -32,6 +32,15 @@ Normal
     :members:
     :noindex:
 
+.. _api_fluid_initializer_Normal:
+
+TruncatedNormal
+------
+
+.. autoclass:: paddle.fluid.initializer.TruncatedNormal
+    :members:
+    :noindex:
+
 .. _api_fluid_initializer_Xavier:
 
 Xavier
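
For context, a minimal Python sketch of how the new initializer would be used (variable names and shapes are illustrative; the constructor signature matches the ArgSpec change below, with defaults loc=0.0, scale=1.0, seed=0):

import paddle.fluid as fluid

x = fluid.layers.data(name='x', shape=[32], dtype='float32')
# Draw the FC weights from a normal distribution with mean `loc` and
# standard deviation `scale`, truncated to discard extreme samples.
fc = fluid.layers.fc(
    input=x,
    size=10,
    param_attr=fluid.ParamAttr(
        initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=2.0)))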

paddle/fluid/API.spec

Lines changed: 3 additions & 1 deletion
@@ -79,6 +79,7 @@ paddle.fluid.io.get_inference_program ArgSpec(args=['target_vars', 'main_program
 paddle.fluid.initializer.ConstantInitializer.__init__ ArgSpec(args=['self', 'value', 'force_cpu'], varargs=None, keywords=None, defaults=(0.0, False))
 paddle.fluid.initializer.UniformInitializer.__init__ ArgSpec(args=['self', 'low', 'high', 'seed'], varargs=None, keywords=None, defaults=(-1.0, 1.0, 0))
 paddle.fluid.initializer.NormalInitializer.__init__ ArgSpec(args=['self', 'loc', 'scale', 'seed'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0))
+paddle.fluid.initializer.TruncatedNormalInitializer.__init__ ArgSpec(args=['self', 'loc', 'scale', 'seed'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0))
 paddle.fluid.initializer.XavierInitializer.__init__ ArgSpec(args=['self', 'uniform', 'fan_in', 'fan_out', 'seed'], varargs=None, keywords=None, defaults=(True, None, None, 0))
 paddle.fluid.initializer.BilinearInitializer.__init__ ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.initializer.MSRAInitializer.__init__ ArgSpec(args=['self', 'uniform', 'fan_in', 'seed'], varargs=None, keywords=None, defaults=(True, None, 0))
@@ -124,7 +125,7 @@ paddle.fluid.layers.split ArgSpec(args=['input', 'num_or_sections', 'dim', 'name
 paddle.fluid.layers.ctc_greedy_decoder ArgSpec(args=['input', 'blank', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.edit_distance ArgSpec(args=['input', 'label', 'normalized', 'ignored_tokens'], varargs=None, keywords=None, defaults=(True, None))
 paddle.fluid.layers.l2_normalize ArgSpec(args=['x', 'axis', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(1e-12, None))
-paddle.fluid.layers.matmul ArgSpec(args=['x', 'y', 'transpose_x', 'transpose_y', 'name'], varargs=None, keywords=None, defaults=(False, False, None))
+paddle.fluid.layers.matmul ArgSpec(args=['x', 'y', 'transpose_x', 'transpose_y', 'alpha', 'name'], varargs=None, keywords=None, defaults=(False, False, 1.0, None))
 paddle.fluid.layers.topk ArgSpec(args=['input', 'k', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.warpctc ArgSpec(args=['input', 'label', 'blank', 'norm_by_times'], varargs=None, keywords=None, defaults=(0, False))
 paddle.fluid.layers.sequence_reshape ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None)
@@ -168,6 +169,7 @@ paddle.fluid.layers.stack ArgSpec(args=['x', 'axis'], varargs=None, keywords=Non
 paddle.fluid.layers.pad2d ArgSpec(args=['input', 'paddings', 'mode', 'pad_value', 'data_format', 'name'], varargs=None, keywords=None, defaults=([0, 0, 0, 0], 'constant', 0.0, 'NCHW', None))
 paddle.fluid.layers.unstack ArgSpec(args=['x', 'axis', 'num'], varargs=None, keywords=None, defaults=(0, None))
 paddle.fluid.layers.sequence_enumerate ArgSpec(args=['input', 'win_size', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0, None))
+paddle.fluid.layers.expand ArgSpec(args=['x', 'expand_times', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.sequence_concat ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
 paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
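
The two user-visible layer changes in this spec, sketched in Python (shapes are illustrative; note that fluid.layers.data prepends a batch dimension, so expand_times below has three entries):

import paddle.fluid as fluid

x = fluid.layers.data(name='x', shape=[2, 3], dtype='float32')
y = fluid.layers.data(name='y', shape=[3, 2], dtype='float32')
# New `alpha` attribute on matmul: Out = alpha * (X x Y) instead of a
# fixed scale of 1.
out = fluid.layers.matmul(x, y, transpose_x=False, transpose_y=False,
                          alpha=0.5)
# Newly exported `expand` layer: tile `x` along each dimension, here
# doubling the last one.
tiled = fluid.layers.expand(x, expand_times=[1, 1, 2])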

paddle/fluid/framework/data_device_transform.cc

Lines changed: 4 additions & 0 deletions
@@ -25,6 +25,10 @@ void TransDataDevice(const Tensor &in, const platform::Place &dst_place,
       in.place().which(), dst_place.which(),
       "Currently, model parallelism is only supported between CPU and CUDA");
 
+  // NOTE(yy): TransDataDevice should wait for computation of input.
+  platform::DeviceContextPool::Instance().Get(in.place())->Wait();
+  platform::DeviceContextPool::Instance().Get(dst_place)->Wait();
+
   // FIXME(zcd): TransDataDevice is used to transform data from GPU to CPU and
   // the enforced checkings have been done in GetDeviceContext, so the
   // `dev_ctx->Wait()` is necessary. But `dev_ctx->Wait()` will make the program

paddle/fluid/framework/grad_op_desc_maker.h

Lines changed: 3 additions & 0 deletions
@@ -129,6 +129,9 @@ class GradOpDescMakerBase {
 
   std::string ForwardOpType() const { return this->fwd_op_.Type(); }
 
+ protected:
+  const OpDesc& ForwardOp() const { return fwd_op_; }
+
  private:
   const OpDesc& fwd_op_;
   const std::unordered_set<std::string>& no_grad_set_;

paddle/fluid/framework/prune.cc

Lines changed: 0 additions & 23 deletions
@@ -183,28 +183,5 @@ void Prune(const proto::ProgramDesc& input, proto::ProgramDesc* output) {
   output->clear_blocks();
   prune_impl(input, output, 0, -1, &dependent_vars);
 }
-
-void inference_optimize_impl(proto::ProgramDesc* input, int block_id) {
-  auto* op_field = input->mutable_blocks(block_id)->mutable_ops();
-  for (auto& op_desc : *op_field) {
-    for (auto& attr : *op_desc.mutable_attrs()) {
-      if (attr.name() == "is_test") {
-        attr.set_b(true);
-        break;
-      }
-    }
-  }
-}
-
-void InferenceOptimize(const proto::ProgramDesc& input,
-                       proto::ProgramDesc* output) {
-  *output = input;
-  int num_blocks = output->blocks_size();
-  PADDLE_ENFORCE_GT(num_blocks, 0, "ProgramDesc must have at least one block");
-  for (int i = 0; i < num_blocks; ++i) {
-    inference_optimize_impl(output, i);
-  }
-}
-
 }  // namespace framework
 }  // namespace paddle

paddle/fluid/framework/prune.h

Lines changed: 0 additions & 3 deletions
@@ -22,8 +22,5 @@ namespace framework {
 
 void Prune(const proto::ProgramDesc& input, proto::ProgramDesc* output);
 
-void InferenceOptimize(const proto::ProgramDesc& input,
-                       proto::ProgramDesc* output);
-
 }  // namespace framework
 }  // namespace paddle

paddle/fluid/operators/array_to_lod_tensor_op.cc

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ namespace operators {
 
 using LoD = framework::LoD;
 
-class ArrayToLoDFunctor;
+struct ArrayToLoDFunctor;
 template <typename DeviceContext>
 struct ArrayToLoDFunctorImpl {
   const ArrayToLoDFunctor *prev_functor_;

paddle/fluid/operators/elementwise_mul_op.cc

Lines changed: 37 additions & 1 deletion
@@ -13,9 +13,45 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/elementwise_mul_op.h"
+#include <string>
 #include "paddle/fluid/operators/elementwise_op.h"
+
+namespace paddle {
+namespace operators {
+
+class ElementwiseMulOpGradDescMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
+    op->SetType("elementwise_mul_grad");
+    op->SetInput("X", Input("X"));
+    op->SetInput("Y", Input("Y"));
+    op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
+    op->SetAttrMap(Attrs());
+    op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
+    op->SetOutput(framework::GradVarName("Y"), InputGrad("Y"));
+    return op;
+  }
+};
+
+class ElementwiseMulOpMaker : public ElementwiseOpMaker {
+ protected:
+  virtual std::string GetName() const { return "Mul"; }
+  virtual std::string GetEquation() const { return "Out = X \\\\odot Y"; }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
 namespace ops = paddle::operators;
-REGISTER_ELEMWISE_OP(elementwise_mul, "Mul", "Out = X \\\\odot Y");
+REGISTER_OPERATOR(elementwise_mul, ops::ElementwiseOp,
+                  ops::ElementwiseMulOpMaker, ops::ElementwiseOpInferVarType,
+                  ops::ElementwiseMulOpGradDescMaker);
+REGISTER_OPERATOR(elementwise_mul_grad, ops::ElementwiseOpGrad);
+
 REGISTER_OP_CPU_KERNEL(
     elementwise_mul,
     ops::ElementwiseMulKernel<paddle::platform::CPUDeviceContext, float>,

paddle/fluid/operators/elementwise_mul_op.h

Lines changed: 1 addition & 1 deletion
@@ -93,8 +93,8 @@ class ElementwiseMulGradKernel : public ElemwiseGradKernel<T> {
 
     auto* x = ctx.Input<Tensor>("X");
     auto* y = ctx.Input<Tensor>("Y");
-    auto* out = ctx.Input<Tensor>("Out");
     auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
+    auto* out = dout;  // out is not necessary
     auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
     auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
     int axis = ctx.Attr<int>("axis");
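
This change works because the gradient of an elementwise product never needs the forward output itself: dX = dOut * Y and dY = dOut * X. A NumPy sketch of the identity (illustrative values only):

import numpy as np

x = np.random.rand(3, 4).astype('float32')
y = np.random.rand(3, 4).astype('float32')
out = x * y                # forward: Out = X * Y (elementwise)
dout = np.ones_like(out)   # upstream gradient w.r.t. Out
dx = dout * y              # dL/dX = dOut * Y, no use of `out`
dy = dout * x              # dL/dY = dOut * X, no use of `out`

Since "Out" is dropped from the grad op's inputs by the new ElementwiseMulOpGradDescMaker in elementwise_mul_op.cc above, the forward output no longer has to be kept alive for the backward pass, and aliasing `out` to `dout` in the kernel is safe.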

paddle/fluid/operators/matmul_op.cc

Lines changed: 5 additions & 2 deletions
@@ -59,7 +59,8 @@ class MatMulKernel : public framework::OpKernel<T> {
         RowMatrixFromVector(x.dims()), 0, context.Attr<bool>("transpose_X"));
     auto mat_dim_b = math::CreateMatrixDescriptor(
         ColumnMatrixFromVector(y.dims()), 0, context.Attr<bool>("transpose_Y"));
-    blas.MatMul(x, mat_dim_a, y, mat_dim_b, T(1), out, T(0));
+    auto scale = static_cast<T>(context.Attr<float>("alpha"));
+    blas.MatMul(x, mat_dim_a, y, mat_dim_b, scale, out, T(0));
   }
 };
 
@@ -185,7 +186,8 @@ class MatMulGradKernel : public framework::OpKernel<T> {
     auto blas = math::GetBlas<DeviceContext, T>(context);
     auto mat_dim_a = math::CreateMatrixDescriptor(a.dims(), 0, trans_a);
     auto mat_dim_b = math::CreateMatrixDescriptor(b.dims(), 0, trans_b);
-    blas.MatMul(a, mat_dim_a, b, mat_dim_b, T(1), out, T(0));
+    blas.MatMul(a, mat_dim_a, b, mat_dim_b,
+                static_cast<T>(context.Attr<float>("alpha")), out, T(0));
   }
 
   void CalcInputGrad(const framework::ExecutionContext &context,
@@ -334,6 +336,7 @@ class MatMulOpMaker : public framework::OpProtoAndCheckerMaker {
              R"DOC(If true, use the transpose of `Y`.
 )DOC")
         .SetDefault(false);
+    AddAttr<float>("alpha", "The scale of Out").SetDefault(1.0f);
     AddComment(R"DOC(
 MatMul Operator.
 
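
With the new attribute, the kernel computes Out = alpha * op(X) * op(Y), where op() applies the optional transposes. A NumPy reference for the plain 2-D case (a sketch; it ignores the batched and vector inputs the op also handles):

import numpy as np

def matmul_ref(x, y, transpose_x=False, transpose_y=False, alpha=1.0):
    a = x.T if transpose_x else x
    b = y.T if transpose_y else y
    # Same result the kernel produces by passing `alpha` as the BLAS scale.
    return alpha * np.dot(a, b)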
