Commit 735a2db

[cherry-pick] add Adam beta1/beta2 support Variable (#21433)

* add Adam beta1/beta2 support Variable. test=develop

Parent: 2660107

11 files changed: +402 additions, -68 deletions

paddle/fluid/operators/optimizers/adam_op.cc

Lines changed: 55 additions & 21 deletions
@@ -20,27 +20,50 @@ namespace operators {
 using Tensor = framework::Tensor;
 
 void AdamOp::InferShape(framework::InferShapeContext* ctx) const {
-  PADDLE_ENFORCE(ctx->HasInput("Param"),
-                 "Input(Param) of AdamOp should not be null.");
-  PADDLE_ENFORCE(ctx->HasInput("Grad"),
-                 "Input(Grad) of AdamOp should not be null.");
-  PADDLE_ENFORCE(ctx->HasInput("Moment1"),
-                 "Input(Moment1) of AdamOp should not be null.");
-  PADDLE_ENFORCE(ctx->HasInput("Moment2"),
-                 "Input(Moment2) of AdamOp should not be null.");
-  PADDLE_ENFORCE(ctx->HasInput("LearningRate"),
-                 "Input(LearningRate) of AdamOp should not be null.");
-  PADDLE_ENFORCE(ctx->HasInput("Beta1Pow"),
-                 "Input(Beta1Pow) of AdamOp should not be null.");
-  PADDLE_ENFORCE(ctx->HasInput("Beta2Pow"),
-                 "Input(Beta2Pow) of AdamOp should not be null.");
-
-  PADDLE_ENFORCE(ctx->HasOutput("ParamOut"),
-                 "Output(ParamOut) of AdamOp should not be null.");
-  PADDLE_ENFORCE(ctx->HasOutput("Moment1Out"),
-                 "Output(Moment1Out) of AdamOp should not be null.");
-  PADDLE_ENFORCE(ctx->HasOutput("Moment2Out"),
-                 "Output(Moment2Out) of AdamOp should not be null.");
+  PADDLE_ENFORCE_EQ(
+      ctx->HasInput("Param"), true,
+      platform::errors::NotFound("Input(Param) of AdamOp should not be null."));
+  PADDLE_ENFORCE_EQ(
+      ctx->HasInput("Grad"), true,
+      platform::errors::NotFound("Input(Grad) of AdamOp should not be null."));
+  PADDLE_ENFORCE_EQ(ctx->HasInput("Moment1"), true,
+                    platform::errors::NotFound(
+                        "Input(Moment1) of AdamOp should not be null."));
+  PADDLE_ENFORCE_EQ(ctx->HasInput("Moment2"), true,
+                    platform::errors::NotFound(
+                        "Input(Moment2) of AdamOp should not be null."));
+  PADDLE_ENFORCE_EQ(ctx->HasInput("LearningRate"), true,
+                    platform::errors::NotFound(
+                        "Input(LearningRate) of AdamOp should not be null."));
+  PADDLE_ENFORCE_EQ(ctx->HasInput("Beta1Pow"), true,
+                    platform::errors::NotFound(
+                        "Input(Beta1Pow) of AdamOp should not be null."));
+  PADDLE_ENFORCE_EQ(ctx->HasInput("Beta2Pow"), true,
+                    platform::errors::NotFound(
+                        "Input(Beta2Pow) of AdamOp should not be null."));
+
+  if (ctx->IsRuntime() && ctx->HasInput("Beta1Tensor")) {
+    auto beta1 = ctx->Inputs("Beta1Tensor");
+    PADDLE_ENFORCE_EQ(
+        beta1.size(), 1,
+        platform::errors::InvalidArgument("Input(Beta1Tensor) size must be 1"));
+  }
+  if (ctx->IsRuntime() && ctx->HasInput("Beta2Tensor")) {
+    auto beta2 = ctx->Inputs("Beta2Tensor");
+    PADDLE_ENFORCE_EQ(
+        beta2.size(), 1,
+        platform::errors::InvalidArgument("Input(Beta2Tensor) size must be 1"));
+  }
+
+  PADDLE_ENFORCE_EQ(ctx->HasOutput("ParamOut"), true,
+                    platform::errors::NotFound(
+                        "Output(ParamOut) of AdamOp should not be null."));
+  PADDLE_ENFORCE_EQ(ctx->HasOutput("Moment1Out"), true,
+                    platform::errors::NotFound(
+                        "Output(Moment1Out) of AdamOp should not be null."));
+  PADDLE_ENFORCE_EQ(ctx->HasOutput("Moment2Out"), true,
+                    platform::errors::NotFound(
+                        "Output(Moment2Out) of AdamOp should not be null."));
 
   auto lr_dims = ctx->GetInputDim("LearningRate");
   PADDLE_ENFORCE_NE(framework::product(lr_dims), 0,

@@ -93,6 +116,17 @@ class AdamOpMaker : public framework::OpProtoAndCheckerMaker {
     AddInput("Beta1Pow", "(Tensor) Input beta1 power accumulator");
     AddInput("Beta2Pow", "(Tensor) Input beta2 power accumulator");
 
+    AddInput("Beta1Tensor",
+             "(Tensor<float32>, optional) If provided, Adam will use this "
+             "as beta1, this has a higher priority than attr(beta1), the "
+             "shape of this tensor MUST BE [1].")
+        .AsDispensable();
+    AddInput("Beta2Tensor",
+             "(Tensor<float32>, optional) If provided, Adam will use this "
+             "as beta2, this has a higher priority than attr(beta2), the "
+             "shape of this tensor MUST BE [1].")
+        .AsDispensable();
+
     AddOutput("ParamOut", "(Tensor) Output parameter");
     AddOutput("Moment1Out", "(Tensor) Output first moment");
     AddOutput("Moment2Out", "(Tensor) Output second moment");

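The two dispensable inputs declared above let beta1 and beta2 be supplied as runtime tensors instead of fixed attributes. A minimal Python-side sketch of how this could be exercised, assuming the optimizer.py change in this commit (one of the 11 files, not shown here) lets fluid.optimizer.Adam accept a float32 Variable of shape [1] for beta1/beta2 and wire it to Beta1Tensor/Beta2Tensor:

    # Hedged sketch: assumes fluid.optimizer.Adam in this commit accepts a
    # Variable for beta1/beta2 and feeds it to the adam op's Beta1Tensor input.
    import numpy as np
    import paddle.fluid as fluid

    x = fluid.layers.data(name='x', shape=[4], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    pred = fluid.layers.fc(input=x, size=1)
    loss = fluid.layers.mean(fluid.layers.square_error_cost(input=pred, label=y))

    # Keep beta1 in a persistable [1] Variable so it can be updated between
    # iterations (e.g. a warm-up schedule) without rebuilding the program.
    beta1 = fluid.layers.create_global_var(
        shape=[1], value=0.85, dtype='float32', persistable=True, name='beta1')

    opt = fluid.optimizer.Adam(learning_rate=0.01, beta1=beta1)
    opt.minimize(loss)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())
    feed = {'x': np.random.rand(8, 4).astype('float32'),
            'y': np.random.rand(8, 1).astype('float32')}
    exe.run(fluid.default_main_program(), feed=feed, fetch_list=[loss])
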
paddle/fluid/operators/optimizers/adam_op.h

Lines changed: 21 additions & 2 deletions
@@ -29,6 +29,16 @@ namespace operators {
 
 namespace scatter = paddle::operators::math::scatter;
 
+static inline float GetAttrFromTensor(const framework::Tensor* tensor) {
+  const float* tensor_data = tensor->data<float>();
+  framework::Tensor cpu_tensor;
+  if (platform::is_gpu_place(tensor->place())) {
+    TensorCopySync(*tensor, platform::CPUPlace(), &cpu_tensor);
+    tensor_data = cpu_tensor.data<float>();
+  }
+  return tensor_data[0];
+}
+
 class AdamOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;

@@ -367,8 +377,6 @@ class AdamOpKernel : public framework::OpKernel<T> {
     int64_t min_row_size_to_use_multithread =
         ctx.Attr<int64_t>("min_row_size_to_use_multithread");
     bool lazy_mode = ctx.Attr<bool>("lazy_mode");
-    T beta1 = static_cast<T>(ctx.Attr<float>("beta1"));
-    T beta2 = static_cast<T>(ctx.Attr<float>("beta2"));
     T epsilon = static_cast<T>(ctx.Attr<float>("epsilon"));
     auto& param = Ref(ctx.Input<LoDTensor>("Param"), "Must set Param");
     // auto& grad = Ref(ctx.Input<LoDTensor>("Grad"), "Must set Grad");

@@ -390,6 +398,17 @@ class AdamOpKernel : public framework::OpKernel<T> {
     auto& mom2_out =
         Ref(ctx.Output<LoDTensor>("Moment2Out"), "Must set Moment1Out");
 
+    T beta1 = static_cast<T>(ctx.Attr<float>("beta1"));
+    if (ctx.HasInput("Beta1Tensor")) {
+      auto* beta1_tensor = ctx.Input<framework::Tensor>("Beta1Tensor");
+      beta1 = static_cast<T>(GetAttrFromTensor(beta1_tensor));
+    }
+    T beta2 = static_cast<T>(ctx.Attr<float>("beta2"));
+    if (ctx.HasInput("Beta2Tensor")) {
+      auto* beta2_tensor = ctx.Input<framework::Tensor>("Beta2Tensor");
+      beta2 = static_cast<T>(GetAttrFromTensor(beta2_tensor));
+    }
+
     if (grad_var->IsType<framework::LoDTensor>()) {
       auto& grad = Ref(ctx.Input<LoDTensor>("Grad"), "Must set Grad");

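GetAttrFromTensor reads the single float out of a one-element tensor, copying it to CPU first when it lives on the GPU, and the kernel now resolves beta1/beta2 on every Compute call: the attribute is the default and a fed Beta1Tensor/Beta2Tensor takes priority. An illustrative Python mirror of that precedence rule (not Paddle code):

    # Illustrative only: mirrors the attr-vs-tensor precedence in AdamOpKernel.
    import numpy as np

    def resolve_beta(attr_value, beta_tensor=None):
        # A provided one-element tensor overrides the attribute value.
        if beta_tensor is not None:
            return float(np.asarray(beta_tensor).reshape(-1)[0])
        return float(attr_value)

    assert resolve_beta(0.9) == 0.9                                     # attr only
    assert resolve_beta(0.9, np.array([0.5], dtype=np.float32)) == 0.5  # tensor wins
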
paddle/fluid/operators/scale_op.cc

Lines changed: 18 additions & 2 deletions
@@ -34,6 +34,14 @@ class ScaleOp : public framework::OperatorWithKernel {
                    "Input(X) of ScaleOp should not be null.");
     PADDLE_ENFORCE(ctx->HasOutput("Out"),
                    "Output(Out) of ScaleOp should not be null.");
+
+    if (ctx->IsRuntime() && ctx->HasInput("ScaleTensor")) {
+      auto scale = ctx->Inputs("ScaleTensor");
+      PADDLE_ENFORCE_EQ(scale.size(), 1,
+                        platform::errors::InvalidArgument(
+                            "Input(ScaleTensor) size must be 1"));
+    }
+
     ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
     ctx->ShareLoD("X", /*->*/ "Out");
   }

@@ -43,6 +51,11 @@ class ScaleOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("X", "(Tensor) Input tensor of scale operator.");
+    AddInput("ScaleTensor",
+             "(Tensor) If provided, use this as "
+             "scale factor, this has a higher priority than "
+             "attr(scale), the shape of this tensor MUST BE 1.")
+        .AsDispensable();
     AddOutput("Out", "(Tensor) Output tensor of scale operator.");
     AddComment(R"DOC(
 **Scale operator**

@@ -89,6 +102,9 @@ class ScaleGradMaker : public framework::SingleGradOpDescMaker {
     auto *grad_op = new framework::OpDesc();
     grad_op->SetType("scale");
     grad_op->SetInput("X", OutputGrad("Out"));
+    if (ForwardOp().Inputs().count("ScaleTensor") > 0) {
+      grad_op->SetInput("ScaleTensor", Input("ScaleTensor"));
+    }
     grad_op->SetOutput("Out", InputGrad("X"));
     grad_op->SetAttr("scale", GetAttr("scale"));
     grad_op->SetAttr("bias", 0.0f);

@@ -97,14 +113,14 @@ class ScaleGradMaker : public framework::SingleGradOpDescMaker {
   }
 };
 
-using ScaleOpInplace = framework::SingleOpInplaceInToOut;
+DECLARE_INPLACE_OP_INFERER(ScaleOpInplaceInferer, {"X", "Out"});
 }  // namespace operators
 }  // namespace paddle
 
 namespace ops = paddle::operators;
 
 REGISTER_OPERATOR(scale, ops::ScaleOp, ops::ScaleOpMaker, ops::ScaleGradMaker,
-                  ops::ScaleOpVarTypeInference, ops::ScaleOpInplace);
+                  ops::ScaleOpVarTypeInference, ops::ScaleOpInplaceInferer);
 REGISTER_OP_CPU_KERNEL(
     scale, ops::ScaleKernel<paddle::platform::CPUDeviceContext, float>,
     ops::ScaleKernel<paddle::platform::CPUDeviceContext, double>,

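ScaleGradMaker now forwards ScaleTensor to the generated backward op because the gradient of out = scale * x + bias with respect to x is scale itself, so the backward pass is another scale op and must see the same runtime factor. A quick numpy check of that identity (illustrative, not Paddle code):

    # d(scale * x + bias) / dx == scale, so grad_x = grad_out * scale.
    import numpy as np

    x = np.array([[1., 2., 3.]], dtype=np.float32)
    scale = np.float32(2.0)        # the value ScaleTensor would carry at runtime
    grad_out = np.ones_like(x)     # upstream gradient
    grad_x = grad_out * scale      # what the generated backward scale op computes
    assert np.allclose(grad_x, np.full_like(x, 2.0))
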
paddle/fluid/operators/scale_op.h

Lines changed: 17 additions & 1 deletion
@@ -19,17 +19,33 @@ limitations under the License. */
 
 namespace paddle {
 namespace operators {
+
+static inline float GetAttrFromTensor(const framework::Tensor* tensor) {
+  const float* tensor_data = tensor->data<float>();
+  framework::Tensor cpu_tensor;
+  if (platform::is_gpu_place(tensor->place())) {
+    TensorCopySync(*tensor, platform::CPUPlace(), &cpu_tensor);
+    tensor_data = cpu_tensor.data<float>();
+  }
+  return tensor_data[0];
+}
+
 template <typename DeviceContext, typename T>
 class ScaleKernel : public framework::OpKernel<T> {
  public:
  virtual void Compute(const framework::ExecutionContext& ctx) const {
    auto* in_var = ctx.InputVar("X");
    auto* in = framework::GetLoDTensorOrSelectedRowsValueFromVar(*in_var);
 
-    auto scale = static_cast<T>(ctx.Attr<float>("scale"));
    auto bias = static_cast<T>(ctx.Attr<float>("bias"));
    auto bias_after_scale = ctx.Attr<bool>("bias_after_scale");
 
+    auto scale = static_cast<T>(ctx.Attr<float>("scale"));
+    if (ctx.HasInput("ScaleTensor")) {
+      auto* scale_tensor = ctx.Input<framework::Tensor>("ScaleTensor");
+      scale = GetAttrFromTensor(scale_tensor);
+    }
+
    auto* out_var = ctx.OutputVar("Out");
    if (in_var->IsType<framework::SelectedRows>() && in_var != out_var) {
      auto& in_slr = in_var->Get<framework::SelectedRows>();

python/paddle/fluid/layers/layer_function_generator.py

Lines changed: 2 additions & 0 deletions
@@ -174,6 +174,8 @@ def infer_and_check_dtype(op_proto, *args, **kwargs):
         if not isinstance(val, list) and not isinstance(val, tuple):
             val = [val]
         if len(val) == 0:
+            if len(args) == 0:
+                continue
             val = [args[0]]
             args = args[1:]
 

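The guard added to infer_and_check_dtype skips an op input when neither a keyword value nor a remaining positional argument is available, which can happen once dispensable inputs such as ScaleTensor are declared but not passed; without it, args[0] would raise IndexError. A stripped-down illustration of the guarded loop (hypothetical helper, not the actual Paddle source):

    # Stripped-down illustration (hypothetical helper, not Paddle source) of why
    # the `if len(args) == 0: continue` guard matters with optional op inputs.
    def first_dtype(input_names, *args, **kwargs):
        dtype = None
        for name in input_names:                 # e.g. ['X', 'ScaleTensor']
            val = kwargs.pop(name, [])
            if not isinstance(val, (list, tuple)):
                val = [val]
            if len(val) == 0:
                if len(args) == 0:               # the guard added in this commit
                    continue                     # optional input simply not given
                val = [args[0]]
                args = args[1:]
            for each in val:
                dtype = getattr(each, 'dtype', dtype)
        return dtype

    class FakeVar(object):
        dtype = 'float32'

    # Only 'X' is supplied; 'ScaleTensor' is dispensable and omitted.
    assert first_dtype(['X', 'ScaleTensor'], FakeVar()) == 'float32'
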
python/paddle/fluid/layers/nn.py

Lines changed: 33 additions & 9 deletions
@@ -14074,7 +14074,7 @@ def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None):
 
     Args:
         x(Variable): Input N-D Tensor of scale operator. Data type can be float32, float64, int8, int16, int32, int64, uint8.
-        scale(float): The scale factor of the input.
+        scale(float|Variable): The scale factor of the input, it should be a float number or a Variable with shape [1] and data type as float32.
         bias(float): The bias to be put on the input.
         bias_after_scale(bool): Apply bias addition after or before scaling. It is useful for numeric stability in some circumstances.
         act(str, optional): Activation applied to the output such as tanh, softmax, sigmoid, relu.

@@ -14099,6 +14099,27 @@ def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None):
 
             res = exe.run(fluid.default_main_program(), feed={'x':img}, fetch_list=[output])
             print(res) # [array([[ 3., 5., 7.], [ 9., 11., 13.]], dtype=float32)]
+
+        .. code-block:: python
+
+            # scale with parameter scale as Variable
+            import paddle.fluid as fluid
+            import numpy as np
+
+            inputs = fluid.layers.data(name="x", shape=[2, 3], dtype='float32')
+            scale = fluid.layers.data(name="scale", shape=[1], dtype='float32',
+                                      append_batch_size=False)
+            output = fluid.layers.scale(inputs, scale = scale, bias = 1.0)
+
+            exe = fluid.Executor(fluid.CPUPlace())
+            exe.run(fluid.default_startup_program())
+
+            img = np.array([[1, 2, 3], [4, 5, 6]]).astype(np.float32)
+            scale_np = np.array([2.]).astype(np.float32)
+
+            res = exe.run(fluid.default_main_program(), feed={'x':img, 'scale':scale_np}, fetch_list=[output])
+            print(res) # [array([[ 3., 5., 7.], [ 9., 11., 13.]], dtype=float32)]
+
     """
 
     helper = LayerHelper('scale', **locals())

@@ -14108,15 +14129,18 @@ def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None):
         out = helper.create_variable(
             name=name, dtype=x.dtype, persistable=False)
 
+    inputs = {'X': x}
+    attrs = {
+        'bias': float(bias),
+        'bias_after_scale': bias_after_scale,
+    }
+    if isinstance(scale, Variable):
+        inputs['ScaleTensor'] = scale
+    else:
+        attrs['scale'] = float(scale)
+
     helper.append_op(
-        type='scale',
-        inputs={'X': x},
-        outputs={'Out': out},
-        attrs={
-            'scale': float(scale),
-            'bias': float(bias),
-            'bias_after_scale': bias_after_scale
-        })
+        type='scale', inputs=inputs, outputs={'Out': out}, attrs=attrs)
     return helper.append_activation(out)
 
 
