/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/mish_op.h"
#include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/platform/gpu_launch_config.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;

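// Forward kernel: mish(x) = x * tanh(softplus(x)).
// CalcSoftplus evaluates softplus with the `threshold` cutoff for
// numerical stability.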
template <typename T>
__global__ void KeMishFw(const T* in, T* out, const int numel,
                         const float threshold) {
  int tid = blockIdx.x * blockDim.x + threadIdx.x;
  int stride = blockDim.x * gridDim.x;
  for (; tid < numel; tid += stride) {
    T x = in[tid];
    T sp = CalcSoftplus<T>(x, threshold);
    out[tid] = x * tanh(sp);
  }
}

// expf instead of exp should be used for the float type; hence a float
// kernel is implemented and registered separately.
__global__ void KeMishFwFP32(const float* in, float* out, const int numel,
                             const float threshold) {
  int tid = blockIdx.x * blockDim.x + threadIdx.x;
  int stride = blockDim.x * gridDim.x;
  for (; tid < numel; tid += stride) {
    float x = in[tid];
    float sp = CalcSoftplusFP32(x, threshold);
    out[tid] = x * tanhf(sp);
  }
}

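// Backward kernels: with sp = softplus(x),
//   d(mish)/dx = tanh(sp) + x * (1 - tanh(sp)^2) * d(sp)/dx,
// where d(sp)/dx = 1 - exp(-sp) (i.e. sigmoid(x)), computed below as
// -expm1(-sp).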
template <typename T>
__global__ void KeMishBw(const T* in, const T* dout, T* din, const int numel,
                         const float threshold) {
  int tid = blockIdx.x * blockDim.x + threadIdx.x;
  int stride = blockDim.x * gridDim.x;
  for (; tid < numel; tid += stride) {
    T x = in[tid];
    T sp = CalcSoftplus<T>(x, threshold);
    T tsp = tanh(sp);
    T grad_sp = -expm1(-sp);
    T grad_tsp = (static_cast<T>(1) - tsp * tsp) * grad_sp;
    din[tid] = dout[tid] * (x * grad_tsp + tsp);
  }
}

__global__ void KeMishBwFP32(const float* in, const float* dout, float* din,
                             const int numel, const float threshold) {
  int tid = blockIdx.x * blockDim.x + threadIdx.x;
  int stride = blockDim.x * gridDim.x;
  for (; tid < numel; tid += stride) {
    float x = in[tid];
    float sp = CalcSoftplusFP32(x, threshold);
    float tsp = tanhf(sp);
    float grad_sp = -expm1f(-sp);
    float grad_tsp = (static_cast<float>(1) - tsp * tsp) * grad_sp;
    din[tid] = dout[tid] * (x * grad_tsp + tsp);
  }
}

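// Forward OpKernel wrappers: compute the launch configuration from the
// element count and dispatch the element-wise kernels on the op's CUDA
// stream.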
template <typename DeviceContext, typename T>
class MishCUDAKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* x = ctx.Input<Tensor>("X");
    auto* out = ctx.Output<Tensor>("Out");

    const float threshold = ctx.Attr<float>("threshold");

    const T* x_data = x->data<T>();
    T* out_data = out->mutable_data<T>(ctx.GetPlace());

    const int numel = x->numel();

    platform::GpuLaunchConfig config = platform::getGpuLaunchConfig(numel, ctx);
    KeMishFw<T><<<config.blocks, config.threads, 0,
                  ctx.cuda_device_context().stream()>>>(x_data, out_data, numel,
                                                        threshold);
  }
};

template <typename DeviceContext>
class MishFP32CUDAKernel : public framework::OpKernel<float> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* x = ctx.Input<Tensor>("X");
    auto* out = ctx.Output<Tensor>("Out");

    const float threshold = ctx.Attr<float>("threshold");

    const float* x_data = x->data<float>();
    float* out_data = out->mutable_data<float>(ctx.GetPlace());

    const int numel = x->numel();

    platform::GpuLaunchConfig config = platform::getGpuLaunchConfig(numel, ctx);
    KeMishFwFP32<<<config.blocks, config.threads, 0,
                   ctx.cuda_device_context().stream()>>>(x_data, out_data,
                                                         numel, threshold);
  }
};

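// Gradient OpKernel wrappers: read X and GRAD(Out), write GRAD(X).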
template <typename DeviceContext, typename T>
class MishGradCUDAKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* x = ctx.Input<Tensor>("X");
    auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
    auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));

    auto threshold = ctx.Attr<float>("threshold");

    const T* x_data = x->data<T>();
    const T* dout_data = dout->data<T>();
    T* dx_data = dx->mutable_data<T>(ctx.GetPlace());

    const int numel = x->numel();

    platform::GpuLaunchConfig config = platform::getGpuLaunchConfig(numel, ctx);
    KeMishBw<T><<<config.blocks, config.threads, 0,
                  ctx.cuda_device_context().stream()>>>(
        x_data, dout_data, dx_data, numel, threshold);
  }
};

template <typename DeviceContext>
class MishGradFP32CUDAKernel : public framework::OpKernel<float> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* x = ctx.Input<Tensor>("X");
    auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
    auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));

    auto threshold = ctx.Attr<float>("threshold");

    const float* x_data = x->data<float>();
    const float* dout_data = dout->data<float>();
    float* dx_data = dx->mutable_data<float>(ctx.GetPlace());

    const int numel = x->numel();

    platform::GpuLaunchConfig config = platform::getGpuLaunchConfig(numel, ctx);
    KeMishBwFP32<<<config.blocks, config.threads, 0,
                   ctx.cuda_device_context().stream()>>>(
        x_data, dout_data, dx_data, numel, threshold);
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
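// float inputs are routed to the FP32 kernels (tanhf/expm1f); double uses
// the templated kernels.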
REGISTER_OP_CUDA_KERNEL(
    mish, ops::MishFP32CUDAKernel<paddle::platform::CUDADeviceContext>,
    ops::MishCUDAKernel<paddle::platform::CUDADeviceContext, double>)
REGISTER_OP_CUDA_KERNEL(
    mish_grad, ops::MishGradFP32CUDAKernel<paddle::platform::CUDADeviceContext>,
    ops::MishGradCUDAKernel<paddle::platform::CUDADeviceContext, double>)