PaddlePaddle
diff --git a/‎paddle/fluid/API.spec
Lines changed: 1 addition & 0 deletions b/‎paddle/fluid/API.spec
Lines changed: 1 addition & 0 deletions
diff --git a/‎paddle/fluid/operators/affine_grid_cudnn_op.cu.cc
Lines changed: 112 additions & 0 deletions b/‎paddle/fluid/operators/affine_grid_cudnn_op.cu.cc
Lines changed: 112 additions & 0 deletions
diff --git a/‎paddle/fluid/operators/affine_grid_op.cc
Lines changed: 233 additions & 0 deletions b/‎paddle/fluid/operators/affine_grid_op.cc
Lines changed: 233 additions & 0 deletions
@@ -174,6 +174,7 @@ paddle.fluid.layers.mean ArgSpec(args=['x', 'name'], varargs=None, keywords=None
 paddle.fluid.layers.mul ArgSpec(args=['x', 'y', 'x_num_col_dims', 'y_num_col_dims', 'name'], varargs=None, keywords=None, defaults=(1, 1, None))
 paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=['x', 'label', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.maxout ArgSpec(args=['x', 'groups', 'name'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.affine_grid ArgSpec(args=['theta', 'out_shape', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.sequence_reverse ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.affine_channel ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None))
 paddle.fluid.layers.hash ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None))
 
@@ -0,0 +1,112 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/platform/cudnn_helper.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+using ScopedSpatialTransformerDescriptor =
+    platform::ScopedSpatialTransformerDescriptor;
+
+template <typename T>
+class CUDNNAffineGridOpKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
+                   "It must use CUDAPlace.");
+    auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
+    auto handle = dev_ctx.cudnn_handle();
+    auto* theta = ctx.Input<Tensor>("Theta");
+    auto* output = ctx.Output<Tensor>("Output");
+    const T* theta_data = theta->data<T>();
+
+    int n = theta->dims()[0];
+    auto size_attr = ctx.Attr<std::vector<int>>("output_shape");
+    Tensor h_sizes;
+    int* h_size_data;
+    if (size_attr.size() == 0) {
+      auto* output_shape = ctx.Input<Tensor>("OutputShape");
+      framework::TensorCopy(*output_shape, platform::CPUPlace(), &h_sizes);
+      h_size_data = h_sizes.data<int>();
+    } else {
+      h_size_data = h_sizes.mutable_data<int>({4}, platform::CPUPlace());
+      h_size_data[0] = n;
+      h_size_data[1] = size_attr[1];
+      h_size_data[2] = size_attr[2];
+      h_size_data[3] = size_attr[3];
+    }
+
+    T* output_data = output->mutable_data<T>(
+        {n, h_size_data[2], h_size_data[3], 2}, ctx.GetPlace());
+    ScopedSpatialTransformerDescriptor st_desc;
+    cudnnSpatialTransformerDescriptor_t cudnn_st_desc =
+        st_desc.descriptor<T>(4, h_size_data);
+
+    PADDLE_ENFORCE(platform::dynload::cudnnSpatialTfGridGeneratorForward(
+        handle, cudnn_st_desc, theta_data, output_data));
+  }
+};
+
+template <typename T>
+class CUDNNAffineGridGradOpKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
+                   "It must use CUDAPlace.");
+    auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
+    auto handle = dev_ctx.cudnn_handle();
+    auto output_grad = ctx.Input<Tensor>(framework::GradVarName("Output"));
+    auto theta_grad = ctx.Output<Tensor>(framework::GradVarName("Theta"));
+
+    int n = output_grad->dims()[0];
+    auto size_attr = ctx.Attr<std::vector<int>>("output_shape");
+    Tensor h_sizes;
+    int* h_size_data;
+    if (size_attr.size() == 0) {
+      auto* output_shape = ctx.Input<Tensor>("OutputShape");
+      framework::TensorCopy(*output_shape, platform::CPUPlace(), &h_sizes);
+      h_size_data = h_sizes.data<int>();
+    } else {
+      h_size_data = h_sizes.mutable_data<int>({4}, platform::CPUPlace());
+      h_size_data[0] = n;
+      h_size_data[1] = size_attr[1];
+      h_size_data[2] = size_attr[2];
+      h_size_data[3] = size_attr[3];
+    }
+
+    ScopedSpatialTransformerDescriptor st_desc;
+    cudnnSpatialTransformerDescriptor_t cudnn_st_desc =
+        st_desc.descriptor<T>(4, h_size_data);
+
+    const T* output_grad_data = output_grad->data<T>();
+    T* theta_grad_data = theta_grad->mutable_data<T>(ctx.GetPlace());
+
+    PADDLE_ENFORCE(platform::dynload::cudnnSpatialTfGridGeneratorBackward(
+        handle, cudnn_st_desc, output_grad_data, theta_grad_data));
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace plat = paddle::platform;
+REGISTER_OP_KERNEL(affine_grid, CUDNN, plat::CUDAPlace,
+                   paddle::operators::CUDNNAffineGridOpKernel<float>,
+                   paddle::operators::CUDNNAffineGridOpKernel<double>);
+REGISTER_OP_KERNEL(affine_grid_grad, CUDNN, plat::CUDAPlace,
+                   paddle::operators::CUDNNAffineGridGradOpKernel<float>,
+                   paddle::operators::CUDNNAffineGridGradOpKernel<double>);
@@ -0,0 +1,233 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/affine_grid_op.h"
+#include <string>
+#include "paddle/fluid/framework/op_registry.h"
+#ifdef PADDLE_WITH_CUDA
+#include "paddle/fluid/platform/cudnn_helper.h"
+#endif
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+
+template <typename T>
+struct Linspace<paddle::platform::CPUDeviceContext, T> {
+  framework::Tensor operator()(T start, T end, int count,
+                               const framework::ExecutionContext& ctx) {
+    Tensor numbers;
+    T* number_data = numbers.mutable_data<T>({count}, platform::CPUPlace());
+    T slice = (end - start) / (T)(count - 1);
+    for (int i = 0; i < count; ++i) {
+      number_data[i] = start + (T)i * slice;
+    }
+    return numbers;
+  }
+};
+
+class AffineGridOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("Theta"),
+                   "Input(Theta) of AffineGridOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("Output"),
+                   "Output(Output) of AffineGridOp should not be null.");
+    auto theta_dims = ctx->GetInputDim("Theta");
+    PADDLE_ENFORCE(theta_dims.size() == 3,
+                   "AffineGrid's Input(Theta) should be 3-D tensor.");
+
+    auto output_shape = ctx->Attrs().Get<std::vector<int>>("output_shape");
+    if (output_shape.size() == 0) {
+      PADDLE_ENFORCE(ctx->HasInput("OutputShape"),
+                     "Input(OutputShape) of AffineGridOp should not be null if "
+                     "attr(output_shape) is not configured.");
+      auto output_shape_dims = ctx->GetInputDim("OutputShape");
+      PADDLE_ENFORCE(output_shape_dims.size() == 1,
+                     "AffineGrid's Input(OutputShape) should be 1-D tensor.");
+    } else {
+      PADDLE_ENFORCE(output_shape.size() == 4,
+                     "The size of attr(output_shape) should be 4.");
+    }
+
+    PADDLE_ENFORCE(theta_dims[1] == 2, "Input(theta) dims[1] should be 2.");
+    PADDLE_ENFORCE(theta_dims[2] == 3, "Input(theta) dims[2] should be 3.");
+    // N * H * W * 2
+    ctx->SetOutputDim("Output",
+                      framework::make_ddim({theta_dims[0], -1, -1, 2}));
+    ctx->ShareLoD("Theta", "Output");
+  }
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    framework::LibraryType library{framework::LibraryType::kPlain};
+#ifdef PADDLE_WITH_CUDA
+    if (platform::CanCUDNNBeUsed(ctx)) {
+      library = framework::LibraryType::kCUDNN;
+    }
+#endif
+    auto data_type = framework::ToDataType(ctx.Input<Tensor>("Theta")->type());
+    return framework::OpKernelType(data_type, ctx.GetPlace(),
+                                   framework::DataLayout::kAnyLayout, library);
+  }
+};
+
+class AffineGridOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddInput(
+        "Theta",
+        "(Tensor) A batch of affine transform parameters with shape [N, 2, 3]. "
+        "It is used to transform coordinate (x_0, y_0) to coordinate (x_1, "
+        "y_1).");
+    AddInput("OutputShape",
+             "(Tensor) The shape of target image with format [N, C, H, W].")
+        .AsDispensable();
+    AddOutput("Output", "(Tensor) Output Tensor with shape [N, H, W, 2].");
+    AddAttr<bool>(
+        "use_cudnn",
+        "(bool, default false) Only used in cudnn kernel, need install cudnn")
+        .SetDefault(true);
+    AddAttr<std::vector<int>>(
+        "output_shape",
+        "The target output image shape with format [N, C, H, W].")
+        .SetDefault(std::vector<int>());
+
+    AddComment(R"DOC(
+    It generates a grid of (x,y) coordinates using the parameters of the
+    affine transformation that correspond to a set of points where the input
+    feature map should be sampled to produce the transformed output feature map.
+
+    Given:
+        Theta = [[[x_11, x_12, x_13]
+                  [x_14, x_15, x_16]]
+                 [[x_21, x_22, x_23]
+                  [x_24, x_25, x_26]]]
+    
+        OutputShape = [2, 3, 5, 5]
+
+    Step 1:
+
+        Generate relative coordinates according to OutputShape.
+        The values of relative coordinates are in the interval between -1 and 1.
+        The shape of the relative coordinates is [2, H, W] as below:
+    
+        C = [[[-1.  -1.  -1.  -1.  -1. ]
+              [-0.5 -0.5 -0.5 -0.5 -0.5]
+              [ 0.   0.   0.   0.   0. ]
+              [ 0.5  0.5  0.5  0.5  0.5]
+              [ 1.   1.   1.   1.   1. ]] 
+             [[-1.  -0.5  0.   0.5  1. ]
+              [-1.  -0.5  0.   0.5  1. ]
+              [-1.  -0.5  0.   0.5  1. ]
+              [-1.  -0.5  0.   0.5  1. ]
+              [-1.  -0.5  0.   0.5  1. ]]]
+        C[0] is the coordinates in height axis and  C[1] is the coordinates in width axis.
+    
+    Step2:
+        Tanspose and reshape C to shape [H * W, 2] and append ones to last dimension. The we get:
+        C_ = [[-1.  -1.   1. ]
+              [-0.5 -1.   1. ]
+              [ 0.  -1.   1. ]
+              [ 0.5 -1.   1. ]
+              [ 1.  -1.   1. ]
+              [-1.  -0.5  1. ]
+              [-0.5 -0.5  1. ]
+              [ 0.  -0.5  1. ]
+              [ 0.5 -0.5  1. ]
+              [ 1.  -0.5  1. ]
+              [-1.   0.   1. ]
+              [-0.5  0.   1. ]
+              [ 0.   0.   1. ]
+              [ 0.5  0.   1. ]
+              [ 1.   0.   1. ]
+              [-1.   0.5  1. ]
+              [-0.5  0.5  1. ]
+              [ 0.   0.5  1. ]
+              [ 0.5  0.5  1. ]
+              [ 1.   0.5  1. ]
+              [-1.   1.   1. ]
+              [-0.5  1.   1. ]
+              [ 0.   1.   1. ]
+              [ 0.5  1.   1. ]
+              [ 1.   1.   1. ]]
+    Step3:
+        Compute output by equation $$Output[i] = C_ * Theta[i]^T$$
+    )DOC");
+  }
+};
+
+class AffineGridOpGrad : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    auto theta_dims = ctx->GetInputDim("Theta");
+    if (ctx->HasOutput(framework::GradVarName("Theta"))) {
+      ctx->SetOutputDim(framework::GradVarName("Theta"), theta_dims);
+    }
+  }
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    framework::LibraryType library_{framework::LibraryType::kPlain};
+#ifdef PADDLE_WITH_CUDA
+    if (platform::CanCUDNNBeUsed(ctx)) {
+      library_ = framework::LibraryType::kCUDNN;
+    }
+#endif
+    return framework::OpKernelType(
+        framework::ToDataType(ctx.Input<Tensor>("Theta")->type()),
+        ctx.GetPlace(), framework::DataLayout::kAnyLayout, library_);
+  }
+};
+
+class AffineGridGradMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    auto* op = new framework::OpDesc();
+    op->SetType("affine_grid_grad");
+    op->SetInput("Theta", Input("Theta"));
+    op->SetInput("OutputShape", Input("OutputShape"));
+    op->SetInput(framework::GradVarName("Output"), OutputGrad("Output"));
+
+    op->SetAttrMap(Attrs());
+
+    op->SetOutput(framework::GradVarName("Theta"), InputGrad("Theta"));
+    return std::unique_ptr<framework::OpDesc>(op);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OPERATOR(affine_grid, ops::AffineGridOp, ops::AffineGridOpMaker,
+                  ops::AffineGridGradMaker);
+REGISTER_OPERATOR(affine_grid_grad, ops::AffineGridOpGrad);
+
+REGISTER_OP_CPU_KERNEL(
+    affine_grid,
+    ops::AffineGridOpKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::AffineGridOpKernel<paddle::platform::CPUDeviceContext, double>);
+REGISTER_OP_CPU_KERNEL(
+    affine_grid_grad,
+    ops::AffineGridGradOpKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::AffineGridGradOpKernel<paddle::platform::CPUDeviceContext, double>);