14
14
15
15
#pragma once
16
16
17
+ #include < vector>
17
18
#include " paddle/fluid/framework/op_registry.h"
18
19
#include " paddle/fluid/operators/detail/safe_ref.h"
19
20
#include " paddle/fluid/platform/device_context.h"
20
21
#include " paddle/fluid/platform/for_range.h"
21
- #include " thrust/random.h"
22
+ #ifdef PADDLE_WITH_CUDA
23
+ #include < thrust/random.h>
24
+ #endif
22
25
23
26
namespace paddle {
24
27
namespace operators {
@@ -34,36 +37,39 @@ struct Random<platform::CPUDeviceContext> {
34
37
using UniformIntDist = std::uniform_int_distribution<T>;
35
38
};
36
39
40
#ifdef PADDLE_WITH_CUDA
// Random-engine traits for CUDA execution. Thrust's engines and
// distributions are callable from device code, unlike <random>, so the
// CUDA specialization maps onto their thrust equivalents.
template <>
struct Random<platform::CUDADeviceContext> {
  // Minimal standard linear congruential engine, device-callable.
  using Engine = thrust::minstd_rand;

  // Uniform integer distribution over a closed range, device-callable.
  template <typename T>
  using UniformIntDist = thrust::uniform_int_distribution<T>;
};
#endif
44
49
45
50
template <typename T>
46
- HOSTDEVICE inline void RandomCropImpl (const T* x, size_t * x_dim, T* out,
47
- size_t * out_dim, int i, int rank,
48
- int64_t prod_x_remain,
49
- int64_t prod_out_remain, size_t * offset) {
50
- size_t x_length = x_dim[rank];
51
- size_t out_length = out_dim[rank];
52
-
53
- int64_t x_stride = prod_x_remain / x_length;
54
- int64_t out_stride = prod_out_remain / out_length;
55
- size_t offset_i = offset[i];
56
- if (x_stride == 1 && out_stride == 1 ) {
57
- // In the final stage, copy from offset.
51
+ HOSTDEVICE inline void StridedMemcpy (const T* x, const size_t * x_dims, T* out,
52
+ const size_t * out_dims, int i, int rank,
53
+ size_t prod_x_remain,
54
+ size_t prod_out_remain,
55
+ const size_t * offsets) {
56
+ size_t x_dim_i = x_dims[i];
57
+ size_t out_dim_i = out_dims[i];
58
+ size_t x_stride = prod_x_remain / x_dim_i;
59
+ size_t out_stride = prod_out_remain / out_dim_i;
60
+ size_t offset_i = offsets[i];
61
+
62
+ if (i == rank - 1 ) {
63
+ PADDLE_ENFORCE (x_stride == 1 && out_stride == 1 );
58
64
x += offset_i;
59
- for (size_t i = 0 ; i < out_length ; ++i ) {
65
+ for (size_t j = 0 ; j < out_dim_i ; ++j ) {
60
66
*out++ = *x++;
61
67
}
62
68
} else {
63
69
x += offset_i * x_stride;
64
- for (size_t i = 0 ; i < out_length ; ++i ) {
65
- RandomCropImpl <T>(x, x_dim , out, out_dim , i + 1 , rank, x_stride,
66
- out_stride, offset );
70
+ for (size_t j = 0 ; j < x_dim_i ; ++j ) {
71
+ StridedMemcpy <T>(x, x_dims , out, out_dims , i + 1 , rank, x_stride,
72
+ out_stride, offsets );
67
73
x += x_stride;
68
74
out += out_stride;
69
75
}
@@ -74,94 +80,96 @@ template <typename DeviceContext, typename T>
74
80
struct RandomCropFunctor {
75
81
const T* x_;
76
82
T* out_;
77
- size_t x_dim_[9 ];
78
- size_t out_dim_[9 ];
79
- size_t prod_same_dim_;
80
-
81
- size_t prod_x_dim_;
82
- size_t prod_out_dim_;
83
-
84
- int num_same_dim_;
83
+ size_t x_dims_[9 ];
84
+ size_t out_dims_[9 ];
85
+ int num_batchsize_dims_;
85
86
int rank_;
86
-
87
87
int64_t seed_;
88
88
89
- RandomCropFunctor (const T* x, T* out, int64_t seed)
89
+ size_t prod_x_dims_;
90
+ size_t prod_out_dims_;
91
+ size_t prod_batchsize_dims_;
92
+ size_t prod_x_ins_dims_;
93
+ size_t prod_out_ins_dims_;
94
+
95
+ RandomCropFunctor (const T* x, T* out, const framework::DDim& x_dims,
96
+ const framework::DDim& out_dims, int num_batchsize_dims,
97
+ int64_t seed)
90
98
: x_(x),
91
99
out_ (out),
92
- prod_same_dim_(1 ),
93
- prod_x_dim_(1 ),
94
- prod_out_dim_(1 ),
100
+ num_batchsize_dims_(num_batchsize_dims),
101
+ rank_(x_dims.size()),
95
102
seed_(seed) {
96
- std::fill (x_dim_, x_dim_ + sizeof (x_dim_) / sizeof (size_t ), 0 );
97
- std::fill (out_dim_, out_dim_ + sizeof (out_dim_) / sizeof (size_t ), 0 );
103
+ PADDLE_ENFORCE_EQ (x_dims.size (), out_dims.size ());
104
+ PADDLE_ENFORCE_GT (rank_, num_batchsize_dims_);
105
+ prod_batchsize_dims_ = 1 ;
106
+ prod_x_ins_dims_ = 1 ;
107
+ prod_out_ins_dims_ = 1 ;
108
+ for (size_t i = 0 ; i < rank_; ++i) {
109
+ size_t x_dim_i = x_dims[i];
110
+ size_t out_dim_i = out_dims[i];
111
+ x_dims_[i] = x_dim_i;
112
+ out_dims_[i] = out_dim_i;
113
+ if (i < num_batchsize_dims_) {
114
+ PADDLE_ENFORCE_EQ (x_dim_i, out_dim_i);
115
+ prod_batchsize_dims_ *= x_dim_i;
116
+ } else {
117
+ prod_x_ins_dims_ *= x_dim_i;
118
+ prod_out_ins_dims_ *= out_dim_i;
119
+ }
120
+ }
121
+ prod_x_dims_ = prod_batchsize_dims_ * prod_x_ins_dims_;
122
+ prod_out_dims_ = prod_batchsize_dims_ * prod_out_ins_dims_;
98
123
}
99
124
100
- HOSTDEVICE void operator ()(size_t i ) {
125
+ HOSTDEVICE void operator ()(size_t ins_idx ) {
101
126
typename Random<DeviceContext>::Engine engine (seed_);
102
- engine.discard (i * (rank_ - num_same_dim_));
103
-
104
- int64_t prod_x_unsame = (prod_x_dim_ / prod_same_dim_);
105
- int64_t prod_out_unsame = (prod_out_dim_ / prod_same_dim_);
106
-
107
- const T* x = x_ + i * prod_x_unsame;
108
- T* out = out_ + i * prod_out_unsame;
109
-
110
- size_t offset[9 ];
111
- for (int i = num_same_dim_; i < rank_; ++i) {
127
+ engine.discard (ins_idx * (rank_ - num_batchsize_dims_));
128
+ size_t offsets[9 ];
129
+ for (int i = num_batchsize_dims_; i < rank_; ++i) {
112
130
typename Random<DeviceContext>::template UniformIntDist<size_t > dist (
113
- 0 , x_dim_ [i] - out_dim_ [i]);
114
- offset [i] = dist (engine);
131
+ 0 , x_dims_ [i] - out_dims_ [i]);
132
+ offsets [i] = dist (engine);
115
133
}
116
- RandomCropImpl<T>(x, x_dim_, out, out_dim_, num_same_dim_, rank_,
117
- prod_x_unsame, prod_out_unsame, offset);
134
+
135
+ const T* x = x_ + ins_idx * prod_x_ins_dims_;
136
+ T* out = out_ + ins_idx * prod_out_ins_dims_;
137
+
138
+ StridedMemcpy<T>(x, x_dims_ + num_batchsize_dims_, out,
139
+ out_dims_ + num_batchsize_dims_, 0 ,
140
+ rank_ - num_batchsize_dims_, prod_x_ins_dims_,
141
+ prod_out_ins_dims_, offsets);
118
142
}
119
143
};
120
144
121
145
template <typename DeviceContext, typename T>
122
146
class RandomCropKernel : public framework ::OpKernel<T> {
123
147
public:
124
- virtual void Compute (const framework::ExecutionContext& context) const {
125
- int64_t seed =
126
- *context.Input <framework::LoDTensor>(" Seed" )->data <int64_t >();
127
- auto & x = detail::Ref (context.Input <framework::LoDTensor>(" X" ));
128
- auto & out = detail::Ref (context.Output <framework::LoDTensor>(" Out" ));
129
-
130
- RandomCropFunctor<DeviceContext, T> functor{
131
- x.data <T>(), out.mutable_data <T>(context.GetPlace ()), seed};
132
-
133
- auto & out_dim = out.dims ();
134
- auto & x_dim = x.dims ();
135
-
136
- auto rank = x_dim.size ();
137
- while (rank-- > 0 ) {
138
- functor.x_dim_ [rank] = x_dim[rank];
139
- functor.out_dim_ [rank] = out_dim[rank];
140
- functor.prod_x_dim_ *= x_dim[rank];
141
- functor.prod_out_dim_ *= out_dim[rank];
142
- if (x_dim[rank] != out_dim[rank]) {
143
- PADDLE_ENFORCE_EQ (functor.prod_same_dim_ , 1 );
144
- functor.num_same_dim_ = rank;
145
- } else {
146
- functor.prod_same_dim_ *= out_dim[rank];
147
- }
148
- }
149
- functor.rank_ = x_dim.size ();
150
-
148
+ virtual void Compute (const framework::ExecutionContext& ctx) const {
149
+ int64_t seed = *ctx.Input <framework::LoDTensor>(" Seed" )->data <int64_t >();
150
+ auto shape = ctx.Attr <std::vector<int >>(" shape" );
151
+ auto & x = detail::Ref (ctx.Input <framework::LoDTensor>(" X" ));
152
+ auto & out = detail::Ref (ctx.Output <framework::LoDTensor>(" Out" ));
153
+
154
+ int num_batchsize_dims = x.dims ().size () - shape.size ();
155
+ RandomCropFunctor<DeviceContext, T> functor (
156
+ x.data <T>(), out.mutable_data <T>(ctx.GetPlace ()), x.dims (), out.dims (),
157
+ num_batchsize_dims, seed);
151
158
platform::ForRange<DeviceContext> for_range (
152
- context .template device_context <DeviceContext>(),
153
- functor.prod_same_dim_ );
159
+ ctx .template device_context <DeviceContext>(),
160
+ functor.prod_batchsize_dims_ );
154
161
155
162
for_range (functor);
156
163
157
164
Random<platform::CPUDeviceContext>::Engine engine (seed);
158
- engine.discard (functor.prod_same_dim_ *
159
- (functor.rank_ - functor.num_same_dim_ ));
160
-
161
- *context.Output <framework::LoDTensor>(" SeedOut" )->mutable_data <int64_t >(
165
+ engine.discard (functor.prod_batchsize_dims_ *
166
+ (functor.rank_ - functor.num_batchsize_dims_ ));
167
+ *ctx.Output <framework::LoDTensor>(" SeedOut" )->mutable_data <int64_t >(
162
168
platform::CPUPlace ()) = engine ();
163
169
}
164
170
};
165
171
172
+ // TODO(fengjiayi): Backward of random crop op
173
+
166
174
} // namespace operators
167
175
} // namespace paddle
0 commit comments