PaddlePaddle
diff --git a/‎paddle/api/CMakeLists.txt
Lines changed: 1 addition & 1 deletion b/‎paddle/api/CMakeLists.txt
Lines changed: 1 addition & 1 deletion
diff --git a/‎paddle/framework/CMakeLists.txt
Lines changed: 8 additions & 6 deletions b/‎paddle/framework/CMakeLists.txt
Lines changed: 8 additions & 6 deletions
diff --git a/‎paddle/framework/executor_test.cc
Lines changed: 10 additions & 0 deletions b/‎paddle/framework/executor_test.cc
Lines changed: 10 additions & 0 deletions
diff --git a/‎paddle/framework/operator.h
Lines changed: 9 additions & 0 deletions b/‎paddle/framework/operator.h
Lines changed: 9 additions & 0 deletions
diff --git a/‎paddle/operators/conv2d_op.cc
Lines changed: 73 additions & 93 deletions b/‎paddle/operators/conv2d_op.cc
Lines changed: 73 additions & 93 deletions
diff --git a/‎paddle/operators/conv2d_op.cu
Lines changed: 1 addition & 1 deletion b/‎paddle/operators/conv2d_op.cu
Lines changed: 1 addition & 1 deletion
diff --git a/‎paddle/operators/gemm_conv2d_op.h renamed to ‎paddle/operators/conv2d_op.h
Lines changed: 32 additions & 1 deletion b/‎paddle/operators/gemm_conv2d_op.h renamed to ‎paddle/operators/conv2d_op.h
Lines changed: 32 additions & 1 deletion
diff --git a/‎paddle/operators/conv_cudnn_op.cc
Lines changed: 47 additions & 0 deletions b/‎paddle/operators/conv_cudnn_op.cc
Lines changed: 47 additions & 0 deletions
@@ -26,7 +26,7 @@ FILE(GLOB PY_PADDLE_PYTHON_FILES ${PADDLE_SOURCE_DIR}/paddle/py_paddle/*.py)
 SET_SOURCE_FILES_PROPERTIES(Paddle.i PROPERTIES CPLUSPLUS ON)
 
 SET(CMAKE_SWIG_OUTDIR ${CMAKE_CURRENT_BINARY_DIR})
-SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-parentheses-equality -Wno-missing-field-initializers -Wno-self-assign")
+SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-parentheses-equality -Wno-missing-field-initializers -Wno-self-assign -ftls-model=global-dynamic")
 
 SET(SWIG_MODULE_swig_paddle_EXTRA_DEPS
     paddle_parameter
 
@@ -42,12 +42,14 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
 cc_library(backward SRCS backward.cc DEPS net_op)
 cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context)
 
-cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward ${GLOB_OP_LIB})
-#if(WITH_GPU)
-#    nv_test(executor_test SRCS executor_test.cc DEPS executor)
-#else()
-#    cc_test(executor_test SRCS executor_test.cc DEPS executor)
-#endif()
+cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward)
+set(EXECUTOR_TEST_OP elementwise_add_op gaussian_random_op feed_op fetch_op
+    mul_op sum_op squared_l2_distance_op fill_constant_op sgd_op)
+if(WITH_GPU)
+    nv_test(executor_test SRCS executor_test.cc DEPS executor ${EXECUTOR_TEST_OP})
+else()
+    cc_test(executor_test SRCS executor_test.cc DEPS executor ${EXECUTOR_TEST_OP})
+endif()
 
 cc_library(tensor_array SRCS tensor_array.cc DEPS lod_tensor)
 cc_test(tensor_array_test SRCS tensor_array_test.cc DEPS tensor_array place)
@@ -25,6 +25,16 @@ limitations under the License. */
 #include "paddle/framework/op_registry.h"
 #include "paddle/framework/operator.h"
 
+USE_OP(elementwise_add);
+USE_OP(gaussian_random);
+USE_OP(feed);
+USE_OP(fetch);
+USE_OP(mul);
+USE_OP(sum);
+USE_OP(squared_l2_distance);
+USE_OP(fill_constant);
+USE_OP(sgd);
+
 using namespace paddle::platform;
 using namespace paddle::framework;
 
 
@@ -289,6 +289,15 @@ class ExecutionContext {
     return device_context_;
   }
 
+#ifdef PADDLE_WITH_CUDA
+  const platform::CUDADeviceContext& cuda_device_context() const {
+    PADDLE_ENFORCE(platform::is_gpu_place(device_context_.GetPlace()));
+    auto cuda_ctx =
+        reinterpret_cast<const platform::CUDADeviceContext*>(&device_context_);
+    return *cuda_ctx;
+  }
+#endif
+
  private:
   const OperatorBase& op_;
   const Scope& scope_;
 
@@ -12,111 +12,91 @@
    See the License for the specific language governing permissions and
    limitations under the License. */
 
-#include "paddle/operators/gemm_conv2d_op.h"
+#include "paddle/operators/conv2d_op.h"
 
 namespace paddle {
 namespace operators {
 
-int outputSize(int input_size, int filter_size, int padding, int stride) {
-  int output_size = (input_size - filter_size + 2 * padding) / stride + 1;
-  return output_size;
+void Conv2DOp::InferShape(framework::InferShapeContext* ctx) const {
+  PADDLE_ENFORCE(ctx->HasInput("Input"),
+                 "Input(Input) of Conv2DOp should not be null.");
+  PADDLE_ENFORCE(ctx->HasInput("Filter"),
+                 "Input(Filter) of Conv2DOp should not be null.");
+  PADDLE_ENFORCE(ctx->HasOutput("Output"),
+                 "Output(Output) of Conv2DOp should not be null.");
+
+  auto in_dims = ctx->GetInputDim("Input");
+  auto filter_dims = ctx->GetInputDim("Filter");
+  std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
+  std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
+  int groups = ctx->Attrs().Get<int>("groups");
+  int input_channels = in_dims[1];
+  int output_channels = filter_dims[0];
+
+  PADDLE_ENFORCE_EQ(in_dims.size(), 4, "Conv2DOp input should be 4-D.");
+  PADDLE_ENFORCE_EQ(filter_dims.size(), 4, "Conv2DOp filter should be 4-D.");
+  PADDLE_ENFORCE_EQ(input_channels, filter_dims[1] * groups,
+                    "The number of input channels should be equal to filter "
+                    "channels * groups.");
+  PADDLE_ENFORCE_EQ(
+      output_channels % groups, 0,
+      "The number of output channels should be divided by groups.");
+
+  auto output_height =
+      OutputSize(in_dims[2], filter_dims[2], paddings[0], strides[0]);
+  auto output_width =
+      OutputSize(in_dims[3], filter_dims[3], paddings[1], strides[1]);
+  ctx->SetOutputDim("Output",
+                    {in_dims[0], filter_dims[0], output_height, output_width});
 }
 
-class Conv2DOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
- protected:
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("Input"),
-                   "Input(Input) of Conv2DOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("Filter"),
-                   "Input(Filter) of Conv2DOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasOutput("Output"),
-                   "Output(Output) of Conv2DOp should not be null.");
-
-    auto in_dims = ctx->GetInputDim("Input");
-    auto filter_dims = ctx->GetInputDim("Filter");
-    std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
-    std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
-    int groups = ctx->Attrs().Get<int>("groups");
-    int input_channels = in_dims[1];
-    int output_channels = filter_dims[0];
-
-    PADDLE_ENFORCE_EQ(in_dims.size(), 4, "Conv2DOp input should be 4-D.");
-    PADDLE_ENFORCE_EQ(filter_dims.size(), 4, "Conv2DOp filter should be 4-D.");
-    PADDLE_ENFORCE_EQ(input_channels, filter_dims[1] * groups,
-                      "The number of input channels should be equal to filter "
-                      "channels * groups.");
-    PADDLE_ENFORCE_EQ(
-        output_channels % groups, 0,
-        "The number of output channels should be divided by groups.");
-
-    auto output_height =
-        outputSize(in_dims[2], filter_dims[2], paddings[0], strides[0]);
-    auto output_width =
-        outputSize(in_dims[3], filter_dims[3], paddings[1], strides[1]);
-    ctx->SetOutputDim(
-        "Output", {in_dims[0], filter_dims[0], output_height, output_width});
-  }
-};
-
-class Conv2DOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
-  Conv2DOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput(
-        "Input",
-        "The input tensor of convolution operator. "
-        "The format of input tensor is NCHW. Where N is batch size, C is the "
-        "number of channels, H and W is the height and width of image.");
-    AddInput(
-        "Filter",
-        "The filter tensor of convolution operator."
-        "The format of the filter tensor is MCHW, where M is the number of "
-        "output image channels, C is the number of input image channels, "
-        "H and W is height and width of filter. "
-        "If the groups attribute is greater than 1, C equal the number of "
-        "input image channels divided by the groups.");
-    AddOutput("Output",
-              "The output tensor of convolution operator."
-              "The format of output tensor is also NCHW.");
-    AddAttr<std::vector<int>>("strides", "strides of convolution operator.")
-        .SetDefault({1, 1});
-    AddAttr<std::vector<int>>("paddings", "paddings of convolution operator.")
-        .SetDefault({0, 0});
-    AddAttr<int>(
-        "groups",
-        "group size of convolution operator. "
-        "Refer to grouped convolution in Alex Krizhevsky's paper: "
-        "when group=2, the first half of the filters are only connected to the "
-        "first half of the input channels, and the second half only connected "
-        "to the second half.")
-        .SetDefault(1);
-    AddComment(R"DOC(
+Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto,
+                             framework::OpAttrChecker* op_checker)
+    : OpProtoAndCheckerMaker(proto, op_checker) {
+  AddInput(
+      "Input",
+      "The input tensor of convolution operator. "
+      "The format of input tensor is NCHW. Where N is batch size, C is the "
+      "number of channels, H and W is the height and width of image.");
+  AddInput("Filter",
+           "The filter tensor of convolution operator."
+           "The format of the filter tensor is MCHW, where M is the number of "
+           "output image channels, C is the number of input image channels, "
+           "H and W is height and width of filter. "
+           "If the groups attribute is greater than 1, C equal the number of "
+           "input image channels divided by the groups.");
+  AddOutput("Output",
+            "The output tensor of convolution operator."
+            "The format of output tensor is also NCHW.");
+  AddAttr<std::vector<int>>("strides", "strides of convolution operator.")
+      .SetDefault({1, 1});
+  AddAttr<std::vector<int>>("paddings", "paddings of convolution operator.")
+      .SetDefault({0, 0});
+  AddAttr<int>(
+      "groups",
+      "group size of convolution operator. "
+      "Refer to grouped convolution in Alex Krizhevsky's paper: "
+      "when group=2, the first half of the filters are only connected to the "
+      "first half of the input channels, and the second half only connected "
+      "to the second half.")
+      .SetDefault(1);
+  AddComment(R"DOC(
 The convolution operation calculates the output based on the input, filter
 and strides, paddings, groups parameters. The size of each dimension of the
 parameters is checked in the infer-shape.
 )DOC");
-  }
-};
-
-class Conv2DOpGrad : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
+}
 
- protected:
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    auto in_dims = ctx->GetInputDim("Input");
-    auto filter_dims = ctx->GetInputDim("Filter");
-    if (ctx->HasOutput(framework::GradVarName("Input"))) {
-      ctx->SetOutputDim(framework::GradVarName("Input"), in_dims);
-    }
-    if (ctx->HasOutput(framework::GradVarName("Filter"))) {
-      ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims);
-    }
+void Conv2DOpGrad::InferShape(framework::InferShapeContext* ctx) const {
+  auto in_dims = ctx->GetInputDim("Input");
+  auto filter_dims = ctx->GetInputDim("Filter");
+  if (ctx->HasOutput(framework::GradVarName("Input"))) {
+    ctx->SetOutputDim(framework::GradVarName("Input"), in_dims);
   }
-};
+  if (ctx->HasOutput(framework::GradVarName("Filter"))) {
+    ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims);
+  }
+}
 
 }  // namespace operators
 }  // namespace paddle
 
@@ -12,7 +12,7 @@
    See the License for the specific language governing permissions and
    limitations under the License. */
 
-#include "paddle/operators/gemm_conv2d_op.h"
+#include "paddle/operators/conv2d_op.h"
 
 namespace ops = paddle::operators;
 
 
@@ -24,6 +24,38 @@ namespace operators {
 
 using Tensor = framework::Tensor;
 
+// Base convolution operator definations for other conv
+// like operators to reuse the implementation.
+inline int OutputSize(int input_size, int filter_size, int padding,
+                      int stride) {
+  int output_size = (input_size - filter_size + 2 * padding) / stride + 1;
+  return output_size;
+}
+
+// Define Op classes in .h file so that other conv
+// operator implementations can reuse the code.
+class Conv2DOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  Conv2DOpMaker(framework::OpProto* proto,
+                framework::OpAttrChecker* op_checker);
+};
+
+class Conv2DOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(framework::InferShapeContext* ctx) const override;
+};
+
+class Conv2DOpGrad : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(framework::InferShapeContext* ctx) const override;
+};
+
 template <typename Place, typename T>
 class GemmConv2DKernel : public framework::OpKernel<T> {
  public:
@@ -74,7 +106,6 @@ class GemmConv2DKernel : public framework::OpKernel<T> {
 
     framework::DDim output_matrix_shape = {output_channels,
                                            output_height * output_width};
-
     // convolution operator: im2col + gemm
     int in_step = input_channels / groups;
     int out_step = output_channels / groups;
 
@@ -0,0 +1,47 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "paddle/operators/conv2d_op.h"
+
+namespace paddle {
+namespace operators {
+
+class CudnnConvOpMaker : public Conv2DOpMaker {
+ public:
+  CudnnConvOpMaker(framework::OpProto* proto,
+                   framework::OpAttrChecker* op_checker)
+      : Conv2DOpMaker(proto, op_checker) {
+    AddAttr<std::vector<int>>("dilations", "dilations of convolution operator.")
+        .SetDefault(std::vector<int>{1, 1});
+    AddAttr<int>("workspace_size_MB",
+                 "workspace size for cudnn, in MB, "
+                 "workspace is a section of GPU memory which will be "
+                 "allocated/freed each time the operator runs, larger "
+                 "workspace size can increase performance but also requires "
+                 "better hardward. This size should be carefully setted.")
+        .SetDefault(4096);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP(conv_cudnn, ops::Conv2DOp, ops::CudnnConvOpMaker, conv_cudnn_grad,
+            ops::Conv2DOpGrad);
+REGISTER_OP_CPU_KERNEL(
+    conv_cudnn, ops::GemmConv2DKernel<paddle::platform::CPUPlace, float>);
+REGISTER_OP_CPU_KERNEL(
+    conv_cudnn_grad,
+    ops::GemmConvGrad2DKernel<paddle::platform::CPUPlace, float>);