Commit 8d38a4b (parent: d9bc0bc)

add conv depthwise layer

File tree: 2 files changed, +196 -0 lines
caffe/layers/conv_depthwise_layer.hpp: 85 additions, 0 deletions
#ifndef CAFFE_CONV_DEPTHWISE_LAYER_HPP_
#define CAFFE_CONV_DEPTHWISE_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/base_conv_layer.hpp"

namespace caffe {

/**
 * @brief Convolves the input image with a bank of learned filters,
 *        and (optionally) adds biases.
 *
 * Caffe convolves by reduction to matrix multiplication. This achieves
 * high throughput and generality of input and filter dimensions, but comes
 * at the cost of memory for the intermediate matrices; it exploits the
 * efficiency of BLAS.
 *
 * The input is "im2col" transformed to a channel K' x H x W data matrix
 * for multiplication with the N x K' x H x W filter matrix to yield a
 * N' x H x W output matrix that is then "col2im" restored. K' is the
 * input channel * kernel height * kernel width dimension of the unrolled
 * inputs so that the im2col matrix has a column for each input region to
 * be filtered. col2im restores the output spatial structure by rolling up
 * the output channel N' columns of the output matrix.
 */
template <typename Dtype>
class ConvolutionDepthwiseLayer : public BaseConvolutionLayer<Dtype> {
 public:
  /**
   * @param param provides ConvolutionParameter convolution_param,
   *    with ConvolutionLayer options:
   *  - num_output. The number of filters.
   *  - kernel_size / kernel_h / kernel_w. The filter dimensions, given by
   *  kernel_size for square filters or kernel_h and kernel_w for rectangular
   *  filters.
   *  - stride / stride_h / stride_w (\b optional, default 1). The filter
   *  stride, given by stride for equal dimensions or stride_h and stride_w
   *  for different strides. By default the convolution is dense with stride 1.
   *  - pad / pad_h / pad_w (\b optional, default 0). The zero-padding for
   *  convolution, given by pad for equal dimensions or pad_h and pad_w for
   *  different padding. Input padding is computed implicitly instead of
   *  actually padding the input.
   *  - dilation (\b optional, default 1). The filter dilation, given by
   *  dilation for equal dimensions. By default the convolution has dilation 1.
   *  - group (\b optional, default 1). The number of filter groups. Group
   *  convolution is a method for reducing parameterization by selectively
   *  connecting input and output channels. The input and output channel
   *  dimensions must be divisible by the number of groups. For group
   *  @f$ \geq 1 @f$, the convolutional filters' input and output channels are
   *  separated s.t. each group takes 1 / group of the input channels and makes
   *  1 / group of the output channels. Concretely 4 input channels, 8 output
   *  channels, and 2 groups separate input channels 1-2 and output channels
   *  1-4 into the first group and input channels 3-4 and output channels 5-8
   *  into the second group.
   *  - bias_term (\b optional, default true). Whether to have a bias.
   *  - engine: convolution has CAFFE (matrix multiplication) and CUDNN
   *  (library kernels + stream parallelism) engines.
   */
  explicit ConvolutionDepthwiseLayer(const LayerParameter& param)
      : BaseConvolutionLayer<Dtype>(param) {}

  virtual inline const char* type() const { return "ConvolutionDepthwise"; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  // virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
  //     const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  // virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
  //     const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual inline bool reverse_dimensions() { return false; }
  virtual void compute_output_shape();
};

}  // namespace caffe

#endif  // CAFFE_CONV_DEPTHWISE_LAYER_HPP_
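Review note: for intuition about the im2col reduction described in the class comment above, here is a minimal single-channel sketch (stride 1, no padding, no dilation). It is an illustration only, not Caffe's actual im2col_cpu; the function name and layout are made up for this note:

// Unrolls every K x K input patch into one column of a
// (K*K) x (HO*WO) matrix, so convolution becomes a single matrix
// product with a 1 x (K*K) filter row. Illustrative sketch only.
#include <vector>

std::vector<float> im2col_simple(const float* in, int H, int W, int K) {
  const int HO = H - K + 1, WO = W - K + 1;
  std::vector<float> col(K * K * HO * WO);
  for (int ky = 0; ky < K; ++ky)
    for (int kx = 0; kx < K; ++kx)
      for (int y = 0; y < HO; ++y)
        for (int x = 0; x < WO; ++x)
          // Row index = patch element (ky, kx); column index = output
          // location (y, x).
          col[((ky * K + kx) * HO + y) * WO + x] =
              in[(y + ky) * W + (x + kx)];
  return col;
}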
conv_depthwise_layer.cpp: 111 additions, 0 deletions
#include <cmath>
#include <vector>

#include "caffe/layers/conv_depthwise_layer.hpp"

namespace caffe {

template <typename Dtype>
void ConvolutionDepthwiseLayer<Dtype>::compute_output_shape() {
  /* Blob<int> kernel_shape_, stride_, pad_, ...,
     num_spatial_axes_, ... are all declared in base_conv_layer.hpp */
  const int* kernel_shape_data = this->kernel_shape_.cpu_data();
  const int* stride_data = this->stride_.cpu_data();
  const int* pad_data = this->pad_.cpu_data();
  const int pad_type = this->pad_type_;  // CUSTOMIZATION
  const int pad_l = this->pad_l_;  // CUSTOMIZATION
  const int pad_r = this->pad_r_;  // CUSTOMIZATION
  const int pad_t = this->pad_t_;  // CUSTOMIZATION
  const int pad_b = this->pad_b_;  // CUSTOMIZATION
  const int* dilation_data = this->dilation_.cpu_data();
  this->output_shape_.clear();
  for (int i = 0; i < this->num_spatial_axes_; ++i) {
    // i + 1 to skip the channel axis
    const int input_dim = this->input_shape(i + 1);
    const int kernel_extent = dilation_data[i] * (kernel_shape_data[i] - 1) + 1;
    int output_dim = 0;
    // <--CUSTOMIZATION
    if (pad_l != 0 || pad_r != 0 || pad_t != 0 || pad_b != 0) {
      // Per-side padding only supports 2D: axis 0 is height (top/bottom
      // padding), axis 1 is width (left/right padding).
      if (i == 0) {
        output_dim = (input_dim + pad_t + pad_b - kernel_extent)
            / stride_data[i] + 1;
      } else if (i == 1) {
        output_dim = (input_dim + pad_l + pad_r - kernel_extent)
            / stride_data[i] + 1;
      }
    } else {
      switch (pad_type) {
      case 0:  // explicit symmetric padding
        output_dim = (input_dim + 2 * pad_data[i] - kernel_extent)
            / stride_data[i] + 1;
        break;
      case 1:  // "SAME" padding: output size depends only on the stride
        output_dim = std::ceil(float(input_dim) / float(stride_data[i]));
        break;
      default:
        LOG(FATAL) << "Unknown padding type.";
        break;
      }
    }
    // CUSTOMIZATION-->
    this->output_shape_.push_back(output_dim);
  }
}
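Review note: as a worked check of the two formulas in compute_output_shape, take a 112-wide input with a 3x3 kernel, stride 2, dilation 1: explicit pad 1 gives (112 + 2*1 - 3) / 2 + 1 = 56, and "SAME" (pad_type 1) gives ceil(112 / 2) = 56. A standalone sketch (not part of the commit; helper names are made up):

#include <cmath>
#include <cstdio>

// Output size with explicit symmetric padding p (pad_type 0).
int out_dim_explicit(int in, int k, int s, int p, int d) {
  const int kernel_extent = d * (k - 1) + 1;
  return (in + 2 * p - kernel_extent) / s + 1;
}

// Output size with "SAME" padding (pad_type 1).
int out_dim_same(int in, int s) {
  return static_cast<int>(std::ceil(float(in) / float(s)));
}

int main() {
  std::printf("%d %d\n", out_dim_explicit(112, 3, 2, 1, 1),
              out_dim_same(112, 2));  // prints "56 56"
  return 0;
}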
template <typename Dtype>
void ConvolutionDepthwiseLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const Dtype* weight = this->blobs_[0]->cpu_data();
  for (int i = 0; i < bottom.size(); ++i) {
    const Dtype* bottom_data = bottom[i]->cpu_data();
    Dtype* top_data = top[i]->mutable_cpu_data();
    for (int n = 0; n < this->num_; ++n) {  // num_ is the batch size
      this->forward_cpu_gemm(bottom_data + n * this->bottom_dim_, weight,
          top_data + n * this->top_dim_);
      if (this->bias_term_) {
        const Dtype* bias = this->blobs_[1]->cpu_data();
        this->forward_cpu_bias(top_data + n * this->top_dim_, bias);
      }
    }
  }
}
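Review note: Forward_cpu goes through the im2col + gemm path inherited from BaseConvolutionLayer. For the depthwise special case (group == input channels, one filter per channel), the math it computes reduces to the naive loop below. Illustrative sketch only, not code from the commit:

// Naive depthwise 2D convolution (NCHW layout, stride 1, no padding,
// no dilation): each input channel is convolved with its own single
// K x K filter, i.e. the group == channels case of group convolution.
void depthwise_conv_naive(const float* in, const float* w, float* out,
                          int C, int H, int W, int K) {
  const int HO = H - K + 1, WO = W - K + 1;
  for (int c = 0; c < C; ++c)
    for (int y = 0; y < HO; ++y)
      for (int x = 0; x < WO; ++x) {
        float acc = 0.f;
        for (int ky = 0; ky < K; ++ky)
          for (int kx = 0; kx < K; ++kx)
            acc += in[(c * H + y + ky) * W + (x + kx)]
                 * w[(c * K + ky) * K + kx];
        out[(c * HO + y) * WO + x] = acc;
      }
}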
template <typename Dtype>
void ConvolutionDepthwiseLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  const Dtype* weight = this->blobs_[0]->cpu_data();
  Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff();
  for (int i = 0; i < top.size(); ++i) {
    const Dtype* top_diff = top[i]->cpu_diff();  // the gradient dLoss/dY
    const Dtype* bottom_data = bottom[i]->cpu_data();
    Dtype* bottom_diff = bottom[i]->mutable_cpu_diff();
    // Bias gradient, if necessary.
    if (this->bias_term_ && this->param_propagate_down_[1]) {
      Dtype* bias_diff = this->blobs_[1]->mutable_cpu_diff();
      for (int n = 0; n < this->num_; ++n) {
        this->backward_cpu_bias(bias_diff, top_diff + n * this->top_dim_);
      }
    }
    if (this->param_propagate_down_[0] || propagate_down[i]) {
      for (int n = 0; n < this->num_; ++n) {
        // Gradient w.r.t. weight: dLoss/dW = dLoss/dY * dY/dW.
        // Note that we accumulate diffs across the batch.
        if (this->param_propagate_down_[0]) {
          this->weight_cpu_gemm(bottom_data + n * this->bottom_dim_,
              top_diff + n * this->top_dim_, weight_diff);
        }
        // Gradient w.r.t. bottom data, if necessary.
        if (propagate_down[i]) {
          this->backward_cpu_gemm(top_diff + n * this->top_dim_, weight,
              bottom_diff + n * this->bottom_dim_);
        }
      }
    }
  }
}
#ifdef CPU_ONLY
STUB_GPU(ConvolutionDepthwiseLayer);
#endif

INSTANTIATE_CLASS(ConvolutionDepthwiseLayer);
REGISTER_LAYER_CLASS(ConvolutionDepthwise);

}  // namespace caffe
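Review note: once REGISTER_LAYER_CLASS runs, the layer is constructible by its type string, either from a net prototxt (type: "ConvolutionDepthwise") or through the layer registry in C++. A hedged usage sketch, assuming a full Caffe build; the function name and parameter values are made up:

#include <boost/shared_ptr.hpp>

#include "caffe/layer.hpp"
#include "caffe/layer_factory.hpp"
#include "caffe/proto/caffe.pb.h"

void CreateDepthwiseLayerExample() {
  caffe::LayerParameter param;
  param.set_type("ConvolutionDepthwise");
  caffe::ConvolutionParameter* conv = param.mutable_convolution_param();
  conv->set_num_output(32);  // hypothetical: 32 output channels
  conv->add_kernel_size(3);  // 3x3 filters
  conv->set_group(32);       // depthwise: group == channel count
  boost::shared_ptr<caffe::Layer<float> > layer =
      caffe::LayerRegistry<float>::CreateLayer(param);
  // layer->SetUp(bottom, top) would then shape and initialize it.
}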
