Commit 8d38a4b (parent: d9bc0bc)

add conv depthwise layer

File tree: 2 files changed, +196 -0 lines
caffe/layers/conv_depthwise_layer.hpp: 85 additions, 0 deletions
#ifndef CAFFE_CONV_DEPTHWISE_LAYER_HPP_
#define CAFFE_CONV_DEPTHWISE_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/base_conv_layer.hpp"

namespace caffe {

/**
 * @brief Convolves the input image with a bank of learned filters,
 *        and (optionally) adds biases.
 *
 * Caffe convolves by reduction to matrix multiplication. This achieves
 * high throughput and generality of input and filter dimensions, but comes
 * at the cost of memory for the intermediate matrices; it exploits the
 * efficiency of BLAS.
 *
 * The input is "im2col" transformed to a channel K' x H x W data matrix
 * for multiplication with the N x K' x H x W filter matrix to yield a
 * N' x H x W output matrix that is then "col2im" restored. K' is the
 * input channel * kernel height * kernel width dimension of the unrolled
 * inputs so that the im2col matrix has a column for each input region to
 * be filtered. col2im restores the output spatial structure by rolling up
 * the output channel N' columns of the output matrix.
 */
template <typename Dtype>
class ConvolutionDepthwiseLayer : public BaseConvolutionLayer<Dtype> {
 public:
  /**
   * @param param provides ConvolutionParameter convolution_param,
   *    with ConvolutionLayer options:
   *  - num_output. The number of filters.
   *  - kernel_size / kernel_h / kernel_w. The filter dimensions, given by
   *  kernel_size for square filters or kernel_h and kernel_w for rectangular
   *  filters.
   *  - stride / stride_h / stride_w (\b optional, default 1). The filter
   *  stride, given by stride for equal dimensions or stride_h and stride_w
   *  for different strides. By default the convolution is dense with stride 1.
   *  - pad / pad_h / pad_w (\b optional, default 0). The zero-padding for
   *  convolution, given by pad for equal dimensions or pad_h and pad_w for
   *  different padding. Input padding is computed implicitly instead of
   *  actually padding the input.
   *  - dilation (\b optional, default 1). The filter dilation, given by
   *  dilation for equal dimensions. By default the convolution has dilation 1.
   *  - group (\b optional, default 1). The number of filter groups. Group
   *  convolution is a method for reducing parameterization by selectively
   *  connecting input and output channels. The input and output channel
   *  dimensions must be divisible by the number of groups. For group
   *  @f$ \geq 1 @f$, the convolutional filters' input and output channels are
   *  separated s.t. each group takes 1 / group of the input channels and makes
   *  1 / group of the output channels. Concretely 4 input channels, 8 output
   *  channels, and 2 groups separate input channels 1-2 and output channels
   *  1-4 into the first group and input channels 3-4 and output channels 5-8
   *  into the second group.
   *  - bias_term (\b optional, default true). Whether to have a bias.
   *  - engine: convolution has CAFFE (matrix multiplication) and CUDNN
   *  (library kernels + stream parallelism) engines.
   */
  explicit ConvolutionDepthwiseLayer(const LayerParameter& param)
      : BaseConvolutionLayer<Dtype>(param) {}

  virtual inline const char* type() const { return "ConvolutionDepthwise"; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  // virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
  //     const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  // virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
  //     const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual inline bool reverse_dimensions() { return false; }
  virtual void compute_output_shape();
};

}  // namespace caffe

#endif  // CAFFE_CONV_DEPTHWISE_LAYER_HPP_
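Review note: for intuition about the im2col reduction described in the class comment above, here is a minimal single-channel sketch (stride 1, no padding, no dilation). It is an illustration only, not Caffe's actual im2col_cpu; the function name and layout are made up for this note:

// Unrolls every K x K input patch into one column of a
// (K*K) x (HO*WO) matrix, so convolution becomes a single matrix
// product with a 1 x (K*K) filter row. Illustrative sketch only.
#include <vector>

std::vector<float> im2col_simple(const float* in, int H, int W, int K) {
  const int HO = H - K + 1, WO = W - K + 1;
  std::vector<float> col(K * K * HO * WO);
  for (int ky = 0; ky < K; ++ky)
    for (int kx = 0; kx < K; ++kx)
      for (int y = 0; y < HO; ++y)
        for (int x = 0; x < WO; ++x)
          // Row index = patch element (ky, kx); column index = output
          // location (y, x).
          col[((ky * K + kx) * HO + y) * WO + x] =
              in[(y + ky) * W + (x + kx)];
  return col;
}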
conv_depthwise_layer.cpp: 111 additions, 0 deletions
#include <cmath>
#include <vector>

#include "caffe/layers/conv_depthwise_layer.hpp"

namespace caffe {

template <typename Dtype>
void ConvolutionDepthwiseLayer<Dtype>::compute_output_shape() {
  /* Blob<int> kernel_shape_, stride_, pad_, ...,
     num_spatial_axes_, ... are all declared in base_conv_layer.hpp */
  const int* kernel_shape_data = this->kernel_shape_.cpu_data();
  const int* stride_data = this->stride_.cpu_data();
  const int* pad_data = this->pad_.cpu_data();
  const int pad_type = this->pad_type_;  // CUSTOMIZATION
  const int pad_l = this->pad_l_;  // CUSTOMIZATION
  const int pad_r = this->pad_r_;  // CUSTOMIZATION
  const int pad_t = this->pad_t_;  // CUSTOMIZATION
  const int pad_b = this->pad_b_;  // CUSTOMIZATION
  const int* dilation_data = this->dilation_.cpu_data();
  this->output_shape_.clear();
  for (int i = 0; i < this->num_spatial_axes_; ++i) {
    // i + 1 to skip the channel axis
    const int input_dim = this->input_shape(i + 1);
    const int kernel_extent = dilation_data[i] * (kernel_shape_data[i] - 1) + 1;
    int output_dim = 0;
    // <--CUSTOMIZATION
    if (pad_l != 0 || pad_r != 0 || pad_t != 0 || pad_b != 0) {
      // Per-side padding only supports 2D: axis 0 is height (top/bottom
      // padding), axis 1 is width (left/right padding).
      if (i == 0) {
        output_dim = (input_dim + pad_t + pad_b - kernel_extent)
            / stride_data[i] + 1;
      } else if (i == 1) {
        output_dim = (input_dim + pad_l + pad_r - kernel_extent)
            / stride_data[i] + 1;
      }
    } else {
      switch (pad_type) {
      case 0:  // explicit symmetric padding
        output_dim = (input_dim + 2 * pad_data[i] - kernel_extent)
            / stride_data[i] + 1;
        break;
      case 1:  // "SAME" padding: output size depends only on the stride
        output_dim = std::ceil(float(input_dim) / float(stride_data[i]));
        break;
      default:
        LOG(FATAL) << "Unknown padding type.";
        break;
      }
    }
    // CUSTOMIZATION-->
    this->output_shape_.push_back(output_dim);
  }
}
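Review note: as a worked check of the two formulas in compute_output_shape, take a 112-wide input with a 3x3 kernel, stride 2, dilation 1: explicit pad 1 gives (112 + 2*1 - 3) / 2 + 1 = 56, and "SAME" (pad_type 1) gives ceil(112 / 2) = 56. A standalone sketch (not part of the commit; helper names are made up):

#include <cmath>
#include <cstdio>

// Output size with explicit symmetric padding p (pad_type 0).
int out_dim_explicit(int in, int k, int s, int p, int d) {
  const int kernel_extent = d * (k - 1) + 1;
  return (in + 2 * p - kernel_extent) / s + 1;
}

// Output size with "SAME" padding (pad_type 1).
int out_dim_same(int in, int s) {
  return static_cast<int>(std::ceil(float(in) / float(s)));
}

int main() {
  std::printf("%d %d\n", out_dim_explicit(112, 3, 2, 1, 1),
              out_dim_same(112, 2));  // prints "56 56"
  return 0;
}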
template <typename Dtype>
void ConvolutionDepthwiseLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const Dtype* weight = this->blobs_[0]->cpu_data();
  for (int i = 0; i < bottom.size(); ++i) {
    const Dtype* bottom_data = bottom[i]->cpu_data();
    Dtype* top_data = top[i]->mutable_cpu_data();
    for (int n = 0; n < this->num_; ++n) {  // num_ is the batch size
      this->forward_cpu_gemm(bottom_data + n * this->bottom_dim_, weight,
          top_data + n * this->top_dim_);
      if (this->bias_term_) {
        const Dtype* bias = this->blobs_[1]->cpu_data();
        this->forward_cpu_bias(top_data + n * this->top_dim_, bias);
      }
    }
  }
}
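Review note: Forward_cpu goes through the im2col + gemm path inherited from BaseConvolutionLayer. For the depthwise special case (group == input channels, one filter per channel), the math it computes reduces to the naive loop below. Illustrative sketch only, not code from the commit:

// Naive depthwise 2D convolution (NCHW layout, stride 1, no padding,
// no dilation): each input channel is convolved with its own single
// K x K filter, i.e. the group == channels case of group convolution.
void depthwise_conv_naive(const float* in, const float* w, float* out,
                          int C, int H, int W, int K) {
  const int HO = H - K + 1, WO = W - K + 1;
  for (int c = 0; c < C; ++c)
    for (int y = 0; y < HO; ++y)
      for (int x = 0; x < WO; ++x) {
        float acc = 0.f;
        for (int ky = 0; ky < K; ++ky)
          for (int kx = 0; kx < K; ++kx)
            acc += in[(c * H + y + ky) * W + (x + kx)]
                 * w[(c * K + ky) * K + kx];
        out[(c * HO + y) * WO + x] = acc;
      }
}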
template <typename Dtype>
void ConvolutionDepthwiseLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  const Dtype* weight = this->blobs_[0]->cpu_data();
  Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff();
  for (int i = 0; i < top.size(); ++i) {
    const Dtype* top_diff = top[i]->cpu_diff();  // the gradient dLoss/dY
    const Dtype* bottom_data = bottom[i]->cpu_data();
    Dtype* bottom_diff = bottom[i]->mutable_cpu_diff();
    // Bias gradient, if necessary.
    if (this->bias_term_ && this->param_propagate_down_[1]) {
      Dtype* bias_diff = this->blobs_[1]->mutable_cpu_diff();
      for (int n = 0; n < this->num_; ++n) {
        this->backward_cpu_bias(bias_diff, top_diff + n * this->top_dim_);
      }
    }
    if (this->param_propagate_down_[0] || propagate_down[i]) {
      for (int n = 0; n < this->num_; ++n) {
        // Gradient w.r.t. weight: dLoss/dW = dLoss/dY * dY/dW.
        // Note that we accumulate diffs across the batch.
        if (this->param_propagate_down_[0]) {
          this->weight_cpu_gemm(bottom_data + n * this->bottom_dim_,
              top_diff + n * this->top_dim_, weight_diff);
        }
        // Gradient w.r.t. bottom data, if necessary.
        if (propagate_down[i]) {
          this->backward_cpu_gemm(top_diff + n * this->top_dim_, weight,
              bottom_diff + n * this->bottom_dim_);
        }
      }
    }
  }
}
#ifdef CPU_ONLY
STUB_GPU(ConvolutionDepthwiseLayer);
#endif

INSTANTIATE_CLASS(ConvolutionDepthwiseLayer);
REGISTER_LAYER_CLASS(ConvolutionDepthwise);

}  // namespace caffe
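Review note: once REGISTER_LAYER_CLASS runs, the layer is constructible by its type string, either from a net prototxt (type: "ConvolutionDepthwise") or through the layer registry in C++. A hedged usage sketch, assuming a full Caffe build; the function name and parameter values are made up:

#include <boost/shared_ptr.hpp>

#include "caffe/layer.hpp"
#include "caffe/layer_factory.hpp"
#include "caffe/proto/caffe.pb.h"

void CreateDepthwiseLayerExample() {
  caffe::LayerParameter param;
  param.set_type("ConvolutionDepthwise");
  caffe::ConvolutionParameter* conv = param.mutable_convolution_param();
  conv->set_num_output(32);  // hypothetical: 32 output channels
  conv->add_kernel_size(3);  // 3x3 filters
  conv->set_group(32);       // depthwise: group == channel count
  boost::shared_ptr<caffe::Layer<float> > layer =
      caffe::LayerRegistry<float>::CreateLayer(param);
  // layer->SetUp(bottom, top) would then shape and initialize it.
}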
