|
| 1 | +#include <algorithm> |
| 2 | +#include <cfloat> |
| 3 | +#include <vector> |
| 4 | + |
| 5 | +#include "caffe/layers/pooling3d_layer.hpp" |
| 6 | +#include "caffe/util/math_functions.hpp" |
| 7 | + |
| 8 | +namespace caffe { |
| 9 | + |
| 10 | +using std::min; |
| 11 | +using std::max; |
| 12 | + |
| 13 | +template <typename Dtype> |
| 14 | +void Pooling3DLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype> *> &bottom, |
| 15 | + const vector<Blob<Dtype> *> &top) { |
| 16 | + Pooling3DParameter pool3d_param = this->layer_param_.pooling3d_param(); |
| 17 | + global_pooling_ = pool3d_param.global_pooling(); |
| 18 | + ceil_mode_ = pool3d_param.ceil_mode(); |
| 19 | + |
| 20 | + CHECK_EQ(bottom[0]->num_axes(), 5) << "Input must have 5 axes."; |
| 21 | + |
| 22 | + if (global_pooling_) { |
| 23 | + // TODO: only support data_format in NCDHW now |
| 24 | + kernel_d_ = bottom[0]->shape(2); |
| 25 | + kernel_h_ = bottom[0]->shape(3); |
| 26 | + kernel_w_ = bottom[0]->shape(4); |
| 27 | + } else { |
| 28 | + if (pool3d_param.has_kernel_size()) |
| 29 | + kernel_d_ = kernel_h_ = kernel_w_ = pool3d_param.kernel_size(); |
| 30 | + else { |
| 31 | + kernel_d_ = pool3d_param.kernel_d(); |
| 32 | + kernel_h_ = pool3d_param.kernel_h(); |
| 33 | + kernel_w_ = pool3d_param.kernel_w(); |
| 34 | + } |
| 35 | + } |
| 36 | + |
| 37 | + if (pool3d_param.has_stride()) |
| 38 | + stride_d_ = stride_h_ = stride_w_ = pool3d_param.stride(); |
| 39 | + else { |
| 40 | + stride_d_ = pool3d_param.stride_d(); |
| 41 | + stride_h_ = pool3d_param.stride_h(); |
| 42 | + stride_w_ = pool3d_param.stride_w(); |
| 43 | + } |
| 44 | + |
| 45 | + if (pool3d_param.has_pad()) |
| 46 | + pad_h0_ = pad_w0_ = pad_d0_ = pad_h1_ = pad_w1_ = pad_d1_ = pool3d_param.pad(); |
| 47 | + else { |
| 48 | + pad_h0_ = pool3d_param.pad_h0(); |
| 49 | + pad_h1_ = pool3d_param.pad_h1(); |
| 50 | + pad_w0_ = pool3d_param.pad_w0(); |
| 51 | + pad_w1_ = pool3d_param.pad_w1(); |
| 52 | + pad_d0_ = pool3d_param.pad_d0(); |
| 53 | + pad_d1_ = pool3d_param.pad_d1(); |
| 54 | + } |
| 55 | +} |
| 56 | + |
| 57 | +template <typename Dtype> |
| 58 | +void Pooling3DLayer<Dtype>::Reshape(const vector<Blob<Dtype> *> &bottom, |
| 59 | + const vector<Blob<Dtype> *> &top) { |
| 60 | + // TODO: only support data_format in NCDHW now |
| 61 | + num_ = bottom[0]->shape(0); |
| 62 | + channels_ = bottom[0]->shape(1); |
| 63 | + depth_ = bottom[0]->shape(2); |
| 64 | + height_ = bottom[0]->shape(3); |
| 65 | + width_ = bottom[0]->shape(4); |
| 66 | + |
| 67 | + if (ceil_mode_) { |
| 68 | + pooled_height_ = static_cast<int>(ceil(static_cast<float>( |
| 69 | + height_ + pad_h0_ + pad_h1_ - kernel_h_) / stride_h_)) + 1; |
| 70 | + pooled_width_ = static_cast<int>(ceil(static_cast<float>( |
| 71 | + width_ + pad_w0_ + pad_w1_ - kernel_w_) / stride_w_)) + 1; |
| 72 | + pooled_depth_ = static_cast<int>(ceil(static_cast<float>( |
| 73 | + depth_ + pad_d0_ + pad_d1_ - kernel_d_) / stride_d_)) + 1; |
| 74 | + } else { |
| 75 | + pooled_height_ = static_cast<int>(floor(static_cast<float>( |
| 76 | + height_ + pad_h0_ + pad_h1_ - kernel_h_) / stride_h_)) + 1; |
| 77 | + pooled_width_ = static_cast<int>(floor(static_cast<float>( |
| 78 | + width_ + pad_w0_ + pad_w1_ - kernel_w_) / stride_w_)) + 1; |
| 79 | + pooled_depth_ = static_cast<int>(floor(static_cast<float>( |
| 80 | + depth_ + pad_d0_ + pad_d1_ - kernel_d_) / stride_d_)) + 1; |
| 81 | + } |
| 82 | + |
| 83 | + if (pad_h0_ || pad_h1_ || pad_w0_ || pad_w1_ || pad_d0_ || pad_d1_) { |
| 84 | + // If we have padding, ensure that the last pooling starts strictly |
| 85 | + // inside the image (instead of at the padding); otherwise clip the last. |
| 86 | + if ((pooled_height_ - 1) * stride_h_ >= height_ + pad_h0_) { |
| 87 | + --pooled_height_; |
| 88 | + } |
| 89 | + if ((pooled_width_ - 1) * stride_w_ >= width_ + pad_w0_) { |
| 90 | + --pooled_width_; |
| 91 | + } |
| 92 | + if ((pooled_depth_ - 1) * stride_d_ >= depth_ + pad_d0_) { |
| 93 | + --pooled_depth_; |
| 94 | + } |
| 95 | + CHECK_LT((pooled_height_ - 1) * stride_h_, height_ + pad_h0_); |
| 96 | + CHECK_LT((pooled_width_ - 1) * stride_w_, width_ + pad_w0_); |
| 97 | + CHECK_LT((pooled_depth_ - 1) * stride_d_, depth_ + pad_d0_); |
| 98 | + } |
| 99 | + |
| 100 | + vector<int> shape(5); |
| 101 | + // TODO: only support data_format in NCDHW now |
| 102 | + shape[0] = num_; |
| 103 | + shape[1] = channels_; |
| 104 | + shape[2] = pooled_depth_; |
| 105 | + shape[3] = pooled_height_; |
| 106 | + shape[4] = pooled_width_; |
| 107 | + top[0]->Reshape(shape); |
| 108 | +} |
| 109 | + |
| 110 | +template <typename Dtype> |
| 111 | +void Pooling3DLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype> *> &bottom, |
| 112 | + const vector<Blob<Dtype> *> &top) { |
| 113 | + const Dtype* bottom_data = bottom[0]->cpu_data(); |
| 114 | + Dtype* top_data = top[0]->mutable_cpu_data(); |
| 115 | + const int top_count = top[0]->count(); |
| 116 | + |
| 117 | + switch (this->layer_param_.pooling3d_param().pool()) { |
| 118 | + case Pooling3DParameter_PoolMethod_MAX: |
| 119 | + caffe_set(top_count, Dtype(-FLT_MAX), top_data); |
| 120 | + // The main loop |
| 121 | + for (int n = 0; n < num_; ++n) { |
| 122 | + for (int c = 0; c < channels_; ++c) { |
| 123 | + for (int pd = 0; pd < pooled_depth_; ++pd) { |
| 124 | + for (int ph = 0; ph < pooled_height_; ++ph) { |
| 125 | + for (int pw = 0; pw < pooled_width_; ++pw) { |
| 126 | + int dstart = pd * stride_d_ - pad_d0_; |
| 127 | + int hstart = ph * stride_h_ - pad_h0_; |
| 128 | + int wstart = pw * stride_w_ - pad_w0_; |
| 129 | + int dend = min(dstart + kernel_d_, depth_); |
| 130 | + int hend = min(hstart + kernel_h_, height_); |
| 131 | + int wend = min(wstart + kernel_w_, width_); |
| 132 | + dstart = max(dstart, 0); |
| 133 | + hstart = max(hstart, 0); |
| 134 | + wstart = max(wstart, 0); |
| 135 | + const int pool_index = pd * pooled_height_ * pooled_width_ + ph * pooled_width_ + pw; |
| 136 | + for (int d = dstart; d < dend; ++d) { |
| 137 | + for (int h = hstart; h < hend; ++h) { |
| 138 | + for (int w = wstart; w < wend; ++w) { |
| 139 | + const int index = d * height_ * width_ + h * width_ + w; |
| 140 | + if (bottom_data[index] > top_data[pool_index]) { |
| 141 | + top_data[pool_index] = bottom_data[index]; |
| 142 | + |
| 143 | + } |
| 144 | + } |
| 145 | + } |
| 146 | + } |
| 147 | + } |
| 148 | + } |
| 149 | + } |
| 150 | + // compute offset(n, c, d, h, w): (((n * channels() + c) * depth + d) * height() + h) * width() + w |
| 151 | + // (0, 1, 0, 0, 0) |
| 152 | + int bottom_offset = depth_ * height_ * width_; |
| 153 | + int top_offset = pooled_depth_ * pooled_height_ * pooled_width_; |
| 154 | + bottom_data += bottom_offset; |
| 155 | + top_data += top_offset; |
| 156 | + } |
| 157 | + } |
| 158 | + break; |
| 159 | + case Pooling3DParameter_PoolMethod_AVE: |
| 160 | + for (int i = 0; i < top_count; ++i) { |
| 161 | + top_data[i] = 0; |
| 162 | + } |
| 163 | + |
| 164 | + break; |
| 165 | + case Pooling3DParameter_PoolMethod_AVE_EXC_PAD: |
| 166 | + for (int i = 0; i < top_count; ++i) { |
| 167 | + top_data[i] = 0; |
| 168 | + } |
| 169 | + |
| 170 | + break; |
| 171 | + default: |
| 172 | + LOG(FATAL) << "Unknown pooling method."; |
| 173 | + } |
| 174 | +} |
| 175 | + |
| 176 | +INSTANTIATE_CLASS(Pooling3DLayer); |
| 177 | +REGISTER_LAYER_CLASS(Pooling3D); |
| 178 | + |
| 179 | +} // namespace caffe |
0 commit comments