Skip to content

Commit 5ddc4da

Browse files
committed
add implementation for 3D MAX pooling
1 parent 8d4e2a5 commit 5ddc4da

File tree

3 files changed

+260
-1
lines changed

3 files changed

+260
-1
lines changed
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#ifndef CAFFE_POOLING3D_LAYER_HPP_
2+
#define CAFFE_POOLING3D_LAYER_HPP_
3+
4+
#include <vector>
5+
6+
#include "caffe/blob.hpp"
7+
#include "caffe/layer.hpp"
8+
#include "caffe/proto/caffe.pb.h"
9+
10+
namespace caffe {
11+
12+
/**
 * @brief Pooling over the three trailing axes of a 5-axis blob.
 *
 * LayerSetUp() requires the bottom blob to have exactly five axes, which the
 * implementation reads as (num, channels, depth, height, width). The layer
 * takes exactly one bottom blob and produces exactly one top blob.
 *
 * Only the CPU forward pass is declared; Backward_cpu() expands
 * NOT_IMPLEMENTED.
 */
template <typename Dtype>
class Pooling3DLayer : public Layer<Dtype> {
 public:
  /// Forwards the layer configuration to the Layer base class.
  explicit Pooling3DLayer(const LayerParameter &param)
      : Layer<Dtype>(param) {}

  /// Reads the pooling configuration from the layer parameter.
  virtual void LayerSetUp(const vector<Blob<Dtype> *> &bottom,
                          const vector<Blob<Dtype> *> &top);

  /// Computes the pooled output shape and resizes the top blob.
  virtual void Reshape(const vector<Blob<Dtype> *> &bottom,
                       const vector<Blob<Dtype> *> &top);

  /// Layer type string.
  virtual const char *type() const { return "Pooling3D"; }

  /// Exactly one input blob is accepted.
  virtual int ExactNumBottomBlobs() const { return 1; }

  /// Exactly one output blob is produced.
  virtual int ExactNumTopBlobs() const { return 1; }

 protected:
  /// CPU forward pass; defined in the corresponding source file.
  virtual void Forward_cpu(const vector<Blob<Dtype> *> &bottom,
                           const vector<Blob<Dtype> *> &top);

  /// Gradient computation is not available for this layer.
  virtual void Backward_cpu(const vector<Blob<Dtype> *> &top,
                            const vector<bool> &propagate_down,
                            const vector<Blob<Dtype> *> &bottom) {
    NOT_IMPLEMENTED;
  }

  // Pooling window extent per axis.
  int kernel_h_;
  int kernel_w_;
  int kernel_d_;

  // Step between consecutive pooling windows per axis.
  int stride_h_;
  int stride_w_;
  int stride_d_;

  // Padding per axis: suffix 0 is the leading side (top / left / depth
  // front), suffix 1 is the trailing side (bottom / right / depth back).
  int pad_h0_;
  int pad_w0_;
  int pad_d0_;
  int pad_h1_;
  int pad_w1_;
  int pad_d1_;

  // Bottom blob shape, cached by Reshape().
  int num_;
  int channels_;
  int height_;
  int width_;
  int depth_;

  // Top blob spatial shape, computed by Reshape().
  int pooled_height_;
  int pooled_width_;
  int pooled_depth_;

  // When true the kernel covers the whole depth/height/width of the bottom.
  bool global_pooling_;

  // Selects ceil() (true) or floor() (false) when computing the output size.
  bool ceil_mode_;
};
43+
44+
} // namespace caffe
45+
46+
#endif // CAFFE_POOLING3D_LAYER_HPP_
Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
#include <algorithm>
2+
#include <cfloat>
3+
#include <vector>
4+
5+
#include "caffe/layers/pooling3d_layer.hpp"
6+
#include "caffe/util/math_functions.hpp"
7+
8+
namespace caffe {
9+
10+
using std::min;
11+
using std::max;
12+
13+
template <typename Dtype>
14+
void Pooling3DLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype> *> &bottom,
15+
const vector<Blob<Dtype> *> &top) {
16+
Pooling3DParameter pool3d_param = this->layer_param_.pooling3d_param();
17+
global_pooling_ = pool3d_param.global_pooling();
18+
ceil_mode_ = pool3d_param.ceil_mode();
19+
20+
CHECK_EQ(bottom[0]->num_axes(), 5) << "Input must have 5 axes.";
21+
22+
if (global_pooling_) {
23+
// TODO: only support data_format in NCDHW now
24+
kernel_d_ = bottom[0]->shape(2);
25+
kernel_h_ = bottom[0]->shape(3);
26+
kernel_w_ = bottom[0]->shape(4);
27+
} else {
28+
if (pool3d_param.has_kernel_size())
29+
kernel_d_ = kernel_h_ = kernel_w_ = pool3d_param.kernel_size();
30+
else {
31+
kernel_d_ = pool3d_param.kernel_d();
32+
kernel_h_ = pool3d_param.kernel_h();
33+
kernel_w_ = pool3d_param.kernel_w();
34+
}
35+
}
36+
37+
if (pool3d_param.has_stride())
38+
stride_d_ = stride_h_ = stride_w_ = pool3d_param.stride();
39+
else {
40+
stride_d_ = pool3d_param.stride_d();
41+
stride_h_ = pool3d_param.stride_h();
42+
stride_w_ = pool3d_param.stride_w();
43+
}
44+
45+
if (pool3d_param.has_pad())
46+
pad_h0_ = pad_w0_ = pad_d0_ = pad_h1_ = pad_w1_ = pad_d1_ = pool3d_param.pad();
47+
else {
48+
pad_h0_ = pool3d_param.pad_h0();
49+
pad_h1_ = pool3d_param.pad_h1();
50+
pad_w0_ = pool3d_param.pad_w0();
51+
pad_w1_ = pool3d_param.pad_w1();
52+
pad_d0_ = pool3d_param.pad_d0();
53+
pad_d1_ = pool3d_param.pad_d1();
54+
}
55+
}
56+
57+
template <typename Dtype>
58+
void Pooling3DLayer<Dtype>::Reshape(const vector<Blob<Dtype> *> &bottom,
59+
const vector<Blob<Dtype> *> &top) {
60+
// TODO: only support data_format in NCDHW now
61+
num_ = bottom[0]->shape(0);
62+
channels_ = bottom[0]->shape(1);
63+
depth_ = bottom[0]->shape(2);
64+
height_ = bottom[0]->shape(3);
65+
width_ = bottom[0]->shape(4);
66+
67+
if (ceil_mode_) {
68+
pooled_height_ = static_cast<int>(ceil(static_cast<float>(
69+
height_ + pad_h0_ + pad_h1_ - kernel_h_) / stride_h_)) + 1;
70+
pooled_width_ = static_cast<int>(ceil(static_cast<float>(
71+
width_ + pad_w0_ + pad_w1_ - kernel_w_) / stride_w_)) + 1;
72+
pooled_depth_ = static_cast<int>(ceil(static_cast<float>(
73+
depth_ + pad_d0_ + pad_d1_ - kernel_d_) / stride_d_)) + 1;
74+
} else {
75+
pooled_height_ = static_cast<int>(floor(static_cast<float>(
76+
height_ + pad_h0_ + pad_h1_ - kernel_h_) / stride_h_)) + 1;
77+
pooled_width_ = static_cast<int>(floor(static_cast<float>(
78+
width_ + pad_w0_ + pad_w1_ - kernel_w_) / stride_w_)) + 1;
79+
pooled_depth_ = static_cast<int>(floor(static_cast<float>(
80+
depth_ + pad_d0_ + pad_d1_ - kernel_d_) / stride_d_)) + 1;
81+
}
82+
83+
if (pad_h0_ || pad_h1_ || pad_w0_ || pad_w1_ || pad_d0_ || pad_d1_) {
84+
// If we have padding, ensure that the last pooling starts strictly
85+
// inside the image (instead of at the padding); otherwise clip the last.
86+
if ((pooled_height_ - 1) * stride_h_ >= height_ + pad_h0_) {
87+
--pooled_height_;
88+
}
89+
if ((pooled_width_ - 1) * stride_w_ >= width_ + pad_w0_) {
90+
--pooled_width_;
91+
}
92+
if ((pooled_depth_ - 1) * stride_d_ >= depth_ + pad_d0_) {
93+
--pooled_depth_;
94+
}
95+
CHECK_LT((pooled_height_ - 1) * stride_h_, height_ + pad_h0_);
96+
CHECK_LT((pooled_width_ - 1) * stride_w_, width_ + pad_w0_);
97+
CHECK_LT((pooled_depth_ - 1) * stride_d_, depth_ + pad_d0_);
98+
}
99+
100+
vector<int> shape(5);
101+
// TODO: only support data_format in NCDHW now
102+
shape[0] = num_;
103+
shape[1] = channels_;
104+
shape[2] = pooled_depth_;
105+
shape[3] = pooled_height_;
106+
shape[4] = pooled_width_;
107+
top[0]->Reshape(shape);
108+
}
109+
110+
template <typename Dtype>
111+
void Pooling3DLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype> *> &bottom,
112+
const vector<Blob<Dtype> *> &top) {
113+
const Dtype* bottom_data = bottom[0]->cpu_data();
114+
Dtype* top_data = top[0]->mutable_cpu_data();
115+
const int top_count = top[0]->count();
116+
117+
switch (this->layer_param_.pooling3d_param().pool()) {
118+
case Pooling3DParameter_PoolMethod_MAX:
119+
caffe_set(top_count, Dtype(-FLT_MAX), top_data);
120+
// The main loop
121+
for (int n = 0; n < num_; ++n) {
122+
for (int c = 0; c < channels_; ++c) {
123+
for (int pd = 0; pd < pooled_depth_; ++pd) {
124+
for (int ph = 0; ph < pooled_height_; ++ph) {
125+
for (int pw = 0; pw < pooled_width_; ++pw) {
126+
int dstart = pd * stride_d_ - pad_d0_;
127+
int hstart = ph * stride_h_ - pad_h0_;
128+
int wstart = pw * stride_w_ - pad_w0_;
129+
int dend = min(dstart + kernel_d_, depth_);
130+
int hend = min(hstart + kernel_h_, height_);
131+
int wend = min(wstart + kernel_w_, width_);
132+
dstart = max(dstart, 0);
133+
hstart = max(hstart, 0);
134+
wstart = max(wstart, 0);
135+
const int pool_index = pd * pooled_height_ * pooled_width_ + ph * pooled_width_ + pw;
136+
for (int d = dstart; d < dend; ++d) {
137+
for (int h = hstart; h < hend; ++h) {
138+
for (int w = wstart; w < wend; ++w) {
139+
const int index = d * height_ * width_ + h * width_ + w;
140+
if (bottom_data[index] > top_data[pool_index]) {
141+
top_data[pool_index] = bottom_data[index];
142+
143+
}
144+
}
145+
}
146+
}
147+
}
148+
}
149+
}
150+
// compute offset(n, c, d, h, w): (((n * channels() + c) * depth + d) * height() + h) * width() + w
151+
// (0, 1, 0, 0, 0)
152+
int bottom_offset = depth_ * height_ * width_;
153+
int top_offset = pooled_depth_ * pooled_height_ * pooled_width_;
154+
bottom_data += bottom_offset;
155+
top_data += top_offset;
156+
}
157+
}
158+
break;
159+
case Pooling3DParameter_PoolMethod_AVE:
160+
for (int i = 0; i < top_count; ++i) {
161+
top_data[i] = 0;
162+
}
163+
164+
break;
165+
case Pooling3DParameter_PoolMethod_AVE_EXC_PAD:
166+
for (int i = 0; i < top_count; ++i) {
167+
top_data[i] = 0;
168+
}
169+
170+
break;
171+
default:
172+
LOG(FATAL) << "Unknown pooling method.";
173+
}
174+
}
175+
176+
// Caffe boilerplate closing this translation unit.
// NOTE(review): both macros are defined outside this file. Presumably the
// first emits the explicit template instantiations of Pooling3DLayer and the
// second registers the layer with the layer factory under the type string
// "Pooling3D" — confirm against the macro definitions.
INSTANTIATE_CLASS(Pooling3DLayer);
177+
REGISTER_LAYER_CLASS(Pooling3D);
178+
179+
} // namespace caffe

src/caffe/proto/caffe.proto

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -460,7 +460,7 @@ message ParamSpec {
460460
// NOTE
461461
// Update the next available ID when you add a new LayerParameter field.
462462
//
463-
// LayerParameter next available layer-specific ID: 266 (last added: scaled_tanh_param)
463+
// LayerParameter next available layer-specific ID: 267 (last added: pooling3d_param)
464464
message LayerParameter {
465465
optional string name = 1; // the layer name
466466
optional string type = 2; // the layer type
@@ -558,6 +558,7 @@ message LayerParameter {
558558
optional ParameterParameter parameter_param = 145;
559559
optional PermuteParameter permute_param = 202;
560560
optional PoolingParameter pooling_param = 121;
561+
optional Pooling3DParameter pooling3d_param = 266;
561562
optional PowerParameter power_param = 122;
562563
optional PReLUParameter prelu_param = 131;
563564
optional PriorBoxParameter prior_box_param = 203;
@@ -2072,6 +2073,39 @@ message PoolingParameter {
20722073
optional bool ceil_mode = 14 [default = true]; // Specify floor/ceil mode rounding
20732074
}
20742075

2076+
// Configuration of the Pooling3D layer (pooling over depth, height, width).
message Pooling3DParameter {
2077+
enum PoolMethod {
2078+
MAX = 0;
2079+
AVE = 1;
2080+
AVE_EXC_PAD=2;
2081+
}
2082+
optional PoolMethod pool = 1 [default = MAX]; // The pooling method
2083+
2084+
// Pad, kernel size, and stride are each given either as a single value
2085+
// shared by all spatial dimensions, or once per spatial dimension.
2086+
optional uint32 pad = 2 [default = 0]; // The padding size (equal on all sides of H, W, D)
2087+
optional uint32 pad_h0 = 5 [default = 0]; // The padding top
2088+
optional uint32 pad_h1 = 6 [default = 0]; // The padding bottom
2089+
optional uint32 pad_w0 = 7 [default = 0]; // The padding left
2090+
optional uint32 pad_w1 = 8 [default = 0]; // The padding right
2091+
optional uint32 pad_d0 = 9 [default = 0]; // The padding at the front of the depth axis
2092+
optional uint32 pad_d1 = 10 [default = 0]; // The padding at the back of the depth axis
2093+
optional uint32 kernel_size = 3; // The kernel size (equal in H, W, D); no default
2094+
optional uint32 kernel_h = 11; // The kernel height (no default)
2095+
optional uint32 kernel_w = 12; // The kernel width (no default)
2096+
optional uint32 kernel_d = 13; // The kernel depth (no default)
2097+
optional uint32 stride = 4 [default = 1]; // The stride (equal in H, W, D)
2098+
optional uint32 stride_h = 14; // The stride height (no default)
2099+
optional uint32 stride_w = 15; // The stride width (no default)
2100+
optional uint32 stride_d = 16; // The stride depth (no default)
2101+
2102+
// If global_pooling is set, the layer pools over the whole bottom by using
2103+
// kernel_h = bottom->height, kernel_w = bottom->width and kernel_d = bottom->depth
2104+
optional bool global_pooling = 17 [default = false];
2105+
2106+
optional bool ceil_mode = 18 [default = false]; // Output size rounding: true = ceil, false = floor
2107+
}
2108+
20752109
message PowerParameter {
20762110
// PowerLayer computes outputs y = (shift + scale * x) ^ power.
20772111
optional float power = 1 [default = 1.0];

0 commit comments

Comments
 (0)