Skip to content

Commit f89f7b5

Browse files
committed
[Resample] CPU version added + correlation[subtract] fix
1 parent a5758aa commit f89f7b5

File tree

2 files changed

+197
-7
lines changed

2 files changed

+197
-7
lines changed

src/caffe/layers/correlation_layer.cpp

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ void CorrelateData(int num, int topwidth, int topheight, int topchannels, int to
5757
// Compute correlation
5858
for (int top_channel = 0; top_channel < topchannels; top_channel++)
5959
{
60-
double sum = 0;
60+
Dtype sum = 0;
6161

6262
int s2o = (top_channel % neighborhood_grid_width - neighborhood_grid_radius) * stride2;
6363
int s2p = (top_channel / neighborhood_grid_width - neighborhood_grid_radius) * stride2;
@@ -94,12 +94,12 @@ void CorrelateData(int num, int topwidth, int topheight, int topchannels, int to
9494
}
9595

9696
template <typename Dtype>
97-
void CorrelateDataSubtract(int num, int item, int topwidth, int topheight, int topchannels, int topcount,
97+
void CorrelateDataSubtract(const int nthreads, int num, int item, int topwidth, int topheight, int topchannels, int topcount,
9898
int max_displacement, int neighborhood_grid_radius, int neighborhood_grid_width, int kernel_radius, int stride1, int stride2,
9999
int bottomwidth, int bottomheight, int bottomchannels,
100100
const Dtype *bottom0, const Dtype *bottom1, Dtype *top)
101101
{
102-
for (int index = 0; index < topcount; index++)
102+
for (int index = 0; index < nthreads; index++)
103103
{
104104
int x = index % topwidth; //w-pos
105105
int y = (index / topwidth) % topheight; //h-pos
@@ -130,7 +130,7 @@ void CorrelateDataSubtract(int num, int item, int topwidth, int topheight, int t
130130
int idx2 = ((item * bottomheight + y2 + j) * bottomwidth + x2 + i) * bottomchannels + l;
131131

132132
// Do the correlation:
133-
sum += abs(bottom0[idx1] - bottom1[idx2]);
133+
sum += fabs(bottom0[idx1] - bottom1[idx2]);
134134
}
135135
}
136136
}
@@ -228,6 +228,9 @@ void CorrelationLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
228228
const int topcount = top_width_ * top_height_ * top_channels_;
229229
const int pheight = bheight + 2 * pad_size_;
230230
const int pwidth = bwidth + 2 * pad_size_;
231+
232+
memset(rbot1_->mutable_cpu_data(), 0, sizeof(Dtype) * rbot1_->count());
233+
memset(rbot2_->mutable_cpu_data(), 0, sizeof(Dtype) * rbot2_->count());
231234

232235
blob_rearrange_kernel2<Dtype>(rbot1_->mutable_cpu_data(), bottom[0]->cpu_data(), bnum, bchannels, bheight, bwidth, pheight, pwidth, pad_size_);
233236
blob_rearrange_kernel2<Dtype>(rbot2_->mutable_cpu_data(), bottom[1]->cpu_data(), bnum, bchannels, bheight, bwidth, pheight, pwidth, pad_size_);
@@ -252,8 +255,9 @@ void CorrelationLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
252255
// CorrelationLayer
253256
for(int n = 0; n < num; n++) {
254257

258+
int topThreadCount = topcount;
255259
CorrelateDataSubtract<Dtype>(
256-
num, n, top_width_, top_height_, top_channels_, topcount,
260+
topThreadCount, num, n, top_width_, top_height_, top_channels_, topcount,
257261
max_displacement_, neighborhood_grid_radius_, neighborhood_grid_width_, kernel_radius_,
258262
stride1_, stride2_,
259263
width, height, channels,

src/caffe/layers/resample_layer.cpp

Lines changed: 188 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,119 @@
1010

1111
namespace caffe {
1212

13+
// Piecewise-cubic filter weight for a sample at signed offset x_.
// The weight depends only on |x_|: one cubic is used for |x_| <= 1, a second
// cubic for 1 < |x_| < 2, and the weight is zero everywhere else.
static float bicubicCoeff(float x_)
{
  const float dist = fabs(x_);

  // Outside the support the weight is zero; the negated comparison also
  // sends NaN down this path, exactly like the final else of a chained test.
  if (!(dist < 2.0f))
    return 0.0f;

  // Inner lobe: |x_| <= 1.
  if (dist <= 1.0f)
    return dist * dist * (1.5f * dist - 2.5f) + 1.0f;

  // Outer lobe: 1 < |x_| < 2.
  return dist * (dist * (-0.5f * dist + 2.5f) - 4.0f) + 2.0f;
}
20+
21+
// Box filter weight: 1 on the half-open interval [-0.5, 0.5), 0 elsewhere.
static float boxCoeff(float x)
{
  const bool inside = (x >= -0.5) && (x < 0.5);
  return inside ? 1.0f : 0.0f;
}
26+
27+
// Triangle (tent) filter weight: 1 - |x| on [-1, 1], 0 elsewhere.
static float triangleCoeff(float x)
{
  // Reject everything outside [-1, 1]; written as a negated range test so
  // that NaN is rejected too and yields 0.
  if (!(x >= -1 && x <= 1))
    return 0;

  // Rising edge for negative offsets, falling edge otherwise.
  return (x < 0) ? (x + 1) : (1 - x);
}
33+
34+
// Selector values for the `filter_type` argument of the interpolation code.
// Declared as an unnamed enum instead of preprocessor macros so the names are
// scoped and visible to the debugger; the enumerators convert implicitly to
// int, so existing `int filter_type` comparisons and assignments still work.
enum
{
  FILTER_BICUBIC = 0,
  FILTER_BOX = 1,
  FILTER_TRIANGLE = 2
};
37+
38+
template <typename Dtype>
39+
void InterpolationKernel(
40+
const int nthreads,
41+
const int in_channelsize,
42+
const int out_channelsize,
43+
const Dtype* in_ptr,
44+
const int in_width,
45+
const int in_height,
46+
const float fx,
47+
const float fy,
48+
Dtype* out_ptr,
49+
const int out_width,
50+
const int out_height,
51+
int filter_type,
52+
int kernel_width,
53+
const bool antialias)
54+
{
55+
for (int index = 0; index < nthreads; ++index)
56+
{
57+
int c = index / out_channelsize;
58+
int x_out = (index % out_channelsize) % out_width;
59+
int y_out = (index % out_channelsize) / out_width;
60+
61+
float x_in = x_out * fx + fy / 2.0f - 0.5f;
62+
float y_in = y_out * fy + fx / 2.0f - 0.5f;
63+
64+
int x_in_round = round(x_in);
65+
int y_in_round = round(y_in);
66+
67+
Dtype sum = 0;
68+
Dtype wsum = 0;
69+
70+
float ax = 1.0f / (antialias ? fx : 1.0f);
71+
float ay = 1.0f / (antialias ? fy : 1.0f);
72+
int rx = (fx < 1.0f) ? 2 : ceil(float(kernel_width) / ax);
73+
int ry = (fy < 1.0f) ? 2 : ceil(float(kernel_width) / ay);
74+
75+
for (int y = y_in_round - ry; y <= y_in_round + ry; y++)
76+
for (int x = x_in_round - rx; x <= x_in_round + rx; x++)
77+
{
78+
if (y < 0 || x < 0) continue;
79+
if (y >= in_height || x >= in_width) continue;
80+
81+
float dx = x_in - x;
82+
float dy = y_in - y;
83+
84+
float w;
85+
if (filter_type == FILTER_BICUBIC) w = ax * bicubicCoeff(ax * dx) * ay * bicubicCoeff(ay * dy);
86+
else if (filter_type == FILTER_BOX) w = ax * boxCoeff(ax * dx) * ay * boxCoeff(ay * dy);
87+
else w = ax * triangleCoeff(ax * dx) * ay * triangleCoeff(ay * dy);
88+
sum += w * in_ptr[c * in_channelsize + y * in_width + x];
89+
wsum += w;
90+
}
91+
92+
out_ptr[index] = (!wsum) ? 0 : (sum / wsum);
93+
}
94+
}
95+
96+
// Nearest-neighbour resampling of a stack of 2-D planes, producing one output
// element per loop iteration.
//
//   nthreads         number of output elements to compute (loop bound)
//   in_channelsize   element stride between consecutive input planes
//   out_channelsize  number of elements in one output plane
//   in_ptr           input data, indexed as plane * in_channelsize + y * in_width + x
//   in_width/height  size of one input plane
//   fx, fy           horizontal / vertical step, in input pixels, per output pixel
//   out_ptr          output data, written at [index]
//   out_width        width of one output plane (out_height is not read)
template <typename Dtype>
void NearestNeighborKernel(
    const int nthreads,
    const int in_channelsize,
    const int out_channelsize,
    const Dtype* in_ptr,
    const int in_width,
    const int in_height,
    const float fx,
    const float fy,
    Dtype* out_ptr,
    const int out_width,
    const int out_height)
{
  for (int index = 0; index < nthreads; ++index)
  {
    const int c = index / out_channelsize;
    const int plane_pos = index % out_channelsize;
    const int x_out = plane_pos % out_width;
    const int y_out = plane_pos / out_width;

    // Centre of the output pixel mapped into input coordinates:
    // (out + 0.5) * f - 0.5. Each axis must use its own scale factor; the
    // previous code used fy for x and fx for y, which for fx != fy picked the
    // wrong source pixel and could index past the end of the input plane.
    // NOTE(review): if another implementation of this layer (e.g. a GPU one)
    // uses the old formula, it needs the same fix to keep results identical.
    const float x_in = x_out * fx + fx / 2.0f - 0.5f;
    const float y_in = y_out * fy + fy / 2.0f - 0.5f;

    int x_src = static_cast<int>(round(x_in));
    int y_src = static_cast<int>(round(y_in));

    // Clamp to the input plane so float rounding at the borders can never
    // turn into an out-of-bounds read.
    if (x_src < 0) x_src = 0;
    if (x_src > in_width - 1) x_src = in_width - 1;
    if (y_src < 0) y_src = 0;
    if (y_src > in_height - 1) y_src = in_height - 1;

    out_ptr[index] = in_ptr[c * in_channelsize + y_src * in_width + x_src];
  }
}
125+
13126
template <typename Dtype>
14127
void ResampleLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
15128
const vector<Blob<Dtype>*>& top) {
@@ -56,9 +169,82 @@ void ResampleLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
56169

57170
template <typename Dtype>
58171
void ResampleLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
59-
const vector<Blob<Dtype>*>& top)
172+
const vector<Blob<Dtype>*>& top)
60173
{
61-
NOT_IMPLEMENTED;
174+
175+
Dtype* top_data = top[0]->mutable_cpu_data(); // dest
176+
int topwidth = top[0]->width();
177+
int topheight = top[0]->height();
178+
int topchannels = top[0]->channels();
179+
// int topcount = top[0]->count();
180+
181+
Dtype* bottom_data = bottom[0]->mutable_cpu_data(); // source
182+
int bottomnum = (bottom)[0]->num();
183+
int bottomchannels = (bottom)[0]->channels();
184+
int bottomwidth = (bottom)[0]->width();
185+
int bottomheight = (bottom)[0]->height();
186+
// int bottomcount = (bottom)[0]->count();
187+
188+
CHECK_EQ(topchannels, bottomchannels) << "ResampleLayer top channel count must match bottom channel count";
189+
190+
float fx = float(bottomwidth) / float(topwidth);
191+
float fy = float(bottomheight) / float(topheight);
192+
193+
//int botsize = bottomwidth*bottomheight*bottomchannels*bottomnum;
194+
int topsize = topwidth * topheight * topchannels*bottomnum;
195+
int topchannelsize = topwidth*topheight;
196+
int botchannelsize = bottomwidth*bottomheight;
197+
198+
if (this->layer_param().resample_param().type() == ResampleParameter_ResampleType_NEAREST)
199+
{
200+
NearestNeighborKernel<Dtype>(
201+
topsize,
202+
botchannelsize,
203+
topchannelsize,
204+
(Dtype*) bottom_data,
205+
bottomwidth,
206+
bottomheight,
207+
fx,
208+
fy,
209+
(Dtype*) top_data,
210+
topwidth,
211+
topheight
212+
);
213+
}
214+
else if (this->layer_param().resample_param().type() == ResampleParameter_ResampleType_CUBIC || this->layer_param().resample_param().type() == ResampleParameter_ResampleType_LINEAR)
215+
{
216+
int filter_type;
217+
if (this->layer_param().resample_param().type() == ResampleParameter_ResampleType_CUBIC)
218+
filter_type = FILTER_BICUBIC;
219+
else if (this->layer_param().resample_param().type() == ResampleParameter_ResampleType_LINEAR)
220+
filter_type = FILTER_TRIANGLE;
221+
222+
bool isDownsample = (fx > 1) || (fy > 1);
223+
bool antialias = isDownsample && this->layer_param_.resample_param().antialias();
224+
225+
int kernel_width;
226+
if (filter_type == FILTER_BICUBIC) kernel_width = 4;
227+
else if (filter_type == FILTER_BOX) kernel_width = 1;
228+
else kernel_width = 2;
229+
230+
InterpolationKernel<Dtype>(
231+
topsize,
232+
botchannelsize,
233+
topchannelsize,
234+
(Dtype*) bottom_data,
235+
bottomwidth,
236+
bottomheight,
237+
fx,
238+
fy,
239+
(Dtype*) top_data,
240+
topwidth,
241+
topheight,
242+
filter_type,
243+
kernel_width,
244+
antialias);
245+
}
246+
else
247+
LOG(FATAL) << "unsupported downsampling type";
62248
}
63249

64250
template <typename Dtype>

0 commit comments

Comments
 (0)