Skip to content

Commit 8073c67

Browse files
committed
inference acc: AVE_TF repair mimic.
1 parent 6f628ad commit 8073c67

File tree

1 file changed

+69
-2
lines changed

1 file changed

+69
-2
lines changed

src/caffe/layers/pooling_layer.cu

Lines changed: 69 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,14 +142,81 @@ __global__ void AvePoolForward_TF(const int nthreads,
142142
if (output_shift_instead_division != Dtype(0)) {
143143
if (full_pool_size == pool_size)
144144
top_data[index] = aveval / output_shift_instead_division;
145-
else
146-
top_data[index] = aveval / output_shift_instead_division * full_pool_size / pool_size;
145+
else {
146+
// Special fix: non-zero padding for the case when:
147+
// 1) the kernel runs off the edge by only 1 pixel
148+
// 2) and kernel_size - 1 is a power of 2
149+
// Refer to "Repair by changing padding" at
150+
//https://wwwin.synopsys.com/~tpennell/cnn_papers/29_average_pooling_repair_shop.htm
151+
bool wfix = (pw * stride_w - pad_left == -1) || (wstart + kernel_w - width == 1);
152+
bool hfix = (ph * stride_h - pad_top == -1) || (hstart + kernel_h - height == 1);
153+
154+
if (wfix && hfix)
155+
{
156+
Dtype aveval_fix;
157+
for (int h = hstart; h < hend; ++h) {
158+
aveval_fix = 0;
159+
for (int w = wstart; w < wend; ++w) {
160+
aveval_fix += bottom_slice[h * width + w];
161+
}
162+
aveval += rint(aveval_fix / (wend - wstart));
163+
}
164+
165+
for (int w = wstart; w < wend; ++w) {
166+
aveval_fix = 0;
167+
for (int h = hstart; h < hend; ++h) {
168+
aveval_fix += bottom_slice[h * width + w];
169+
}
170+
aveval += rint(aveval_fix / (hend - hstart));
171+
}
172+
173+
aveval_fix = 0;
174+
for (int h = hstart; h < hend; ++h) {
175+
for (int w = wstart; w < wend; ++w) {
176+
aveval_fix += bottom_slice[h * width + w];
177+
}
178+
}
179+
aveval += rint(aveval_fix / pool_size);
180+
181+
top_data[index] = aveval / output_shift_instead_division;
182+
}
183+
184+
else if (hfix && !wfix)
185+
{
186+
Dtype aveval_fix;
187+
for (int w = wstart; w < wend; ++w) {
188+
aveval_fix = 0;
189+
for (int h = hstart; h < hend; ++h) {
190+
aveval_fix += bottom_slice[h * width + w];
191+
}
192+
aveval += rint(aveval_fix / (hend - hstart));
193+
}
194+
top_data[index] = aveval / output_shift_instead_division;
195+
}
196+
197+
else if (wfix && !hfix)
198+
{
199+
Dtype aveval_fix;
200+
for (int h = hstart; h < hend; ++h) {
201+
aveval_fix = 0;
202+
for (int w = wstart; w < wend; ++w) {
203+
aveval_fix += bottom_slice[h * width + w];
204+
}
205+
aveval += rint(aveval_fix / (wend - wstart));
206+
}
207+
top_data[index] = aveval / output_shift_instead_division;
208+
}
209+
210+
else
211+
top_data[index] = aveval / output_shift_instead_division * full_pool_size / pool_size;
212+
}
147213
top_data[index] = rint(top_data[index]);
148214
if(top_data[index] > SATURATE_MAX)
149215
top_data[index] = SATURATE_MAX;
150216
if(top_data[index] < SATURATE_MIN)
151217
top_data[index] = SATURATE_MIN;
152218
}
219+
153220
else{
154221
top_data[index] = aveval / pool_size;
155222
}

0 commit comments

Comments
 (0)