@@ -142,14 +142,81 @@ __global__ void AvePoolForward_TF(const int nthreads,
142142 if (output_shift_instead_division != Dtype (0 )) {
143143 if (full_pool_size == pool_size)
144144 top_data[index] = aveval / output_shift_instead_division;
145- else
146- top_data[index] = aveval / output_shift_instead_division * full_pool_size / pool_size;
145+ else {
146+ // Special fix: non-zero padding for the case when:
147+ // 1) the kernel runs off the edge by only 1 pixel, and
148+ // 2) kernel_size - 1 is a power of 2.
149+ // Refer to "Repair by changing padding" at
150+ // https://wwwin.synopsys.com/~tpennell/cnn_papers/29_average_pooling_repair_shop.htm
151+ bool wfix = (pw * stride_w - pad_left == -1 ) || (wstart + kernel_w - width == 1 );
152+ bool hfix = (ph * stride_h - pad_top == -1 ) || (hstart + kernel_h - height == 1 );
153+
154+ if (wfix && hfix)
155+ {
156+ Dtype aveval_fix;
157+ for (int h = hstart; h < hend; ++h) {
158+ aveval_fix = 0 ;
159+ for (int w = wstart; w < wend; ++w) {
160+ aveval_fix += bottom_slice[h * width + w];
161+ }
162+ aveval += rint (aveval_fix / (wend - wstart));
163+ }
164+
165+ for (int w = wstart; w < wend; ++w) {
166+ aveval_fix = 0 ;
167+ for (int h = hstart; h < hend; ++h) {
168+ aveval_fix += bottom_slice[h * width + w];
169+ }
170+ aveval += rint (aveval_fix / (hend - hstart));
171+ }
172+
173+ aveval_fix = 0 ;
174+ for (int h = hstart; h < hend; ++h) {
175+ for (int w = wstart; w < wend; ++w) {
176+ aveval_fix += bottom_slice[h * width + w];
177+ }
178+ }
179+ aveval += rint (aveval_fix / pool_size);
180+
181+ top_data[index] = aveval / output_shift_instead_division;
182+ }
183+
184+ else if (hfix && !wfix)
185+ {
186+ Dtype aveval_fix;
187+ for (int w = wstart; w < wend; ++w) {
188+ aveval_fix = 0 ;
189+ for (int h = hstart; h < hend; ++h) {
190+ aveval_fix += bottom_slice[h * width + w];
191+ }
192+ aveval += rint (aveval_fix / (hend - hstart));
193+ }
194+ top_data[index] = aveval / output_shift_instead_division;
195+ }
196+
197+ else if (wfix && !hfix)
198+ {
199+ Dtype aveval_fix;
200+ for (int h = hstart; h < hend; ++h) {
201+ aveval_fix = 0 ;
202+ for (int w = wstart; w < wend; ++w) {
203+ aveval_fix += bottom_slice[h * width + w];
204+ }
205+ aveval += rint (aveval_fix / (wend - wstart));
206+ }
207+ top_data[index] = aveval / output_shift_instead_division;
208+ }
209+
210+ else
211+ top_data[index] = aveval / output_shift_instead_division * full_pool_size / pool_size;
212+ }
147213 top_data[index] = rint (top_data[index]);
148214 if (top_data[index] > SATURATE_MAX)
149215 top_data[index] = SATURATE_MAX;
150216 if (top_data[index] < SATURATE_MIN)
151217 top_data[index] = SATURATE_MIN;
152218 }
219+
153220 else {
154221 top_data[index] = aveval / pool_size;
155222 }
0 commit comments