Skip to content

Commit 97c1f09

Browse files
committed
Merge pull request opencv#10955 from pengli:dnn
2 parents ec0bb66 + ef937dd commit 97c1f09

File tree

5 files changed, with 38 additions and 28 deletions.

modules/dnn/src/layers/convolution_layer.cpp

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -824,9 +824,6 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
824824
for (int i = 0; i < inputs.size(); ++i)
825825
CV_Assert(inputs[i].u != outputs[0].u);
826826

827-
if (padMode == "SAME")
828-
return false;
829-
830827
if (convolutionOp.empty())
831828
{
832829
OCL4DNNConvConfig config;

modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -285,6 +285,8 @@ class OCL4DNNConvSpatial
285285
int32_t width_;
286286
int32_t pad_h_;
287287
int32_t pad_w_;
288+
int32_t pad_bottom_;
289+
int32_t pad_right_;
288290
int32_t stride_h_;
289291
int32_t stride_w_;
290292
int32_t dilation_h_;

modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -103,6 +103,12 @@ OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
103103
output_w_ = config.out_shape[dims - spatial_dims + 1];
104104
bottom_dim_ = channels_ * width_ * height_;
105105
top_dim_ = num_output_ * output_w_ * output_h_;
106+
int Ph = (output_h_ - 1) * stride_h_ + (dilation_h_ * (kernel_h_ - 1) + 1) - height_;
107+
int Pw = (output_w_ - 1) * stride_w_ + (dilation_w_ * (kernel_w_ - 1) + 1) - width_;
108+
Ph = (Ph > 0) ? Ph : 0;
109+
Pw = (Pw > 0) ? Pw : 0;
110+
pad_right_ = (Pw + 1) / 2;
111+
pad_bottom_ = (Ph + 1) / 2;
106112

107113
cache_path_ = utils::getConfigurationParameterString("OPENCV_OCL4DNN_CONFIG_PATH", "");
108114
dwconv_ = (num_output_ == channels_ && channels_ == group_);
@@ -379,6 +385,8 @@ void OCL4DNNConvSpatial<Dtype>::setupKernel()
379385
{
380386
addDef("INPUT_PAD_W", pad_w_);
381387
addDef("INPUT_PAD_H", pad_h_);
388+
addDef("INPUT_PAD_RIGHT", pad_right_);
389+
addDef("INPUT_PAD_BOTTOM", pad_bottom_);
382390
}
383391

384392
setupKernelDetails(kernelType_, blockM_, blockK_, blockN_);

modules/dnn/src/opencl/conv_layer_spatial.cl

Lines changed: 27 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -238,7 +238,7 @@ convolve_simd(
238238
int curr_local_x = ( lid % ( TILE_X / 4 ) ) * 4;
239239
int curr_y = or * STRIDE_Y + curr_local_y;
240240
int curr_x = oc * STRIDE_X + curr_local_x;
241-
#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0
241+
#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0
242242
int saved_y = curr_y;
243243
#endif
244244
in_addr = input_batch_offset
@@ -256,19 +256,22 @@ convolve_simd(
256256
LOOP(INVEC_SIZE, reg,
257257
{
258258
if (curr_local_y + reg * TILE_Y_STRIDE < TILE_Y || INVEC_SIZE * TILE_Y_STRIDE <= (TILE_Y + 2) || reg < INVEC_SIZE - 1) {
259-
#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0
259+
#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0
260260
if (curr_y >= INPUT_PAD_H && curr_y < input_height + INPUT_PAD_H && curr_x + 3 >= INPUT_PAD_W && curr_x < input_width + INPUT_PAD_W) {
261261
if (curr_x < INPUT_PAD_W) {
262262
in_buf.in_vec[reg].s0 = 0;
263-
if (curr_x + 1 >= INPUT_PAD_W)
263+
if (curr_x + 1 >= INPUT_PAD_W && curr_x + 1 < input_width + INPUT_PAD_W)
264264
in_buf.in_vec[reg].s1 = *(inputs + in_offset + 1);
265265
else
266266
in_buf.in_vec[reg].s1 = 0;
267-
if (curr_x + 2 >= INPUT_PAD_W)
267+
if (curr_x + 2 >= INPUT_PAD_W && curr_x + 2 < input_width + INPUT_PAD_W)
268268
in_buf.in_vec[reg].s2 = *(inputs + in_offset + 2);
269269
else
270270
in_buf.in_vec[reg].s2 = 0;
271-
in_buf.in_vec[reg].s3 = *(inputs + in_offset + 3);
271+
if (curr_x + 3 < input_width + INPUT_PAD_W)
272+
in_buf.in_vec[reg].s3 = *(inputs + in_offset + 3);
273+
else
274+
in_buf.in_vec[reg].s3 = 0;
272275
} else {
273276
VLOAD4(in_buf.in_vec[reg], inputs + in_offset);
274277
if (curr_x + 1 >= input_width + INPUT_PAD_W)
@@ -289,7 +292,7 @@ convolve_simd(
289292
in_offset += input_width * TILE_Y_STRIDE;
290293
});
291294
in_addr += input_height * input_width;
292-
#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0
295+
#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0
293296
curr_y = saved_y;
294297
#endif
295298

@@ -492,7 +495,7 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
492495
// atile is M rows x K columns.
493496
int curr_x = ( global_y % output_width ) * STRIDE_X;
494497
int curr_y = ( global_y / output_width ) * STRIDE_Y;
495-
#if INPUT_PAD_H != 0 || INPUT_PAD_W != 0 || DILATION_X != 1 || DILATION_Y != 1
498+
#if INPUT_PAD_H != 0 || INPUT_PAD_W != 0 || DILATION_X != 1 || DILATION_Y != 1 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0
496499
int saved_y = curr_y;
497500
#endif
498501
const __global Dtype *src0_read = src0
@@ -512,7 +515,7 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
512515
do
513516
{
514517
int patch_row = 0;
515-
#if INPUT_PAD_H != 0 || INPUT_PAD_W != 0 || DILATION_X != 1 || DILATION_Y != 1
518+
#if INPUT_PAD_H != 0 || INPUT_PAD_W != 0 || DILATION_X != 1 || DILATION_Y != 1 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0
516519
curr_y = saved_y;
517520
#endif
518521

@@ -530,7 +533,7 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
530533
// ...
531534
const bool kernel_width_is_odd = KERNEL_WIDTH % 2 == 1;
532535

533-
#if INPUT_PAD_W == 0 && INPUT_PAD_H == 0 && DILATION_X == 1 && DILATION_Y == 1
536+
#if INPUT_PAD_W == 0 && INPUT_PAD_H == 0 && DILATION_X == 1 && DILATION_Y == 1 && INPUT_PAD_BOTTOM == 0 && INPUT_PAD_RIGHT == 0
534537
Dtype_t blockA00 = ( (const __global Dtype_t*)src0_read )[ 0 ];
535538
Dtype* pblockA00 = (Dtype*)(&blockA00);
536539
#else
@@ -646,7 +649,7 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
646649
// atile is M rows x K columns.
647650
int curr_x = ( global_y % output_width ) * STRIDE_X;
648651
int curr_y = ( global_y / output_width ) * STRIDE_Y;
649-
#if INPUT_PAD_H != 0 || INPUT_PAD_W != 0 || DILATION_X != 1 || DILATION_Y != 1
652+
#if INPUT_PAD_H != 0 || INPUT_PAD_W != 0 || DILATION_X != 1 || DILATION_Y != 1 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0
650653
int saved_y = curr_y;
651654
#endif
652655
const __global Dtype *src0_read = src0
@@ -666,14 +669,14 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
666669
do
667670
{
668671
int patch_row = 0;
669-
#if INPUT_PAD_H != 0 || INPUT_PAD_W != 0 || DILATION_X != 1 || DILATION_Y != 1
672+
#if INPUT_PAD_H != 0 || INPUT_PAD_W != 0 || DILATION_X != 1 || DILATION_Y != 1 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0
670673
curr_y = saved_y;
671674
#endif
672675
do
673676
{
674677
// Load atile and interleaved btile.
675678
const bool kernel_width_is_odd = KERNEL_WIDTH % 2 == 1;
676-
#if INPUT_PAD_W == 0 && INPUT_PAD_H == 0 && DILATION_X == 1 && DILATION_Y == 1
679+
#if INPUT_PAD_W == 0 && INPUT_PAD_H == 0 && DILATION_X == 1 && DILATION_Y == 1 && INPUT_PAD_BOTTOM == 0 && INPUT_PAD_RIGHT == 0
677680
Dtype_t blockA00 = ( (const __global Dtype_t*)src0_read )[ 0 ];
678681
Dtype* pblockA00 = (Dtype*)(&blockA00);
679682
#else
@@ -873,7 +876,7 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
873876
int curr_x1 = ( ( global_y * TILE_M + 1 ) % output_width ) * STRIDE_X;
874877
int curr_y0 = ( ( global_y * TILE_M + 0 ) / output_width ) * STRIDE_Y;
875878
int curr_y1 = ( ( global_y * TILE_M + 1 ) / output_width ) * STRIDE_Y;
876-
#if INPUT_PAD_H != 0 || INPUT_PAD_W != 0 || DILATION_X != 1 || DILATION_Y != 1
879+
#if INPUT_PAD_H != 0 || INPUT_PAD_W != 0 || DILATION_X != 1 || DILATION_Y != 1 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0
877880
int saved_y0 = curr_y0;
878881
int saved_y1 = curr_y1;
879882
#endif
@@ -911,7 +914,7 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
911914
// (0, 2) (8, 2) (16, 2) (24, 2) ... ...
912915
// ...
913916
const bool kernel_width_is_odd = KERNEL_WIDTH % 2 == 1;
914-
#if INPUT_PAD_H == 0 && INPUT_PAD_W == 0 && DILATION_X == 1 && DILATION_Y == 1
917+
#if INPUT_PAD_H == 0 && INPUT_PAD_W == 0 && DILATION_X == 1 && DILATION_Y == 1 && INPUT_PAD_BOTTOM == 0 && INPUT_PAD_RIGHT == 0
915918
Dtype_t blockA00 = ( (const __global Dtype_t*)src0_read0 )[ 0 ]; src0_read0 += ROW_PITCH;
916919
Dtype_t blockA01 = ( (const __global Dtype_t*)src0_read1 )[ 0 ]; src0_read1 += ROW_PITCH;
917920
Dtype* pblockA00 = (Dtype*)(&blockA00);
@@ -997,7 +1000,7 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
9971000

9981001
//while( ++patch_row < 1 ); //debug
9991002
while( ++patch_row < KERNEL_HEIGHT );
1000-
#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || DILATION_X != 1 || DILATION_Y != 1
1003+
#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || DILATION_X != 1 || DILATION_Y != 1 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0
10011004
curr_y0 = saved_y0;
10021005
curr_y1 = saved_y1;
10031006
#endif
@@ -1073,7 +1076,7 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
10731076
int curr_x1 = ( ( global_y * TILE_M + 1 ) % output_width ) * STRIDE_X;
10741077
int curr_y0 = ( ( global_y * TILE_M + 0 ) / output_width ) * STRIDE_Y;
10751078
int curr_y1 = ( ( global_y * TILE_M + 1 ) / output_width ) * STRIDE_Y;
1076-
#if INPUT_PAD_H != 0 || INPUT_PAD_W != 0 || DILATION_X != 1 || DILATION_Y != 1
1079+
#if INPUT_PAD_H != 0 || INPUT_PAD_W != 0 || DILATION_X != 1 || DILATION_Y != 1 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0
10771080
int saved_y0 = curr_y0;
10781081
int saved_y1 = curr_y1;
10791082
#endif
@@ -1102,7 +1105,7 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
11021105
{
11031106
// Load atile and interleaved btile.
11041107
const bool kernel_width_is_odd = KERNEL_WIDTH % 2 == 1;
1105-
#if INPUT_PAD_H == 0 && INPUT_PAD_W == 0 && DILATION_X == 1 && DILATION_Y == 1
1108+
#if INPUT_PAD_H == 0 && INPUT_PAD_W == 0 && DILATION_X == 1 && DILATION_Y == 1 && INPUT_PAD_BOTTOM == 0 && INPUT_PAD_RIGHT == 0
11061109
Dtype_t blockA00 = ( (const __global Dtype_t*)src0_read0 )[ 0 ]; src0_read0 += ROW_PITCH;
11071110
Dtype_t blockA01 = ( (const __global Dtype_t*)src0_read1 )[ 0 ]; src0_read1 += ROW_PITCH;
11081111
Dtype* pblockA00 = (Dtype*)(&blockA00);
@@ -1210,7 +1213,7 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
12101213

12111214
//while( ++patch_row < 1 ); //debug
12121215
while( ++patch_row < KERNEL_HEIGHT );
1213-
#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || DILATION_X != 1 || DILATION_Y != 1
1216+
#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || DILATION_X != 1 || DILATION_Y != 1 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0
12141217
curr_y0 = saved_y0;
12151218
curr_y1 = saved_y1;
12161219
#endif
@@ -1377,7 +1380,7 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
13771380
// atile is M rows x K columns.
13781381
int curr_x = ( global_y % output_width ) * STRIDE_X;
13791382
int curr_y = ( global_y / output_width ) * STRIDE_Y;
1380-
#if INPUT_PAD_H != 0 || INPUT_PAD_W != 0 || DILATION_X != 1 || DILATION_Y != 1
1383+
#if INPUT_PAD_H != 0 || INPUT_PAD_W != 0 || DILATION_X != 1 || DILATION_Y != 1 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0
13811384
int saved_y = curr_y;
13821385
#endif
13831386
const __global Dtype *src0_read = src0
@@ -1419,7 +1422,7 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
14191422
do
14201423
{
14211424
int patch_row = 0;
1422-
#if INPUT_PAD_H != 0 || INPUT_PAD_W != 0 || DILATION_X != 1 || DILATION_Y != 1
1425+
#if INPUT_PAD_H != 0 || INPUT_PAD_W != 0 || DILATION_X != 1 || DILATION_Y != 1 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0
14231426
curr_y = saved_y;
14241427
#endif
14251428
__attribute__((opencl_unroll_hint(1)))
@@ -1437,7 +1440,7 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
14371440
// ...
14381441
const bool kernel_width_is_odd = KERNEL_WIDTH % 2 == 1;
14391442

1440-
#if INPUT_PAD_W == 0 && INPUT_PAD_H == 0 && DILATION_X == 1 && DILATION_Y == 1
1443+
#if INPUT_PAD_W == 0 && INPUT_PAD_H == 0 && DILATION_X == 1 && DILATION_Y == 1 && INPUT_PAD_BOTTOM == 0 && INPUT_PAD_RIGHT == 0
14411444
Dtype_t blockA00 = ( (const __global Dtype_t*)src0_read )[ 0 ];
14421445
Dtype* pblockA00 = (Dtype*)(&blockA00);
14431446
#else
@@ -1580,7 +1583,7 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
15801583
int curr_x1 = ( ( global_y * TILE_M + 1 ) % output_width ) * STRIDE_X;
15811584
int curr_y0 = ( ( global_y * TILE_M + 0 ) / output_width ) * STRIDE_Y;
15821585
int curr_y1 = ( ( global_y * TILE_M + 1 ) / output_width ) * STRIDE_Y;
1583-
#if INPUT_PAD_H != 0 || INPUT_PAD_W != 0 || DILATION_X != 1 || DILATION_Y != 1
1586+
#if INPUT_PAD_H != 0 || INPUT_PAD_W != 0 || DILATION_X != 1 || DILATION_Y != 1 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0
15841587
int saved_y0 = curr_y0;
15851588
int saved_y1 = curr_y1;
15861589
#endif
@@ -1618,7 +1621,7 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
16181621
// (0, 2) (8, 2) (16, 2) (24, 2) ... ...
16191622
// ...
16201623
const bool kernel_width_is_odd = KERNEL_WIDTH % 2 == 1;
1621-
#if INPUT_PAD_H == 0 && INPUT_PAD_W == 0 && DILATION_X == 1 && DILATION_Y == 1
1624+
#if INPUT_PAD_H == 0 && INPUT_PAD_W == 0 && DILATION_X == 1 && DILATION_Y == 1 && INPUT_PAD_BOTTOM == 0 && INPUT_PAD_RIGHT == 0
16221625
Dtype_t blockA00 = ( (const __global Dtype_t*)src0_read0 )[ 0 ]; src0_read0 += ROW_PITCH;
16231626
Dtype_t blockA01 = ( (const __global Dtype_t*)src0_read1 )[ 0 ]; src0_read1 += ROW_PITCH;
16241627
Dtype* pblockA00 = (Dtype*)(&blockA00);
@@ -1692,7 +1695,7 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
16921695

16931696
//while( ++patch_row < 1 ); //debug
16941697
while( ++patch_row < KERNEL_HEIGHT );
1695-
#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || DILATION_X != 1 || DILATION_Y != 1
1698+
#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || DILATION_X != 1 || DILATION_Y != 1 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0
16961699
curr_y0 = saved_y0;
16971700
curr_y1 = saved_y1;
16981701
#endif

modules/dnn/test/test_tf_importer.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -321,7 +321,7 @@ OCL_TEST(Test_TensorFlow, MobileNet_SSD)
321321
std::vector<Mat> output;
322322
net.forward(output, outNames);
323323

324-
normAssert(target[0].reshape(1, 1), output[0].reshape(1, 1));
324+
normAssert(target[0].reshape(1, 1), output[0].reshape(1, 1), "", 1e-5, 1.5e-4);
325325
normAssert(target[1].reshape(1, 1), output[1].reshape(1, 1), "", 1e-5, 3e-4);
326326
normAssert(target[2].reshape(1, 1), output[2].reshape(1, 1), "", 4e-5, 1e-2);
327327
}

0 commit comments

Comments
 (0)