Skip to content

Commit 07e7ebe

Browse files
LielinJiang authored and qingqing01 committed
Fix depthwise conv gpu kernel bug (#18582) (#19392)
* fix depthwise conv gpu kernel bug, test=develop
* add more depthwise conv test, test=develop
1 parent ec64f44 commit 07e7ebe

File tree

2 files changed

+48
-2
lines changed

2 files changed

+48
-2
lines changed

paddle/fluid/operators/math/depthwise_conv.cu

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -487,8 +487,12 @@ class DepthwiseConvFunctor<platform::CUDADeviceContext, T,
487487
check_case(1, 2, 3);
488488
check_case(1, 2, 5);
489489
check_case(1, 2, -1);
490-
check_case(0, 0, 3);
491-
check_case(0, 0, 5);
490+
check_case(2, 1, 3);
491+
check_case(2, 1, 5);
492+
check_case(2, 1, -1);
493+
check_case(2, 2, 3);
494+
check_case(2, 2, 5);
495+
check_case(2, 2, -1);
492496
check_case(0, 0, -1);
493497
// NOTE(liangdun): 0,0 for other case
494498
// add other case if needed, e.g. check_case(2^n,1)

python/paddle/fluid/tests/unittests/test_conv2d_op.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,48 @@ def init_group(self):
208208
self.groups = 3
209209

210210

211+
class TestWithDepthWise3x3(TestConv2dOp):
    """Conv2d case with groups equal to the input channel count (4).

    Uses a filter whose trailing dimensions are 3x3, padding 1, stride 1
    and dilation 2. The methods below override the configuration hooks
    of ``TestConv2dOp`` (presumably invoked from its setup; the base
    class is not visible here).
    """

    def init_group(self):
        # One group per input channel.
        self.groups = 4

    def init_dilation(self):
        self.dilations = [2, 2]

    def init_test_case(self):
        self.pad = [1, 1]
        self.stride = [1, 1]
        self.input_size = [3, 4, 10, 10]  # NCHW
        _, in_channels, _, _ = self.input_size
        # The channel count must split evenly across the groups.
        assert np.mod(in_channels, self.groups) == 0
        channels_per_group = in_channels // self.groups
        self.filter_size = [8, channels_per_group, 3, 3]
225+
226+
227+
class TestWithDepthWise5x5(TestConv2dOp):
    """Conv2d case with groups equal to the input channel count (4).

    Uses a filter whose trailing dimensions are 5x5, no padding and
    stride 1. The methods below override the configuration hooks of
    ``TestConv2dOp`` (presumably invoked from its setup; the base class
    is not visible here).
    """

    def init_group(self):
        # One group per input channel.
        self.groups = 4

    def init_test_case(self):
        self.pad = [0, 0]
        self.stride = [1, 1]
        self.input_size = [2, 4, 10, 10]  # NCHW
        _, in_channels, _, _ = self.input_size
        # The channel count must split evenly across the groups.
        assert np.mod(in_channels, self.groups) == 0
        channels_per_group = in_channels // self.groups
        self.filter_size = [8, channels_per_group, 5, 5]
238+
239+
240+
class TestWithDepthWise7x7(TestConv2dOp):
    """Conv2d case with groups equal to the input channel count (8).

    Uses a filter whose trailing dimensions are 7x7, padding 1 and
    stride 2. The methods below override the configuration hooks of
    ``TestConv2dOp`` (presumably invoked from its setup; the base class
    is not visible here).
    """

    def init_group(self):
        # One group per input channel.
        self.groups = 8

    def init_test_case(self):
        self.pad = [1, 1]
        self.stride = [2, 2]
        self.input_size = [2, 8, 10, 10]  # NCHW
        _, in_channels, _, _ = self.input_size
        # The channel count must split evenly across the groups.
        assert np.mod(in_channels, self.groups) == 0
        channels_per_group = in_channels // self.groups
        self.filter_size = [16, channels_per_group, 7, 7]
251+
252+
211253
class TestWithDilation(TestConv2dOp):
212254
def init_test_case(self):
213255
self.pad = [0, 0]

0 commit comments

Comments
 (0)