Skip to content

Commit 4fc26f7

Browse files
authored
fix prelu elementwise bug for 1x1 conv test=develop (#5801)
1 parent a1b8656 commit 4fc26f7

File tree

1 file changed

+39
-65
lines changed

1 file changed

+39
-65
lines changed

lite/backends/opencl/cl_kernel/image/conv2d_1x1_opt_kernel.cl

Lines changed: 39 additions & 65 deletions
Original file line number | Diff line number | Diff line change
@@ -7,10 +7,6 @@ __kernel void conv2d_1x1_opt(
77
__read_only image2d_t input_image,
88
__read_only image2d_t filter,
99
__read_only image2d_t bias,
10-
#ifdef BATCH_NORM
11-
__read_only image2d_t new_scale,
12-
__read_only image2d_t new_biase,
13-
#endif
1410
__write_only image2d_t output_image,
1511
__private const int stride,
1612
__private const int offset,
@@ -63,12 +59,6 @@ __kernel void conv2d_1x1_opt(
6359
CL_DTYPE4 output1 = output0;
6460
CL_DTYPE4 output2 = output0;
6561
CL_DTYPE4 output3 = output0;
66-
#elif defined(BIASE_ELE)
67-
CL_DTYPE4 output0 = READ_IMG_TYPE(CL_DTYPE_CHAR, bias, SAMPLER, output_pos0);
68-
CL_DTYPE4 output1 = output0;
69-
CL_DTYPE4 output2 = output0;
70-
CL_DTYPE4 output3 = output0;
71-
7262
#else
7363
CL_DTYPE4 output0 = 0.0f;
7464
CL_DTYPE4 output1 = 0.0f;
@@ -234,24 +224,6 @@ __kernel void conv2d_1x1_opt(
234224
}
235225
}
236226

237-
#ifdef BATCH_NORM
238-
output0 = output0 * READ_IMG_TYPE(
239-
CL_DTYPE_CHAR, new_scale, SAMPLER, (int2)(out_c, 0)) +
240-
READ_IMG_TYPE(CL_DTYPE_CHAR, new_biase, SAMPLER, (int2)(out_c, 0));
241-
242-
output1 = output1 * READ_IMG_TYPE(
243-
CL_DTYPE_CHAR, new_scale, SAMPLER, (int2)(out_c, 0)) +
244-
READ_IMG_TYPE(CL_DTYPE_CHAR, new_biase, SAMPLER, (int2)(out_c, 0));
245-
246-
output2 = output2 * READ_IMG_TYPE(
247-
CL_DTYPE_CHAR, new_scale, SAMPLER, (int2)(out_c, 0)) +
248-
READ_IMG_TYPE(CL_DTYPE_CHAR, new_biase, SAMPLER, (int2)(out_c, 0));
249-
250-
output3 = output3 * READ_IMG_TYPE(
251-
CL_DTYPE_CHAR, new_scale, SAMPLER, (int2)(out_c, 0)) +
252-
READ_IMG_TYPE(CL_DTYPE_CHAR, new_biase, SAMPLER, (int2)(out_c, 0));
253-
#endif
254-
255227
CL_DTYPE4 alpha0,alpha1,alpha2,alpha3;
256228
#ifdef PRELU_CH //{
257229
alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(out_c, 0));
@@ -260,10 +232,18 @@ CL_DTYPE4 alpha0,alpha1,alpha2,alpha3;
260232
alpha3 = alpha0;
261233
//}
262234
#elif defined(PRELU_ELE) //{
263-
alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, output_pos0);
264-
alpha1 = alpha0;
265-
alpha2 = alpha0;
266-
alpha3 = alpha0;
235+
if (out_w0 < old_w) {
236+
alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, output_pos0);
237+
}
238+
if (out_w1 < old_w) {
239+
alpha1 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, output_pos1);
240+
}
241+
if (out_w2 < old_w) {
242+
alpha2 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, output_pos2);
243+
}
244+
if (out_w3 < old_w) {
245+
alpha3 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, output_pos3);
246+
}
267247
//}
268248
#elif defined(PRELU_ALL) //{
269249
alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(0, 0));
@@ -280,6 +260,13 @@ CL_DTYPE4 alpha0,alpha1,alpha2,alpha3;
280260
output2 = activation_type4(output2, alpha2);
281261
output3 = activation_type4(output3, alpha3);
282262

263+
#ifdef SCALE_ACTIVATION
264+
output0 = fuse_scale(output0, 1.f, 0.f, 0.f);
265+
output1 = fuse_scale(output1, 1.f, 0.f, 0.f);
266+
output2 = fuse_scale(output2, 1.f, 0.f, 0.f);
267+
output3 = fuse_scale(output3, 1.f, 0.f, 0.f);
268+
#endif
269+
283270
if (out_w0 < old_w) {
284271
WRITE_IMG_TYPE(CL_DTYPE_CHAR, output_image, output_pos0, output0);
285272
}
@@ -304,10 +291,6 @@ __kernel void conv2d_1x1_simple(
304291
__read_only image2d_t input_image,
305292
__read_only image2d_t filter,
306293
__read_only image2d_t bias,
307-
#ifdef BATCH_NORM
308-
__read_only image2d_t new_scale,
309-
__read_only image2d_t new_biase,
310-
#endif
311294
__write_only image2d_t output_image,
312295
__private const int stride,
313296
__private const int offset,
@@ -359,12 +342,6 @@ __kernel void conv2d_1x1_simple(
359342
CL_DTYPE4 output1 = output0;
360343
CL_DTYPE4 output2 = output0;
361344
CL_DTYPE4 output3 = output0;
362-
#elif defined(BIASE_ELE)
363-
CL_DTYPE4 output0 = READ_IMG_TYPE(CL_DTYPE_CHAR, bias, SAMPLER, output_pos0);
364-
CL_DTYPE4 output1 = output0;
365-
CL_DTYPE4 output2 = output0;
366-
CL_DTYPE4 output3 = output0;
367-
368345
#else
369346
CL_DTYPE4 output0 = 0.0f;
370347
CL_DTYPE4 output1 = 0.0f;
@@ -421,24 +398,6 @@ __kernel void conv2d_1x1_simple(
421398
output3 = mad(input3.w, weight3, output3);
422399
}
423400

424-
#ifdef BATCH_NORM
425-
output0 = output0 * READ_IMG_TYPE(
426-
CL_DTYPE_CHAR, new_scale, SAMPLER, (int2)(out_c, 0)) +
427-
READ_IMG_TYPE(CL_DTYPE_CHAR, new_biase, SAMPLER, (int2)(out_c, 0));
428-
429-
output1 = output1 * READ_IMG_TYPE(
430-
CL_DTYPE_CHAR, new_scale, SAMPLER, (int2)(out_c, 0)) +
431-
READ_IMG_TYPE(CL_DTYPE_CHAR, new_biase, SAMPLER, (int2)(out_c, 0));
432-
433-
output2 = output2 * READ_IMG_TYPE(
434-
CL_DTYPE_CHAR, new_scale, SAMPLER, (int2)(out_c, 0)) +
435-
READ_IMG_TYPE(CL_DTYPE_CHAR, new_biase, SAMPLER, (int2)(out_c, 0));
436-
437-
output3 = output3 * READ_IMG_TYPE(
438-
CL_DTYPE_CHAR, new_scale, SAMPLER, (int2)(out_c, 0)) +
439-
READ_IMG_TYPE(CL_DTYPE_CHAR, new_biase, SAMPLER, (int2)(out_c, 0));
440-
#endif
441-
442401
CL_DTYPE4 alpha0,alpha1,alpha2,alpha3;
443402
#ifdef PRELU_CH //{
444403
alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(out_c, 0));
@@ -447,10 +406,18 @@ CL_DTYPE4 alpha0,alpha1,alpha2,alpha3;
447406
alpha3 = alpha0;
448407
//}
449408
#elif defined(PRELU_ELE) //{
450-
alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, output_pos0);
451-
alpha1 = alpha0;
452-
alpha2 = alpha0;
453-
alpha3 = alpha0;
409+
if (out_w0 < old_w) {
410+
alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, output_pos0);
411+
}
412+
if (out_w1 < old_w) {
413+
alpha1 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, output_pos1);
414+
}
415+
if (out_w2 < old_w) {
416+
alpha2 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, output_pos2);
417+
}
418+
if (out_w3 < old_w) {
419+
alpha3 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, output_pos3);
420+
}
454421
//}
455422
#elif defined(PRELU_ALL) //{
456423
alpha0 = READ_IMG_TYPE(CL_DTYPE_CHAR, prelu_alpha, SAMPLER, (int2)(0, 0));
@@ -467,6 +434,13 @@ CL_DTYPE4 alpha0,alpha1,alpha2,alpha3;
467434
output2 = activation_type4(output2, alpha2);
468435
output3 = activation_type4(output3, alpha3);
469436

437+
#ifdef SCALE_ACTIVATION
438+
output0 = fuse_scale(output0, 1.f, 0.f, 0.f);
439+
output1 = fuse_scale(output1, 1.f, 0.f, 0.f);
440+
output2 = fuse_scale(output2, 1.f, 0.f, 0.f);
441+
output3 = fuse_scale(output3, 1.f, 0.f, 0.f);
442+
#endif
443+
470444
if (out_w0 < old_w) {
471445
WRITE_IMG_TYPE(CL_DTYPE_CHAR, output_image, output_pos0, output0);
472446
}
@@ -482,4 +456,4 @@ CL_DTYPE4 alpha0,alpha1,alpha2,alpha3;
482456
if (out_w3 < old_w) {
483457
WRITE_IMG_TYPE(CL_DTYPE_CHAR, output_image, output_pos3, output3);
484458
}
485-
}
459+
}

0 commit comments

Comments
 (0)