
Commit 53305d4

Merge pull request opencv#10891 from pengli:dnn

2 parents: 8b4871a + 2863f95

8 files changed (+94, -12 lines)

modules/dnn/include/opencv2/dnn/all_layers.hpp
Lines changed: 2 additions & 0 deletions

@@ -406,6 +406,8 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
     class CV_EXPORTS ReLU6Layer : public ActivationLayer
     {
     public:
+        float minValue, maxValue;
+
        static Ptr<ReLU6Layer> create(const LayerParams &params);
     };
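
The new public minValue/maxValue members expose the clamp bounds that the ReLU6 functor already uses internally, so downstream code (the convolution fusion below) can read them. A minimal standalone sketch of the semantics those fields describe (illustration only, not OpenCV code; defaults mirror ReLU6Layer::create):

    #include <algorithm>
    #include <cstdio>

    // ReLU6 clamps its input to [minValue, maxValue]; 0 and 6 are the defaults.
    static float relu6(float x, float minValue = 0.0f, float maxValue = 6.0f)
    {
        return std::min(std::max(x, minValue), maxValue);
    }

    int main()
    {
        const float samples[] = { -2.0f, 0.5f, 3.0f, 7.5f };
        for (float x : samples)
            std::printf("relu6(%.1f) = %.1f\n", x, relu6(x)); // 0.0, 0.5, 3.0, 6.0
        return 0;
    }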

modules/dnn/src/dnn.cpp
Lines changed: 1 addition & 0 deletions

@@ -1439,6 +1439,7 @@ struct Net::Impl
                  nextData &&
                  ((nextData->type == "ReLU") ||
                   (nextData->type == "ChannelsPReLU") ||
+                  (nextData->type == "ReLU6") ||
                   (nextData->type == "TanH") ||
                   (nextData->type == "Power"))) )
             {
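
This hunk adds "ReLU6" to the activation types that Net::Impl is willing to fuse into a preceding convolution on the OpenCL path. A simplified sketch of that eligibility check (hypothetical helper, not the actual dnn.cpp code):

    #include <set>
    #include <string>

    // Returns true if the follow-up layer type can be fused into the convolution,
    // per the list in the hunk above (ReLU6 is the newly added entry).
    static bool isFusableActivation(const std::string& nextLayerType)
    {
        static const std::set<std::string> fusable = {
            "ReLU", "ChannelsPReLU", "ReLU6", "TanH", "Power"
        };
        return fusable.count(nextLayerType) != 0;
    }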

modules/dnn/src/layers/convolution_layer.cpp
Lines changed: 14 additions & 0 deletions

@@ -860,6 +860,15 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
                 activType = OCL4DNN_CONV_FUSED_ACTIV_RELU;
             }

+            Ptr<ReLU6Layer> activ_relu6 = activ.dynamicCast<ReLU6Layer>();
+            if( !activ_relu6.empty() )
+            {
+                reluslope.resize(2);
+                reluslope[0] = activ_relu6->minValue;
+                reluslope[1] = activ_relu6->maxValue;
+                activType = OCL4DNN_CONV_FUSED_ACTIV_RELU6;
+            }
+
             Ptr<ChannelsPReLULayer> activ_chprelu = activ.dynamicCast<ChannelsPReLULayer>();
             if( !activ_chprelu.empty() )
             {

@@ -906,12 +915,17 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
             {
                 convolutionOp->setActivTanh(true);
             }
+            else if ( activType == OCL4DNN_CONV_FUSED_ACTIV_RELU6)
+            {
+                convolutionOp->setActivReLU6(true, reluslope[0], reluslope[1]);
+            }
             else
             {
                 convolutionOp->setActivReLU(false, 0);
                 convolutionOp->setActivPReLU(false, reluslope);
                 convolutionOp->setActivPower(false, 1.f);
                 convolutionOp->setActivTanh(false);
+                convolutionOp->setActivReLU6(false, 0, 0);
             }
             newActiv = false;
         }
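
Here the convolution layer reuses the two-element reluslope vector to carry the clamp bounds from the fused ReLU6Layer down to the OCL4DNN convolution. A hedged sketch of that hand-off with stand-in types (names simplified; not the real ConvolutionLayerImpl):

    #include <vector>

    // Stand-ins for the real classes, just to show the data flow.
    struct FakeReLU6Layer   { float minValue = 0.0f, maxValue = 6.0f; };
    struct FakeConvolutionOp
    {
        float min_ = 0.0f, max_ = 0.0f;
        bool  fused_ = false;
        void setActivReLU6(bool fuse, float lo, float hi) { fused_ = fuse; min_ = lo; max_ = hi; }
    };

    static void fuseReLU6(const FakeReLU6Layer& act, FakeConvolutionOp& conv)
    {
        std::vector<float> reluslope(2);
        reluslope[0] = act.minValue;   // as in the hunk above
        reluslope[1] = act.maxValue;
        conv.setActivReLU6(true, reluslope[0], reluslope[1]);
    }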

modules/dnn/src/layers/elementwise_layers.cpp
Lines changed: 27 additions & 2 deletions

@@ -381,8 +381,30 @@ struct ReLU6Functor
 #ifdef HAVE_OPENCL
     bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
     {
-        // TODO: implement OCL version
-        return false;
+        std::vector<UMat> inputs;
+        std::vector<UMat> outputs;
+
+        inps.getUMatVector(inputs);
+        outs.getUMatVector(outputs);
+        String buildopt = oclGetTMacro(inputs[0]);
+
+        for (size_t i = 0; i < inputs.size(); i++)
+        {
+            UMat& src = inputs[i];
+            UMat& dst = outputs[i];
+
+            ocl::Kernel kernel("ReLU6Forward", ocl::dnn::activations_oclsrc, buildopt);
+            kernel.set(0, (int)src.total());
+            kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
+            kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
+            kernel.set(3, (float)minValue);
+            kernel.set(4, (float)maxValue);
+
+            size_t gSize = src.total();
+            CV_Assert(kernel.run(1, &gSize, NULL, false));
+        }
+
+        return true;
     }
 #endif

@@ -867,6 +889,9 @@ Ptr<ReLU6Layer> ReLU6Layer::create(const LayerParams& params)
     float maxValue = params.get<float>("max_value", 6.0f);
     Ptr<ReLU6Layer> l(new ElementWiseLayer<ReLU6Functor>(ReLU6Functor(minValue, maxValue)));
     l->setParamsFrom(params);
+    l->minValue = minValue;
+    l->maxValue = maxValue;
+
     return l;
 }
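
A hedged usage sketch of ReLU6Layer::create after this change, checking that the new public members reflect the bounds passed via LayerParams. The "min_value" key is assumed by symmetry with the "max_value" key visible in the hunk:

    #include <opencv2/dnn/all_layers.hpp>

    using namespace cv;
    using namespace cv::dnn;

    // Builds a ReLU6 layer with custom clamp bounds (illustrative, not a test from the PR).
    static Ptr<ReLU6Layer> makeReLU6(float lo = 0.0f, float hi = 6.0f)
    {
        LayerParams params;
        params.set("min_value", lo); // assumed parameter name
        params.set("max_value", hi); // parameter name shown in ReLU6Layer::create
        Ptr<ReLU6Layer> layer = ReLU6Layer::create(params);
        CV_Assert(layer->minValue == lo && layer->maxValue == hi);
        return layer;
    }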

modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
Lines changed: 5 additions & 1 deletion

@@ -78,7 +78,8 @@ typedef enum {
     OCL4DNN_CONV_FUSED_ACTIV_RELU = 1,
     OCL4DNN_CONV_FUSED_ACTIV_PRELU = 2,
     OCL4DNN_CONV_FUSED_ACTIV_POWER = 3,
-    OCL4DNN_CONV_FUSED_ACTIV_TANH = 4
+    OCL4DNN_CONV_FUSED_ACTIV_TANH = 4,
+    OCL4DNN_CONV_FUSED_ACTIV_RELU6 = 5
 } ocl4dnnFusedActiv_t;

 template<typename Dtype>

@@ -96,6 +97,7 @@ class OCL4DNNConvSpatial
     void setActivPReLU(bool fuse_activ, std::vector<float> &slope);
     void setActivPower(bool fuse_activ, float power);
     void setActivTanh(bool fuse_activ);
+    void setActivReLU6(bool fuse_activ, float min, float max);
     void setBias(bool bias_term);

 private:

@@ -319,6 +321,8 @@ class OCL4DNNConvSpatial
     cv::ocl::ProgramSource src_;
     int32_t prev_kernel_type_;
     float negative_slope_;
+    float min_value_;
+    float max_value_;
     UMat negative_slope_umat_;
     ocl4dnnFusedActiv_t fused_activ_;
     float power_;

modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
Lines changed: 22 additions & 0 deletions

@@ -82,6 +82,8 @@ OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
     fused_eltwise_ = false;
     power_ = 1.f;
     negative_slope_ = 0;
+    min_value_ = 0;
+    max_value_ = 0;
     prev_kernel_type_ = -1;
     tuned_ = false;

@@ -162,6 +164,9 @@ void OCL4DNNConvSpatial<Dtype>::setFusionDefine(ocl4dnnFusedActiv_t fused_activ,
         case OCL4DNN_CONV_FUSED_ACTIV_TANH:
             addDef("FUSED_CONV_TANH", 1);
             break;
+        case OCL4DNN_CONV_FUSED_ACTIV_RELU6:
+            addDef("FUSED_CONV_RELU6", 1);
+            break;
         default:
             ;
     }

@@ -184,6 +189,10 @@ void OCL4DNNConvSpatial<Dtype>::setFusionArg(ocl4dnnFusedActiv_t fused_activ, bo
         case OCL4DNN_CONV_FUSED_ACTIV_POWER:
             kernel.set(argIdx++, (float)power_);
             break;
+        case OCL4DNN_CONV_FUSED_ACTIV_RELU6:
+            kernel.set(argIdx++, (float)min_value_);
+            kernel.set(argIdx++, (float)max_value_);
+            break;
         default:
             ;
     }

@@ -393,6 +402,19 @@ void OCL4DNNConvSpatial<Dtype>::setActivReLU(bool fuse_activ, float slope)
         fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
 }

+template<typename Dtype>
+void OCL4DNNConvSpatial<Dtype>::setActivReLU6(bool fuse_activ, float min, float max)
+{
+    if ( fuse_activ )
+    {
+        fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_RELU6;
+        min_value_ = min;
+        max_value_ = max;
+    }
+    else
+        fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
+}
+
 template<typename Dtype>
 void OCL4DNNConvSpatial<Dtype>::setActivPReLU(bool fuse_activ, std::vector<float> &slope)
 {
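
These hunks wire OCL4DNN_CONV_FUSED_ACTIV_RELU6 through the two configuration points of the fused kernel: setFusionDefine adds the FUSED_CONV_RELU6 build option, and setFusionArg appends the two clamp bounds as extra kernel arguments. A hedged, self-contained sketch of that mapping (mock enum and helpers, not the real OCL4DNN classes):

    #include <string>
    #include <vector>

    enum FusedActiv { ACTIV_NONE, ACTIV_RELU, ACTIV_RELU6 }; // simplified stand-in

    // Mirrors setFusionDefine: which macro gets defined for the OpenCL build.
    static std::string fusionDefine(FusedActiv a)
    {
        switch (a)
        {
        case ACTIV_RELU:  return "-D FUSED_CONV_RELU";
        case ACTIV_RELU6: return "-D FUSED_CONV_RELU6";
        default:          return "";
        }
    }

    // Mirrors setFusionArg: which extra scalar arguments the kernel receives.
    static std::vector<float> fusionArgs(FusedActiv a, float slope, float lo, float hi)
    {
        if (a == ACTIV_RELU)  return { slope };
        if (a == ACTIV_RELU6) return { lo, hi }; // min_value_, max_value_ in the patch
        return {};
    }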

modules/dnn/src/opencl/activations.cl
Lines changed: 11 additions & 0 deletions

@@ -54,6 +54,17 @@ __kernel void ReLUForward(const int count, __global const T* in, __global T* out
 #endif
 }

+__kernel void ReLU6Forward(const int count, __global const T* in, __global T* out,
+                           const T minValue, const T maxValue)
+{
+  int index = get_global_id(0);
+  if(index < count)
+  {
+    T x = in[index];
+    out[index] = clamp(x, minValue, maxValue);
+  }
+}
+
 __kernel void PReLUForward(const int count, const int channels, const int plane_size,
                            __global const T* in, __global T* out, __global const T* slope_data)
 {

modules/dnn/src/opencl/conv_layer_spatial.cl
Lines changed: 12 additions & 9 deletions

@@ -48,19 +48,22 @@
 #if defined(FUSED_CONV_RELU)
 #define ACTIVATION_RELU_FUNCTION(x, c) ((Dtype)(x) > 0 ? (Dtype)(x) : ((Dtype)(x) * (Dtype)(negative_slope)))
-#define NEGATIVE_SLOPE_ARG Dtype negative_slope,
+#define FUSED_ARG Dtype negative_slope,
 #elif defined(FUSED_CONV_PRELU)
 #define ACTIVATION_RELU_FUNCTION(x, c) ((Dtype)(x) > 0 ? (Dtype)(x) : ((Dtype)(x) * (Dtype)(negative_slope[c])))
-#define NEGATIVE_SLOPE_ARG __global const Dtype *negative_slope,
+#define FUSED_ARG __global const Dtype *negative_slope,
 #elif defined(FUSED_CONV_POWER)
 #define ACTIVATION_RELU_FUNCTION(x, c) pow(x, power)
-#define NEGATIVE_SLOPE_ARG Dtype power,
+#define FUSED_ARG Dtype power,
 #elif defined(FUSED_CONV_TANH)
 #define ACTIVATION_RELU_FUNCTION(x, c) tanh(x)
-#define NEGATIVE_SLOPE_ARG
+#define FUSED_ARG
+#elif defined(FUSED_CONV_RELU6)
+#define ACTIVATION_RELU_FUNCTION(x, c) (clamp((Dtype)(x), min_value, max_value))
+#define FUSED_ARG Dtype min_value, Dtype max_value,
 #else
 #define ACTIVATION_RELU_FUNCTION(x, c) (x)
-#define NEGATIVE_SLOPE_ARG
+#define FUSED_ARG
 #endif

 #ifdef FUSED_CONV_ELTWISE

@@ -108,7 +111,7 @@
 __kernel void ConvolveBasic(
     ELTWISE_DATA_ARG
-    NEGATIVE_SLOPE_ARG
+    FUSED_ARG
     __global Dtype* image_data,
     int image_offset,
     __global Dtype* kernel_data,

@@ -197,7 +200,7 @@ __attribute__((intel_reqd_sub_group_size(SIMD_SIZE)))
 __kernel void
 convolve_simd(
     ELTWISE_DATA_ARG
-    NEGATIVE_SLOPE_ARG
+    FUSED_ARG
     __global Dtype* inputs_base,
     filter_qualifier Dtype* weights_base,
     BIAS_KERNEL_ARG

@@ -417,7 +420,7 @@ typedef struct float0 { float s0; } float0; //never used but makes compiler happ
 #define GEMM_LIKE_KERNEL_ARGS \
     ELTWISE_DATA_ARG \
-    NEGATIVE_SLOPE_ARG \
+    FUSED_ARG \
     const __global Dtype *src0, \
     const __global Dtype *src1, \
     BIAS_KERNEL_ARG \

@@ -1731,7 +1734,7 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
 __kernel void DWCONV(
     ELTWISE_DATA_ARG
-    NEGATIVE_SLOPE_ARG
+    FUSED_ARG
     __global Dtype* image_data,
     __global Dtype* kernel_data,
     BIAS_KERNEL_ARG
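
The rename from NEGATIVE_SLOPE_ARG to FUSED_ARG reflects that the injected parameter list is no longer always a negative slope: with FUSED_CONV_RELU6 defined, every fused kernel gains min_value and max_value arguments and the activation becomes a clamp. A C++ rendering of that expansion (illustration only; the real code is the OpenCL above):

    // With FUSED_CONV_RELU6 defined:
    //   FUSED_ARG                      -> "Dtype min_value, Dtype max_value,"
    //   ACTIVATION_RELU_FUNCTION(x, c) -> clamp((Dtype)(x), min_value, max_value)
    using Dtype = float;

    static Dtype activation_relu6(Dtype x, Dtype min_value, Dtype max_value)
    {
        return x < min_value ? min_value : (x > max_value ? max_value : x);
    }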
