
Commit 53305d4

Merge pull request opencv#10891 from pengli:dnn

2 parents: 8b4871a + 2863f95

8 files changed (+94, -12 lines)

modules/dnn/include/opencv2/dnn/all_layers.hpp
Lines changed: 2 additions & 0 deletions

@@ -406,6 +406,8 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
     class CV_EXPORTS ReLU6Layer : public ActivationLayer
     {
     public:
+        float minValue, maxValue;
+
        static Ptr<ReLU6Layer> create(const LayerParams &params);
     };
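
The new public minValue/maxValue members expose the clamp bounds that the ReLU6 functor already uses internally, so downstream code (the convolution fusion below) can read them. A minimal standalone sketch of the semantics those fields describe (illustration only, not OpenCV code; defaults mirror ReLU6Layer::create):

    #include <algorithm>
    #include <cstdio>

    // ReLU6 clamps its input to [minValue, maxValue]; 0 and 6 are the defaults.
    static float relu6(float x, float minValue = 0.0f, float maxValue = 6.0f)
    {
        return std::min(std::max(x, minValue), maxValue);
    }

    int main()
    {
        const float samples[] = { -2.0f, 0.5f, 3.0f, 7.5f };
        for (float x : samples)
            std::printf("relu6(%.1f) = %.1f\n", x, relu6(x)); // 0.0, 0.5, 3.0, 6.0
        return 0;
    }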

modules/dnn/src/dnn.cpp
Lines changed: 1 addition & 0 deletions

@@ -1439,6 +1439,7 @@ struct Net::Impl
                  nextData &&
                  ((nextData->type == "ReLU") ||
                   (nextData->type == "ChannelsPReLU") ||
+                  (nextData->type == "ReLU6") ||
                   (nextData->type == "TanH") ||
                   (nextData->type == "Power"))) )
             {
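
This hunk adds "ReLU6" to the activation types that Net::Impl is willing to fuse into a preceding convolution on the OpenCL path. A simplified sketch of that eligibility check (hypothetical helper, not the actual dnn.cpp code):

    #include <set>
    #include <string>

    // Returns true if the follow-up layer type can be fused into the convolution,
    // per the list in the hunk above (ReLU6 is the newly added entry).
    static bool isFusableActivation(const std::string& nextLayerType)
    {
        static const std::set<std::string> fusable = {
            "ReLU", "ChannelsPReLU", "ReLU6", "TanH", "Power"
        };
        return fusable.count(nextLayerType) != 0;
    }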

modules/dnn/src/layers/convolution_layer.cpp
Lines changed: 14 additions & 0 deletions

@@ -860,6 +860,15 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
                 activType = OCL4DNN_CONV_FUSED_ACTIV_RELU;
             }

+            Ptr<ReLU6Layer> activ_relu6 = activ.dynamicCast<ReLU6Layer>();
+            if( !activ_relu6.empty() )
+            {
+                reluslope.resize(2);
+                reluslope[0] = activ_relu6->minValue;
+                reluslope[1] = activ_relu6->maxValue;
+                activType = OCL4DNN_CONV_FUSED_ACTIV_RELU6;
+            }
+
             Ptr<ChannelsPReLULayer> activ_chprelu = activ.dynamicCast<ChannelsPReLULayer>();
             if( !activ_chprelu.empty() )
             {

@@ -906,12 +915,17 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
             {
                 convolutionOp->setActivTanh(true);
             }
+            else if ( activType == OCL4DNN_CONV_FUSED_ACTIV_RELU6)
+            {
+                convolutionOp->setActivReLU6(true, reluslope[0], reluslope[1]);
+            }
             else
             {
                 convolutionOp->setActivReLU(false, 0);
                 convolutionOp->setActivPReLU(false, reluslope);
                 convolutionOp->setActivPower(false, 1.f);
                 convolutionOp->setActivTanh(false);
+                convolutionOp->setActivReLU6(false, 0, 0);
             }
             newActiv = false;
         }
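
Here the convolution layer reuses the two-element reluslope vector to carry the clamp bounds from the fused ReLU6Layer down to the OCL4DNN convolution. A hedged sketch of that hand-off with stand-in types (names simplified; not the real ConvolutionLayerImpl):

    #include <vector>

    // Stand-ins for the real classes, just to show the data flow.
    struct FakeReLU6Layer   { float minValue = 0.0f, maxValue = 6.0f; };
    struct FakeConvolutionOp
    {
        float min_ = 0.0f, max_ = 0.0f;
        bool  fused_ = false;
        void setActivReLU6(bool fuse, float lo, float hi) { fused_ = fuse; min_ = lo; max_ = hi; }
    };

    static void fuseReLU6(const FakeReLU6Layer& act, FakeConvolutionOp& conv)
    {
        std::vector<float> reluslope(2);
        reluslope[0] = act.minValue;   // as in the hunk above
        reluslope[1] = act.maxValue;
        conv.setActivReLU6(true, reluslope[0], reluslope[1]);
    }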

modules/dnn/src/layers/elementwise_layers.cpp
Lines changed: 27 additions & 2 deletions

@@ -381,8 +381,30 @@ struct ReLU6Functor
 #ifdef HAVE_OPENCL
     bool applyOCL(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
     {
-        // TODO: implement OCL version
-        return false;
+        std::vector<UMat> inputs;
+        std::vector<UMat> outputs;
+
+        inps.getUMatVector(inputs);
+        outs.getUMatVector(outputs);
+        String buildopt = oclGetTMacro(inputs[0]);
+
+        for (size_t i = 0; i < inputs.size(); i++)
+        {
+            UMat& src = inputs[i];
+            UMat& dst = outputs[i];
+
+            ocl::Kernel kernel("ReLU6Forward", ocl::dnn::activations_oclsrc, buildopt);
+            kernel.set(0, (int)src.total());
+            kernel.set(1, ocl::KernelArg::PtrReadOnly(src));
+            kernel.set(2, ocl::KernelArg::PtrWriteOnly(dst));
+            kernel.set(3, (float)minValue);
+            kernel.set(4, (float)maxValue);
+
+            size_t gSize = src.total();
+            CV_Assert(kernel.run(1, &gSize, NULL, false));
+        }
+
+        return true;
     }
 #endif

@@ -867,6 +889,9 @@ Ptr<ReLU6Layer> ReLU6Layer::create(const LayerParams& params)
     float maxValue = params.get<float>("max_value", 6.0f);
     Ptr<ReLU6Layer> l(new ElementWiseLayer<ReLU6Functor>(ReLU6Functor(minValue, maxValue)));
     l->setParamsFrom(params);
+    l->minValue = minValue;
+    l->maxValue = maxValue;
+
     return l;
 }
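
A hedged usage sketch of ReLU6Layer::create after this change, checking that the new public members reflect the bounds passed via LayerParams. The "min_value" key is assumed by symmetry with the "max_value" key visible in the hunk:

    #include <opencv2/dnn/all_layers.hpp>

    using namespace cv;
    using namespace cv::dnn;

    // Builds a ReLU6 layer with custom clamp bounds (illustrative, not a test from the PR).
    static Ptr<ReLU6Layer> makeReLU6(float lo = 0.0f, float hi = 6.0f)
    {
        LayerParams params;
        params.set("min_value", lo); // assumed parameter name
        params.set("max_value", hi); // parameter name shown in ReLU6Layer::create
        Ptr<ReLU6Layer> layer = ReLU6Layer::create(params);
        CV_Assert(layer->minValue == lo && layer->maxValue == hi);
        return layer;
    }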

modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
Lines changed: 5 additions & 1 deletion

@@ -78,7 +78,8 @@ typedef enum {
     OCL4DNN_CONV_FUSED_ACTIV_RELU = 1,
     OCL4DNN_CONV_FUSED_ACTIV_PRELU = 2,
     OCL4DNN_CONV_FUSED_ACTIV_POWER = 3,
-    OCL4DNN_CONV_FUSED_ACTIV_TANH = 4
+    OCL4DNN_CONV_FUSED_ACTIV_TANH = 4,
+    OCL4DNN_CONV_FUSED_ACTIV_RELU6 = 5
 } ocl4dnnFusedActiv_t;

 template<typename Dtype>

@@ -96,6 +97,7 @@ class OCL4DNNConvSpatial
     void setActivPReLU(bool fuse_activ, std::vector<float> &slope);
     void setActivPower(bool fuse_activ, float power);
     void setActivTanh(bool fuse_activ);
+    void setActivReLU6(bool fuse_activ, float min, float max);
     void setBias(bool bias_term);

 private:

@@ -319,6 +321,8 @@ class OCL4DNNConvSpatial
     cv::ocl::ProgramSource src_;
     int32_t prev_kernel_type_;
     float negative_slope_;
+    float min_value_;
+    float max_value_;
     UMat negative_slope_umat_;
     ocl4dnnFusedActiv_t fused_activ_;
     float power_;

modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
Lines changed: 22 additions & 0 deletions

@@ -82,6 +82,8 @@ OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
     fused_eltwise_ = false;
     power_ = 1.f;
     negative_slope_ = 0;
+    min_value_ = 0;
+    max_value_ = 0;
     prev_kernel_type_ = -1;
     tuned_ = false;

@@ -162,6 +164,9 @@ void OCL4DNNConvSpatial<Dtype>::setFusionDefine(ocl4dnnFusedActiv_t fused_activ,
         case OCL4DNN_CONV_FUSED_ACTIV_TANH:
             addDef("FUSED_CONV_TANH", 1);
             break;
+        case OCL4DNN_CONV_FUSED_ACTIV_RELU6:
+            addDef("FUSED_CONV_RELU6", 1);
+            break;
         default:
             ;
     }

@@ -184,6 +189,10 @@ void OCL4DNNConvSpatial<Dtype>::setFusionArg(ocl4dnnFusedActiv_t fused_activ, bo
         case OCL4DNN_CONV_FUSED_ACTIV_POWER:
             kernel.set(argIdx++, (float)power_);
             break;
+        case OCL4DNN_CONV_FUSED_ACTIV_RELU6:
+            kernel.set(argIdx++, (float)min_value_);
+            kernel.set(argIdx++, (float)max_value_);
+            break;
         default:
             ;
     }

@@ -393,6 +402,19 @@ void OCL4DNNConvSpatial<Dtype>::setActivReLU(bool fuse_activ, float slope)
         fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
 }

+template<typename Dtype>
+void OCL4DNNConvSpatial<Dtype>::setActivReLU6(bool fuse_activ, float min, float max)
+{
+    if ( fuse_activ )
+    {
+        fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_RELU6;
+        min_value_ = min;
+        max_value_ = max;
+    }
+    else
+        fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
+}
+
 template<typename Dtype>
 void OCL4DNNConvSpatial<Dtype>::setActivPReLU(bool fuse_activ, std::vector<float> &slope)
 {
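
These hunks wire OCL4DNN_CONV_FUSED_ACTIV_RELU6 through the two configuration points of the fused kernel: setFusionDefine adds the FUSED_CONV_RELU6 build option, and setFusionArg appends the two clamp bounds as extra kernel arguments. A hedged, self-contained sketch of that mapping (mock enum and helpers, not the real OCL4DNN classes):

    #include <string>
    #include <vector>

    enum FusedActiv { ACTIV_NONE, ACTIV_RELU, ACTIV_RELU6 }; // simplified stand-in

    // Mirrors setFusionDefine: which macro gets defined for the OpenCL build.
    static std::string fusionDefine(FusedActiv a)
    {
        switch (a)
        {
        case ACTIV_RELU:  return "-D FUSED_CONV_RELU";
        case ACTIV_RELU6: return "-D FUSED_CONV_RELU6";
        default:          return "";
        }
    }

    // Mirrors setFusionArg: which extra scalar arguments the kernel receives.
    static std::vector<float> fusionArgs(FusedActiv a, float slope, float lo, float hi)
    {
        if (a == ACTIV_RELU)  return { slope };
        if (a == ACTIV_RELU6) return { lo, hi }; // min_value_, max_value_ in the patch
        return {};
    }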

modules/dnn/src/opencl/activations.cl
Lines changed: 11 additions & 0 deletions

@@ -54,6 +54,17 @@ __kernel void ReLUForward(const int count, __global const T* in, __global T* out
 #endif
 }

+__kernel void ReLU6Forward(const int count, __global const T* in, __global T* out,
+                           const T minValue, const T maxValue)
+{
+  int index = get_global_id(0);
+  if(index < count)
+  {
+    T x = in[index];
+    out[index] = clamp(x, minValue, maxValue);
+  }
+}
+
 __kernel void PReLUForward(const int count, const int channels, const int plane_size,
                            __global const T* in, __global T* out, __global const T* slope_data)
 {

modules/dnn/src/opencl/conv_layer_spatial.cl
Lines changed: 12 additions & 9 deletions

@@ -48,19 +48,22 @@
 #if defined(FUSED_CONV_RELU)
 #define ACTIVATION_RELU_FUNCTION(x, c) ((Dtype)(x) > 0 ? (Dtype)(x) : ((Dtype)(x) * (Dtype)(negative_slope)))
-#define NEGATIVE_SLOPE_ARG Dtype negative_slope,
+#define FUSED_ARG Dtype negative_slope,
 #elif defined(FUSED_CONV_PRELU)
 #define ACTIVATION_RELU_FUNCTION(x, c) ((Dtype)(x) > 0 ? (Dtype)(x) : ((Dtype)(x) * (Dtype)(negative_slope[c])))
-#define NEGATIVE_SLOPE_ARG __global const Dtype *negative_slope,
+#define FUSED_ARG __global const Dtype *negative_slope,
 #elif defined(FUSED_CONV_POWER)
 #define ACTIVATION_RELU_FUNCTION(x, c) pow(x, power)
-#define NEGATIVE_SLOPE_ARG Dtype power,
+#define FUSED_ARG Dtype power,
 #elif defined(FUSED_CONV_TANH)
 #define ACTIVATION_RELU_FUNCTION(x, c) tanh(x)
-#define NEGATIVE_SLOPE_ARG
+#define FUSED_ARG
+#elif defined(FUSED_CONV_RELU6)
+#define ACTIVATION_RELU_FUNCTION(x, c) (clamp((Dtype)(x), min_value, max_value))
+#define FUSED_ARG Dtype min_value, Dtype max_value,
 #else
 #define ACTIVATION_RELU_FUNCTION(x, c) (x)
-#define NEGATIVE_SLOPE_ARG
+#define FUSED_ARG
 #endif

 #ifdef FUSED_CONV_ELTWISE

@@ -108,7 +111,7 @@
 __kernel void ConvolveBasic(
     ELTWISE_DATA_ARG
-    NEGATIVE_SLOPE_ARG
+    FUSED_ARG
     __global Dtype* image_data,
     int image_offset,
     __global Dtype* kernel_data,

@@ -197,7 +200,7 @@ __attribute__((intel_reqd_sub_group_size(SIMD_SIZE)))
 __kernel void
 convolve_simd(
     ELTWISE_DATA_ARG
-    NEGATIVE_SLOPE_ARG
+    FUSED_ARG
     __global Dtype* inputs_base,
     filter_qualifier Dtype* weights_base,
     BIAS_KERNEL_ARG

@@ -417,7 +420,7 @@ typedef struct float0 { float s0; } float0; //never used but makes compiler happ
 #define GEMM_LIKE_KERNEL_ARGS \
     ELTWISE_DATA_ARG \
-    NEGATIVE_SLOPE_ARG \
+    FUSED_ARG \
     const __global Dtype *src0, \
     const __global Dtype *src1, \
     BIAS_KERNEL_ARG \

@@ -1731,7 +1734,7 @@ __kernel void Conv_Interleaved(GEMM_LIKE_KERNEL_ARGS)
 __kernel void DWCONV(
     ELTWISE_DATA_ARG
-    NEGATIVE_SLOPE_ARG
+    FUSED_ARG
     __global Dtype* image_data,
     __global Dtype* kernel_data,
     BIAS_KERNEL_ARG
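
The rename from NEGATIVE_SLOPE_ARG to FUSED_ARG reflects that the injected parameter list is no longer always a negative slope: with FUSED_CONV_RELU6 defined, every fused kernel gains min_value and max_value arguments and the activation becomes a clamp. A C++ rendering of that expansion (illustration only; the real code is the OpenCL above):

    // With FUSED_CONV_RELU6 defined:
    //   FUSED_ARG                      -> "Dtype min_value, Dtype max_value,"
    //   ACTIVATION_RELU_FUNCTION(x, c) -> clamp((Dtype)(x), min_value, max_value)
    using Dtype = float;

    static Dtype activation_relu6(Dtype x, Dtype min_value, Dtype max_value)
    {
        return x < min_value ? min_value : (x > max_value ? max_value : x);
    }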
