Merge pull request opencv#17967 from l-bat:non_const_weights_for_conv

l-bat · web-flow · commit d695208727c8 · 2020-08-03T18:02:49.000Z
* Support convolution with non-const weights

* Fix OpenCL blobs

* Update tests
diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
@@ -106,18 +106,19 @@ class BaseConvolutionLayerImpl : public ConvolutionLayer
         inputs_arr.getMatVector(inputs);
         outputs_arr.getMatVector(outputs);
 
-        CV_Assert(inputs.size() > 0);
+        CV_Assert((inputs.size() > outputs.size() && blobs.empty()) ||
+                  (!inputs.empty() && (blobs.size() == 1 || blobs.size() == 2)));
+        MatSize weightShape = blobs.empty() ? inputs[1].size : blobs[0].size;
 
-        CV_Assert(blobs.size() == 1 || blobs.size() == 2);
         CV_Assert(inputs[0].dims == outputs[0].dims);
-        CV_Assert(blobs[0].dims == kernel_size.size() + 2);
+        CV_Assert(weightShape.dims() == kernel_size.size() + 2);
         for (int i = 0; i < kernel_size.size(); i++) {
-            CV_Assert(blobs[0].size[i + 2] == kernel_size[i]);
+            CV_Assert(weightShape[i + 2] == kernel_size[i]);
         }
 
         const Mat &input = inputs[0];
         CV_Assert((input.dims == 4 || input.dims == 5) && (input.type() == CV_32F || input.type() == CV_16S));
-        for (size_t i = 0; i < inputs.size(); i++)
+        for (size_t i = 0; i < outputs.size(); i++)
         {
             CV_Assert(inputs[i].type() == input.type());
             CV_Assert((inputs[i].dims == 4 || inputs[i].dims == 5) && inputs[i].size[1] == input.size[1]);
@@ -245,6 +246,7 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
 
     MatShape computeColRowShape(const MatShape &inpShape, const MatShape &outShape) const CV_OVERRIDE
     {
+        CV_Assert(!blobs.empty());
         int dims = inpShape.size();
         int inpD = dims == 5 ? inpShape[2] : 1;
         int inpH = inpShape[dims - 2];
@@ -262,29 +264,31 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
         {
             if (kernel_size.size() == 3)
                 return preferableTarget == DNN_TARGET_CPU;
+            if ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || preferableTarget != DNN_TARGET_MYRIAD) && blobs.empty())
+                return false;
             return (preferableTarget != DNN_TARGET_MYRIAD || dilation.width == dilation.height);
         }
         else
 #endif
             return (kernel_size.size() == 3 && preferableTarget == DNN_TARGET_CPU && backendId == DNN_BACKEND_OPENCV) ||
-                   (kernel_size.size() == 2 && (backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE));
+                   (kernel_size.size() == 2 && (backendId == DNN_BACKEND_OPENCV || (backendId == DNN_BACKEND_HALIDE && !blobs.empty())));
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
                          const int requiredOutputs,
                          std::vector<MatShape> &outputs,
                          std::vector<MatShape> &internals) const CV_OVERRIDE
     {
-        CV_Assert(blobs.size() != 0);
-        CV_Assert(!hasBias() || blobs[1].total() == (size_t)blobs[0].size[0]);
-        CV_Assert(inputs.size() == (size_t)1);
+        CV_Assert(!blobs.empty() || inputs.size() > 1);
+        const int* weightShape = blobs.empty() ? &inputs[1][0] : blobs[0].size.p;
+        CV_Assert(!hasBias() || blobs[1].total() == (size_t)weightShape[0]);
 
         internals.clear();
 
         CV_Assert(inputs.size() != 0);
         std::vector<int> inpShape(inputs[0].begin() + 2, inputs[0].end());
 
-        int outCn = blobs[0].size[0];
+        int outCn = weightShape[0];
         std::vector<int> outShape;
         outShape.push_back(inputs[0][0]);
         outShape.push_back(outCn);
@@ -300,10 +304,10 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
             getConvPoolOutParams(inpShape, kernel_size, strides, padMode, dilations, outShape);
         }
 
-        int ngroups = inpCn / blobs[0].size[1];
-        if (ngroups == 0 || ngroups * blobs[0].size[1] != inpCn)
+        int ngroups = inpCn / weightShape[1];
+        if (ngroups == 0 || ngroups * weightShape[1] != inpCn)
             CV_Error(Error::StsError, format("Number of input channels should "
-                     "be multiple of %d but got %d", blobs[0].size[1], inpCn));
+                     "be multiple of %d but got %d", weightShape[1], inpCn));
         CV_Assert(ngroups > 0 && inpCn % ngroups == 0 && outCn % ngroups == 0);
 
         outputs.resize(1, outShape);
@@ -315,34 +319,34 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
     {
         BaseConvolutionLayerImpl::finalize(inputs_arr, outputs_arr);
 
-        CV_Assert(!blobs.empty());
-        const int outCn = blobs[0].size[0];
+        std::vector<Mat> inputs;
+        inputs_arr.getMatVector(inputs);
         // prepare weightsMat where each row is aligned and has enough zero padding on the right to
         // use vectorized (i.e. with intrinsics) loops without tail processing
-        Mat wm = blobs[0].reshape(1, outCn);
+        Mat wm = blobs.empty() ? inputs[1].reshape(1, numOutput) : blobs[0].reshape(1, numOutput);
         if( wm.step1() % VEC_ALIGN != 0 )
         {
             int newcols = (int)alignSize(wm.step1(), VEC_ALIGN);
-            Mat wm_buffer = Mat(outCn, newcols, wm.type());
+            Mat wm_buffer = Mat(numOutput, newcols, wm.type());
             Mat wm_padding = wm_buffer.colRange(wm.cols, newcols);
             wm_padding.setTo(Scalar::all(0.));
             Mat wm_aligned = wm_buffer.colRange(0, wm.cols);
             wm.copyTo(wm_aligned);
             wm = wm_aligned;
         }
         weightsMat = wm;
-        weightsMultipliers.assign(outCn, 1.0);
+        weightsMultipliers.assign(numOutput, 1.0);
 
-        Mat biasMat = hasBias() ? blobs[1].reshape(1, outCn) : Mat();
-        biasvec.resize(outCn+2);
+        Mat biasMat = hasBias() ? blobs[1].reshape(1, numOutput) : Mat();
+        biasvec.resize(numOutput+2);
         if( biasMat.empty() )
         {
-            for(int i = 0; i < outCn; i++ )
+            for(int i = 0; i < numOutput; i++ )
                 biasvec[i] = 0.f;
         }
         else
         {
-            for(int i = 0; i < outCn; i++ )
+            for(int i = 0; i < numOutput; i++ )
                 biasvec[i] = biasMat.at<float>(i);
         }
 #ifdef HAVE_OPENCL
@@ -352,7 +356,7 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
 
     bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE
     {
-        if (!activ.empty() && !layer.empty())
+        if ((!activ.empty() && !layer.empty()) || blobs.empty())
             return false;
 
         activ = layer;
@@ -537,37 +541,48 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
                                         const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
     {
-        CV_Assert_N(inputs.size() == 1, nodes.size() == 1);
+        CV_Assert_N(inputs.size() >= 1, nodes.size() >= 1);
         auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
         std::vector<size_t> dims = ieInpNode->get_shape();
         CV_Assert(dims.size() == 4 || dims.size() == 5);
+        std::shared_ptr<ngraph::Node> ieWeights = nodes.size() > 1 ? nodes[1].dynamicCast<InfEngineNgraphNode>()->node : nullptr;
         const int inpCn = dims[1];
-        const int outCn = blobs[0].size[0];
-        const int inpGroupCn = blobs[0].size[1];
+        const int inpGroupCn = nodes.size() > 1 ? ieWeights->get_shape()[1] : blobs[0].size[1];
         const int group = inpCn / inpGroupCn;
 
-        std::vector<size_t> kernel_shape = getShape<size_t>(blobs[0]);
+        std::vector<size_t> kernel_shape;
         if (group != 1)
         {
-            kernel_shape[0] /= group;
-            kernel_shape.insert(kernel_shape.begin(), group);
+            kernel_shape.push_back(group);
         }
+        kernel_shape.push_back(numOutput / group);
+        kernel_shape.push_back(inpCn / group);
+        std::copy(kernel_size.begin(), kernel_size.end(), back_inserter(kernel_shape));
 
-        auto ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, kernel_shape, blobs[0].data);
-        if (fusedWeights)
+        if (nodes.size() == 1)
         {
-            if (weightsMat.isContinuous())
-            {
-                ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, kernel_shape, weightsMat.data);
-            }
-            else
+            ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, kernel_shape, blobs[0].data);
+            if (fusedWeights)
             {
-                Mat newWeights;
-                Mat cvWeights = weightsMat.colRange(0, blobs[0].total() / outCn);
-                cvWeights.copyTo(newWeights);
-                ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, kernel_shape, newWeights.data);
+                if (weightsMat.isContinuous())
+                {
+                    ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, kernel_shape, weightsMat.data);
+                }
+                else
+                {
+                    Mat newWeights;
+                    Mat cvWeights = weightsMat.colRange(0, blobs[0].total() / numOutput);
+                    cvWeights.copyTo(newWeights);
+                    ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, kernel_shape, newWeights.data);
+                }
             }
         }
+        else
+        {
+            auto shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
+                             ngraph::Shape{kernel_shape.size()}, kernel_shape.data());
+            ieWeights  = std::make_shared<ngraph::op::v1::Reshape>(ieWeights, shape, true);
+        }
 
         ngraph::op::PadType pad_type = ngraph::op::PadType::EXPLICIT;
         if (!padMode.empty())
@@ -592,11 +607,21 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
                                 pad_type);
         }
 
-        if (hasBias() || fusedBias)
+        if (hasBias() || fusedBias || nodes.size() == 3)
         {
             std::vector<size_t> shape(conv_node->get_shape().size(), 1);
-            shape[1] = outCn;
-            auto bias = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape(shape), biasvec.data());
+            shape[1] = conv_node->get_shape()[1];
+            std::shared_ptr<ngraph::Node> bias;
+            if (nodes.size() == 3)
+            {
+                auto bias_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
+                                    ngraph::Shape{shape.size()}, shape.data());
+                bias = std::make_shared<ngraph::op::v1::Reshape>(nodes[2].dynamicCast<InfEngineNgraphNode>()->node, bias_shape, true);
+            }
+            else
+            {
+                bias = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape(shape), biasvec.data());
+            }
             auto conv_bias = std::make_shared<ngraph::op::v1::Add>(conv_node, bias, ngraph::op::AutoBroadcastType::NUMPY);
             return Ptr<BackendNode>(new InfEngineNgraphNode(conv_bias));
         }
@@ -1103,6 +1128,26 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
         for (int i = 0; i < inputs.size(); ++i)
             CV_Assert(inputs[i].u != outputs[0].u);
 
+        if (blobs.empty())
+        {
+            size_t n = inputs.size() - 1;
+            umat_blobs.resize(n);
+            for (size_t i = 0; i < n; i++)
+            {
+                if (use_half)
+                {
+                    Mat matFP32;
+                    convertFp16(inputs[i + 1], matFP32);
+                    matFP32.copyTo(umat_blobs[i]);
+                }
+                else
+                {
+                    inputs[i + 1].copyTo(umat_blobs[i]);
+                }
+            }
+            inputs.resize(1);
+        }
+
         if (umat_blobs.empty())
         {
             size_t n = blobs.size();
@@ -1113,7 +1158,7 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
             }
         }
 
-        if (convolutionOp.empty())
+        if (convolutionOp.empty() || blobs.empty())
         {
             OCL4DNNConvConfig config;
             config.in_shape = shape(inputs[0]);
@@ -1123,7 +1168,7 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
             config.stride = stride;
             config.dilation = dilation;
             config.group = inputs[0].size[1] / umat_blobs[0].size[1];
-            config.bias_term = (hasBias()) ? true : false;
+            config.bias_term = umat_blobs.size() == 2;
             config.use_half = use_half;
 
             convolutionOp = Ptr<OCL4DNNConvSpatial<float> >(new OCL4DNNConvSpatial<float>(config));
@@ -1250,16 +1295,37 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
         inputs_arr.getMatVector(inputs);
         outputs_arr.getMatVector(outputs);
 
+        int outCn = blobs.empty() ? inputs[1].size[0] : blobs[0].size[0];
+        // Need to align non-const blobs
+        if (blobs.empty())
+        {
+            Mat wm = inputs[1].reshape(1, outCn);
+            if( wm.step1() % VEC_ALIGN != 0 )
+            {
+                wm.copyTo(weightsMat);
+                if (inputs.size() > 2)
+                {
+                    Mat biasMat = inputs[2].reshape(1, outCn);
+                    biasMat.col(0).copyTo(biasvec);
+                    biasvec.resize(outCn + 2);
+                }
+                else
+                {
+                    biasvec.resize(outCn + 2, 0);
+                }
+            }
+        }
+
         /*printf("conv %s: input (%d x %d x %d x %d), kernel (%d x %d), pad (%d x %d), stride (%d x %d), dilation (%d x %d)\n",
                name.c_str(), inputs[0].size[0], inputs[0].size[1], inputs[0].size[2], inputs[0].size[3],
                kernel.width, kernel.height, pad.width, pad.height,
                stride.width, stride.height, dilation.width, dilation.height);*/
-        CV_Assert_N(inputs.size() == (size_t)1, inputs[0].size[1] % blobs[0].size[1] == 0,
+        int inpGroupCn = blobs.empty() ? inputs[1].size[1] : blobs[0].size[1];
+        CV_Assert_N(inputs.size() >= (size_t)1, inputs[0].size[1] % inpGroupCn == 0,
                     outputs.size() == 1, inputs[0].data != outputs[0].data);
 
-        int ngroups = inputs[0].size[1]/blobs[0].size[1];
+        int ngroups = inputs[0].size[1] / inpGroupCn;
         CV_Assert(outputs[0].size[1] % ngroups == 0);
-        int outCn = blobs[0].size[0];
 
         reluslope.clear();
         if( activ )
@@ -1328,11 +1394,11 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
     virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                            const std::vector<MatShape> &outputs) const CV_OVERRIDE
     {
-        CV_Assert(inputs.size() == outputs.size());
+        CV_Assert(inputs.size() == outputs.size() || inputs.size() == outputs.size() + blobs.size());
 
         int64 flops = 0;
         int karea = std::accumulate(kernel_size.begin(), kernel_size.end(), 1, std::multiplies<size_t>());
-        for (int i = 0; i < inputs.size(); i++)
+        for (int i = 0; i < outputs.size(); i++)
         {
             flops += total(outputs[i])*(CV_BIG_INT(2)*karea*inputs[i][1] + 1);
         }
diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -1003,10 +1003,13 @@ void ONNXImporter::populateNet(Net dstNet)
             CV_Assert(node_proto.input_size() >= 2);
             layerParams.type = "Convolution";
             for (int j = 1; j < node_proto.input_size(); j++) {
-                layerParams.blobs.push_back(getBlob(node_proto, constBlobs, j));
+                if (constBlobs.find(node_proto.input(j)) != constBlobs.end())
+                {
+                    layerParams.blobs.push_back(getBlob(node_proto, constBlobs, j));
+                }
             }
-            layerParams.set("num_output", layerParams.blobs[0].size[0]);
-            layerParams.set("bias_term", node_proto.input_size() == 3);
+            int outCn = layerParams.blobs.empty() ? outShapes[node_proto.input(1)][0] : layerParams.blobs[0].size[0];
+            layerParams.set("num_output", outCn);
         }
         else if (layer_type == "ConvTranspose")
         {
diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp

Original file line number	Diff line number	Diff line change
`@@ -1003,10 +1003,13 @@ void ONNXImporter::populateNet(Net dstNet)`
`1003`	`1003`	`CV_Assert(node_proto.input_size() >= 2);`
`1004`	`1004`	`layerParams.type = "Convolution";`
`1005`	`1005`	`for (int j = 1; j < node_proto.input_size(); j++) {`
`1006`		`- layerParams.blobs.push_back(getBlob(node_proto, constBlobs, j));`
	`1006`	`+ if (constBlobs.find(node_proto.input(j)) != constBlobs.end())`
	`1007`	`+ {`
	`1008`	`+ layerParams.blobs.push_back(getBlob(node_proto, constBlobs, j));`
	`1009`	`+ }`
`1007`	`1010`	`}`
`1008`		`- layerParams.set("num_output", layerParams.blobs[0].size[0]);`
`1009`		`- layerParams.set("bias_term", node_proto.input_size() == 3);`
	`1011`	`+ int outCn = layerParams.blobs.empty() ? outShapes[node_proto.input(1)][0] : layerParams.blobs[0].size[0];`
	`1012`	`+ layerParams.set("num_output", outCn);`
`1010`	`1013`	`}`
`1011`	`1014`	`else if (layer_type == "ConvTranspose")`
`1012`	`1015`	`{`