Commit d695208

Merge pull request opencv#17967 from l-bat:non_const_weights_for_conv

* Supported convolution with non-const weights
* Fix opencl blobs
* Update tests

1 parent 65b02cc commit d695208
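
The change lets a Convolution layer take its weights (and optionally its bias) as regular network inputs instead of constant blobs. A minimal usage sketch, not part of the commit — the model file name and the input names "input"/"weights" are illustrative assumptions:

    // Hypothetical sketch: run a Conv whose weights are fed at runtime.
    #include <opencv2/dnn.hpp>
    #include <vector>
    using namespace cv;

    int main()
    {
        dnn::Net net = dnn::readNetFromONNX("conv_dynamic_w.onnx"); // assumed model
        Mat data(std::vector<int>{1, 3, 32, 32}, CV_32F, Scalar(1));
        Mat weights(std::vector<int>{8, 3, 3, 3}, CV_32F, Scalar(0.5));
        net.setInput(data, "input");       // regular data input
        net.setInput(weights, "weights");  // convolution weights as a runtime input
        Mat out = net.forward();
        return 0;
    }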

File tree

3 files changed: +178 −53 lines

modules/dnn/src/layers/convolution_layer.cpp

Lines changed: 116 additions & 50 deletions
@@ -106,18 +106,19 @@ class BaseConvolutionLayerImpl : public ConvolutionLayer
         inputs_arr.getMatVector(inputs);
         outputs_arr.getMatVector(outputs);

-        CV_Assert(inputs.size() > 0);
+        CV_Assert((inputs.size() > outputs.size() && blobs.empty()) ||
+                  (!inputs.empty() && (blobs.size() == 1 || blobs.size() == 2)));
+        MatSize weightShape = blobs.empty() ? inputs[1].size : blobs[0].size;

-        CV_Assert(blobs.size() == 1 || blobs.size() == 2);
         CV_Assert(inputs[0].dims == outputs[0].dims);
-        CV_Assert(blobs[0].dims == kernel_size.size() + 2);
+        CV_Assert(weightShape.dims() == kernel_size.size() + 2);
         for (int i = 0; i < kernel_size.size(); i++) {
-            CV_Assert(blobs[0].size[i + 2] == kernel_size[i]);
+            CV_Assert(weightShape[i + 2] == kernel_size[i]);
         }

         const Mat &input = inputs[0];
         CV_Assert((input.dims == 4 || input.dims == 5) && (input.type() == CV_32F || input.type() == CV_16S));
-        for (size_t i = 0; i < inputs.size(); i++)
+        for (size_t i = 0; i < outputs.size(); i++)
         {
             CV_Assert(inputs[i].type() == input.type());
             CV_Assert((inputs[i].dims == 4 || inputs[i].dims == 5) && inputs[i].size[1] == input.size[1]);
@@ -245,6 +246,7 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl

     MatShape computeColRowShape(const MatShape &inpShape, const MatShape &outShape) const CV_OVERRIDE
     {
+        CV_Assert(!blobs.empty());
         int dims = inpShape.size();
         int inpD = dims == 5 ? inpShape[2] : 1;
         int inpH = inpShape[dims - 2];
@@ -262,29 +264,31 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
         {
             if (kernel_size.size() == 3)
                 return preferableTarget == DNN_TARGET_CPU;
+            if ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || preferableTarget != DNN_TARGET_MYRIAD) && blobs.empty())
+                return false;
             return (preferableTarget != DNN_TARGET_MYRIAD || dilation.width == dilation.height);
         }
         else
 #endif
             return (kernel_size.size() == 3 && preferableTarget == DNN_TARGET_CPU && backendId == DNN_BACKEND_OPENCV) ||
-                   (kernel_size.size() == 2 && (backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE));
+                   (kernel_size.size() == 2 && (backendId == DNN_BACKEND_OPENCV || (backendId == DNN_BACKEND_HALIDE && !blobs.empty())));
     }

     bool getMemoryShapes(const std::vector<MatShape> &inputs,
                          const int requiredOutputs,
                          std::vector<MatShape> &outputs,
                          std::vector<MatShape> &internals) const CV_OVERRIDE
     {
-        CV_Assert(blobs.size() != 0);
-        CV_Assert(!hasBias() || blobs[1].total() == (size_t)blobs[0].size[0]);
-        CV_Assert(inputs.size() == (size_t)1);
+        CV_Assert(!blobs.empty() || inputs.size() > 1);
+        const int* weightShape = blobs.empty() ? &inputs[1][0] : blobs[0].size.p;
+        CV_Assert(!hasBias() || blobs[1].total() == (size_t)weightShape[0]);

         internals.clear();

         CV_Assert(inputs.size() != 0);
         std::vector<int> inpShape(inputs[0].begin() + 2, inputs[0].end());

-        int outCn = blobs[0].size[0];
+        int outCn = weightShape[0];
         std::vector<int> outShape;
         outShape.push_back(inputs[0][0]);
         outShape.push_back(outCn);
@@ -300,10 +304,10 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
             getConvPoolOutParams(inpShape, kernel_size, strides, padMode, dilations, outShape);
         }

-        int ngroups = inpCn / blobs[0].size[1];
-        if (ngroups == 0 || ngroups * blobs[0].size[1] != inpCn)
+        int ngroups = inpCn / weightShape[1];
+        if (ngroups == 0 || ngroups * weightShape[1] != inpCn)
             CV_Error(Error::StsError, format("Number of input channels should "
-                     "be multiple of %d but got %d", blobs[0].size[1], inpCn));
+                     "be multiple of %d but got %d", weightShape[1], inpCn));
         CV_Assert(ngroups > 0 && inpCn % ngroups == 0 && outCn % ngroups == 0);

         outputs.resize(1, outShape);
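
Note on the shape logic above: the group count is now derived from the weight shape, whichever source the weights come from. A tiny worked example with made-up numbers:

    // Illustrative numbers only: 8 input channels, 4 input channels per
    // group in the weight shape => 2 groups; output channels must then be
    // divisible by the group count.
    #include <cassert>

    int main()
    {
        int inpCn = 8;        // inputs[0] channel count
        int inpGroupCn = 4;   // weightShape[1]
        int outCn = 6;        // weightShape[0]
        int ngroups = inpCn / inpGroupCn;                   // 2
        assert(ngroups > 0 && ngroups * inpGroupCn == inpCn);
        assert(inpCn % ngroups == 0 && outCn % ngroups == 0);
        return 0;
    }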
@@ -315,34 +319,34 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
     {
         BaseConvolutionLayerImpl::finalize(inputs_arr, outputs_arr);

-        CV_Assert(!blobs.empty());
-        const int outCn = blobs[0].size[0];
+        std::vector<Mat> inputs;
+        inputs_arr.getMatVector(inputs);
         // prepare weightsMat where each row is aligned and has enough zero padding on the right to
         // use vectorized (i.e. with intrinsics) loops without tail processing
-        Mat wm = blobs[0].reshape(1, outCn);
+        Mat wm = blobs.empty() ? inputs[1].reshape(1, numOutput) : blobs[0].reshape(1, numOutput);
         if( wm.step1() % VEC_ALIGN != 0 )
         {
             int newcols = (int)alignSize(wm.step1(), VEC_ALIGN);
-            Mat wm_buffer = Mat(outCn, newcols, wm.type());
+            Mat wm_buffer = Mat(numOutput, newcols, wm.type());
             Mat wm_padding = wm_buffer.colRange(wm.cols, newcols);
             wm_padding.setTo(Scalar::all(0.));
             Mat wm_aligned = wm_buffer.colRange(0, wm.cols);
             wm.copyTo(wm_aligned);
             wm = wm_aligned;
         }
         weightsMat = wm;
-        weightsMultipliers.assign(outCn, 1.0);
+        weightsMultipliers.assign(numOutput, 1.0);

-        Mat biasMat = hasBias() ? blobs[1].reshape(1, outCn) : Mat();
-        biasvec.resize(outCn+2);
+        Mat biasMat = hasBias() ? blobs[1].reshape(1, numOutput) : Mat();
+        biasvec.resize(numOutput+2);
         if( biasMat.empty() )
         {
-            for(int i = 0; i < outCn; i++ )
+            for(int i = 0; i < numOutput; i++ )
                 biasvec[i] = 0.f;
         }
         else
         {
-            for(int i = 0; i < outCn; i++ )
+            for(int i = 0; i < numOutput; i++ )
                 biasvec[i] = biasMat.at<float>(i);
         }
 #ifdef HAVE_OPENCL
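
The finalize() path reuses the existing row-alignment trick for runtime weights. A standalone sketch of that idea, assuming VEC_ALIGN stands in for the layer's alignment constant:

    // Sketch: pad each weight row out to a multiple of the vector width so
    // SIMD loops need no tail handling. Not the layer's actual code.
    #include <opencv2/core.hpp>
    using namespace cv;

    Mat alignRows(const Mat& wm, int vecAlign)
    {
        if (wm.step1() % vecAlign == 0)
            return wm;                                        // already aligned
        int newcols = (int)alignSize(wm.step1(), vecAlign);
        Mat buf(wm.rows, newcols, wm.type(), Scalar::all(0)); // zero padding on the right
        wm.copyTo(buf.colRange(0, wm.cols));
        return buf.colRange(0, wm.cols);                      // aligned view with padded stride
    }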
@@ -352,7 +356,7 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl

     bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE
     {
-        if (!activ.empty() && !layer.empty())
+        if ((!activ.empty() && !layer.empty()) || blobs.empty())
             return false;

         activ = layer;
@@ -537,37 +541,48 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
                                         const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
     {
-        CV_Assert_N(inputs.size() == 1, nodes.size() == 1);
+        CV_Assert_N(inputs.size() >= 1, nodes.size() >= 1);
         auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
         std::vector<size_t> dims = ieInpNode->get_shape();
         CV_Assert(dims.size() == 4 || dims.size() == 5);
+        std::shared_ptr<ngraph::Node> ieWeights = nodes.size() > 1 ? nodes[1].dynamicCast<InfEngineNgraphNode>()->node : nullptr;
         const int inpCn = dims[1];
-        const int outCn = blobs[0].size[0];
-        const int inpGroupCn = blobs[0].size[1];
+        const int inpGroupCn = nodes.size() > 1 ? ieWeights->get_shape()[1] : blobs[0].size[1];
         const int group = inpCn / inpGroupCn;

-        std::vector<size_t> kernel_shape = getShape<size_t>(blobs[0]);
+        std::vector<size_t> kernel_shape;
         if (group != 1)
         {
-            kernel_shape[0] /= group;
-            kernel_shape.insert(kernel_shape.begin(), group);
+            kernel_shape.push_back(group);
         }
+        kernel_shape.push_back(numOutput / group);
+        kernel_shape.push_back(inpCn / group);
+        std::copy(kernel_size.begin(), kernel_size.end(), back_inserter(kernel_shape));

-        auto ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, kernel_shape, blobs[0].data);
-        if (fusedWeights)
+        if (nodes.size() == 1)
         {
-            if (weightsMat.isContinuous())
-            {
-                ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, kernel_shape, weightsMat.data);
-            }
-            else
+            ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, kernel_shape, blobs[0].data);
+            if (fusedWeights)
             {
-                Mat newWeights;
-                Mat cvWeights = weightsMat.colRange(0, blobs[0].total() / outCn);
-                cvWeights.copyTo(newWeights);
-                ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, kernel_shape, newWeights.data);
+                if (weightsMat.isContinuous())
+                {
+                    ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, kernel_shape, weightsMat.data);
+                }
+                else
+                {
+                    Mat newWeights;
+                    Mat cvWeights = weightsMat.colRange(0, blobs[0].total() / numOutput);
+                    cvWeights.copyTo(newWeights);
+                    ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, kernel_shape, newWeights.data);
+                }
             }
         }
+        else
+        {
+            auto shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
+                                                                ngraph::Shape{kernel_shape.size()}, kernel_shape.data());
+            ieWeights = std::make_shared<ngraph::op::v1::Reshape>(ieWeights, shape, true);
+        }

         ngraph::op::PadType pad_type = ngraph::op::PadType::EXPLICIT;
         if (!padMode.empty())
@@ -592,11 +607,21 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
                 pad_type);
         }

-        if (hasBias() || fusedBias)
+        if (hasBias() || fusedBias || nodes.size() == 3)
         {
             std::vector<size_t> shape(conv_node->get_shape().size(), 1);
-            shape[1] = outCn;
-            auto bias = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape(shape), biasvec.data());
+            shape[1] = conv_node->get_shape()[1];
+            std::shared_ptr<ngraph::Node> bias;
+            if (nodes.size() == 3)
+            {
+                auto bias_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
+                                                                         ngraph::Shape{shape.size()}, shape.data());
+                bias = std::make_shared<ngraph::op::v1::Reshape>(nodes[2].dynamicCast<InfEngineNgraphNode>()->node, bias_shape, true);
+            }
+            else
+            {
+                bias = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape(shape), biasvec.data());
+            }
             auto conv_bias = std::make_shared<ngraph::op::v1::Add>(conv_node, bias, ngraph::op::AutoBroadcastType::NUMPY);
             return Ptr<BackendNode>(new InfEngineNgraphNode(conv_bias));
         }
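
For reference, the kernel shape assembled for the nGraph backend above has the layout [group, outCn/group, inpCn/group, k...] when grouped. A minimal extraction of the same logic (names mirror the patch; values are whatever the caller supplies):

    #include <cstddef>
    #include <iterator>
    #include <vector>

    // Builds [outCn, inpCn, k...] for group == 1, otherwise
    // [group, outCn/group, inpCn/group, k...].
    std::vector<size_t> makeKernelShape(int numOutput, int inpCn, int group,
                                        const std::vector<size_t>& kernel_size)
    {
        std::vector<size_t> kernel_shape;
        if (group != 1)
            kernel_shape.push_back(group);
        kernel_shape.push_back(numOutput / group);
        kernel_shape.push_back(inpCn / group);
        std::copy(kernel_size.begin(), kernel_size.end(), std::back_inserter(kernel_shape));
        return kernel_shape;
    }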
@@ -1103,6 +1128,26 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
         for (int i = 0; i < inputs.size(); ++i)
             CV_Assert(inputs[i].u != outputs[0].u);

+        if (blobs.empty())
+        {
+            size_t n = inputs.size() - 1;
+            umat_blobs.resize(n);
+            for (size_t i = 0; i < n; i++)
+            {
+                if (use_half)
+                {
+                    Mat matFP32;
+                    convertFp16(inputs[i + 1], matFP32);
+                    matFP32.copyTo(umat_blobs[i]);
+                }
+                else
+                {
+                    inputs[i + 1].copyTo(umat_blobs[i]);
+                }
+            }
+            inputs.resize(1);
+        }
+
         if (umat_blobs.empty())
         {
             size_t n = blobs.size();
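
On the OpenCL path, half-precision runtime weights are converted back to FP32 before being cached in umat_blobs. A minimal round-trip sketch of cv::convertFp16 with illustrative data:

    // FP32 -> FP16 (stored as CV_16S) -> FP32 round trip; shapes and
    // values here are made up for illustration.
    #include <opencv2/core.hpp>
    using namespace cv;

    int main()
    {
        Mat w32(8, 27, CV_32F, Scalar(0.5f));
        Mat w16, back;
        convertFp16(w32, w16);   // CV_32F -> CV_16S (FP16 payload)
        convertFp16(w16, back);  // CV_16S (FP16) -> CV_32F
        return 0;
    }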
@@ -1113,7 +1158,7 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
             }
         }

-        if (convolutionOp.empty())
+        if (convolutionOp.empty() || blobs.empty())
         {
             OCL4DNNConvConfig config;
             config.in_shape = shape(inputs[0]);
@@ -1123,7 +1168,7 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
             config.stride = stride;
             config.dilation = dilation;
             config.group = inputs[0].size[1] / umat_blobs[0].size[1];
-            config.bias_term = (hasBias()) ? true : false;
+            config.bias_term = umat_blobs.size() == 2;
             config.use_half = use_half;

             convolutionOp = Ptr<OCL4DNNConvSpatial<float> >(new OCL4DNNConvSpatial<float>(config));
@@ -1250,16 +1295,37 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
         inputs_arr.getMatVector(inputs);
         outputs_arr.getMatVector(outputs);

+        int outCn = blobs.empty() ? inputs[1].size[0] : blobs[0].size[0];
+        // Need to align non-const blobs
+        if (blobs.empty())
+        {
+            Mat wm = inputs[1].reshape(1, outCn);
+            if( wm.step1() % VEC_ALIGN != 0 )
+            {
+                wm.copyTo(weightsMat);
+                if (inputs.size() > 2)
+                {
+                    Mat biasMat = inputs[2].reshape(1, outCn);
+                    biasMat.col(0).copyTo(biasvec);
+                    biasvec.resize(outCn + 2);
+                }
+                else
+                {
+                    biasvec.resize(outCn + 2, 0);
+                }
+            }
+        }
+
         /*printf("conv %s: input (%d x %d x %d x %d), kernel (%d x %d), pad (%d x %d), stride (%d x %d), dilation (%d x %d)\n",
                name.c_str(), inputs[0].size[0], inputs[0].size[1], inputs[0].size[2], inputs[0].size[3],
                kernel.width, kernel.height, pad.width, pad.height,
                stride.width, stride.height, dilation.width, dilation.height);*/
-        CV_Assert_N(inputs.size() == (size_t)1, inputs[0].size[1] % blobs[0].size[1] == 0,
+        int inpGroupCn = blobs.empty() ? inputs[1].size[1] : blobs[0].size[1];
+        CV_Assert_N(inputs.size() >= (size_t)1, inputs[0].size[1] % inpGroupCn == 0,
                     outputs.size() == 1, inputs[0].data != outputs[0].data);

-        int ngroups = inputs[0].size[1]/blobs[0].size[1];
+        int ngroups = inputs[0].size[1] / inpGroupCn;
         CV_Assert(outputs[0].size[1] % ngroups == 0);
-        int outCn = blobs[0].size[0];

         reluslope.clear();
         if( activ )
@@ -1328,11 +1394,11 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
     virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                            const std::vector<MatShape> &outputs) const CV_OVERRIDE
     {
-        CV_Assert(inputs.size() == outputs.size());
+        CV_Assert(inputs.size() == outputs.size() || inputs.size() == outputs.size() + blobs.size());

         int64 flops = 0;
         int karea = std::accumulate(kernel_size.begin(), kernel_size.end(), 1, std::multiplies<size_t>());
-        for (int i = 0; i < inputs.size(); i++)
+        for (int i = 0; i < outputs.size(); i++)
         {
             flops += total(outputs[i])*(CV_BIG_INT(2)*karea*inputs[i][1] + 1);
         }

modules/dnn/src/onnx/onnx_importer.cpp

Lines changed: 6 additions & 3 deletions
@@ -1003,10 +1003,13 @@ void ONNXImporter::populateNet(Net dstNet)
             CV_Assert(node_proto.input_size() >= 2);
             layerParams.type = "Convolution";
             for (int j = 1; j < node_proto.input_size(); j++) {
-                layerParams.blobs.push_back(getBlob(node_proto, constBlobs, j));
+                if (constBlobs.find(node_proto.input(j)) != constBlobs.end())
+                {
+                    layerParams.blobs.push_back(getBlob(node_proto, constBlobs, j));
+                }
             }
-            layerParams.set("num_output", layerParams.blobs[0].size[0]);
-            layerParams.set("bias_term", node_proto.input_size() == 3);
+            int outCn = layerParams.blobs.empty() ? outShapes[node_proto.input(1)][0] : layerParams.blobs[0].size[0];
+            layerParams.set("num_output", outCn);
         }
         else if (layer_type == "ConvTranspose")
         {