@@ -106,18 +106,19 @@ class BaseConvolutionLayerImpl : public ConvolutionLayer
106
106
inputs_arr.getMatVector (inputs);
107
107
outputs_arr.getMatVector (outputs);
108
108
109
- CV_Assert (inputs.size () > 0 );
109
+ CV_Assert ((inputs.size () > outputs.size () && blobs.empty ()) ||
110
+ (!inputs.empty () && (blobs.size () == 1 || blobs.size () == 2 )));
111
+ MatSize weightShape = blobs.empty () ? inputs[1 ].size : blobs[0 ].size ;
110
112
111
- CV_Assert (blobs.size () == 1 || blobs.size () == 2 );
112
113
CV_Assert (inputs[0 ].dims == outputs[0 ].dims );
113
- CV_Assert (blobs[ 0 ] .dims == kernel_size.size () + 2 );
114
+ CV_Assert (weightShape .dims () == kernel_size.size () + 2 );
114
115
for (int i = 0 ; i < kernel_size.size (); i++) {
115
- CV_Assert (blobs[ 0 ]. size [i + 2 ] == kernel_size[i]);
116
+ CV_Assert (weightShape [i + 2 ] == kernel_size[i]);
116
117
}
117
118
118
119
const Mat &input = inputs[0 ];
119
120
CV_Assert ((input.dims == 4 || input.dims == 5 ) && (input.type () == CV_32F || input.type () == CV_16S));
120
- for (size_t i = 0 ; i < inputs .size (); i++)
121
+ for (size_t i = 0 ; i < outputs .size (); i++)
121
122
{
122
123
CV_Assert (inputs[i].type () == input.type ());
123
124
CV_Assert ((inputs[i].dims == 4 || inputs[i].dims == 5 ) && inputs[i].size [1 ] == input.size [1 ]);
@@ -245,6 +246,7 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
245
246
246
247
MatShape computeColRowShape (const MatShape &inpShape, const MatShape &outShape) const CV_OVERRIDE
247
248
{
249
+ CV_Assert (!blobs.empty ());
248
250
int dims = inpShape.size ();
249
251
int inpD = dims == 5 ? inpShape[2 ] : 1 ;
250
252
int inpH = inpShape[dims - 2 ];
@@ -262,29 +264,31 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
262
264
{
263
265
if (kernel_size.size () == 3 )
264
266
return preferableTarget == DNN_TARGET_CPU;
267
+ if ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || preferableTarget != DNN_TARGET_MYRIAD) && blobs.empty ())
268
+ return false ;
265
269
return (preferableTarget != DNN_TARGET_MYRIAD || dilation.width == dilation.height );
266
270
}
267
271
else
268
272
#endif
269
273
return (kernel_size.size () == 3 && preferableTarget == DNN_TARGET_CPU && backendId == DNN_BACKEND_OPENCV) ||
270
- (kernel_size.size () == 2 && (backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE));
274
+ (kernel_size.size () == 2 && (backendId == DNN_BACKEND_OPENCV || ( backendId == DNN_BACKEND_HALIDE && !blobs. empty ()) ));
271
275
}
272
276
273
277
bool getMemoryShapes (const std::vector<MatShape> &inputs,
274
278
const int requiredOutputs,
275
279
std::vector<MatShape> &outputs,
276
280
std::vector<MatShape> &internals) const CV_OVERRIDE
277
281
{
278
- CV_Assert (blobs.size () != 0 );
279
- CV_Assert (! hasBias () || blobs [1 ]. total () == ( size_t ) blobs[0 ].size [ 0 ]) ;
280
- CV_Assert (inputs. size () == (size_t )1 );
282
+ CV_Assert (! blobs.empty () || inputs. size () > 1 );
283
+ const int * weightShape = blobs. empty () ? &inputs [1 ][ 0 ] : blobs[0 ].size . p ;
284
+ CV_Assert (! hasBias () || blobs[ 1 ]. total () == (size_t )weightShape[ 0 ] );
281
285
282
286
internals.clear ();
283
287
284
288
CV_Assert (inputs.size () != 0 );
285
289
std::vector<int > inpShape (inputs[0 ].begin () + 2 , inputs[0 ].end ());
286
290
287
- int outCn = blobs[ 0 ]. size [0 ];
291
+ int outCn = weightShape [0 ];
288
292
std::vector<int > outShape;
289
293
outShape.push_back (inputs[0 ][0 ]);
290
294
outShape.push_back (outCn);
@@ -300,10 +304,10 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
300
304
getConvPoolOutParams (inpShape, kernel_size, strides, padMode, dilations, outShape);
301
305
}
302
306
303
- int ngroups = inpCn / blobs[ 0 ]. size [1 ];
304
- if (ngroups == 0 || ngroups * blobs[ 0 ]. size [1 ] != inpCn)
307
+ int ngroups = inpCn / weightShape [1 ];
308
+ if (ngroups == 0 || ngroups * weightShape [1 ] != inpCn)
305
309
CV_Error (Error::StsError, format (" Number of input channels should "
306
- " be multiple of %d but got %d" , blobs[ 0 ]. size [1 ], inpCn));
310
+ " be multiple of %d but got %d" , weightShape [1 ], inpCn));
307
311
CV_Assert (ngroups > 0 && inpCn % ngroups == 0 && outCn % ngroups == 0 );
308
312
309
313
outputs.resize (1 , outShape);
@@ -315,34 +319,34 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
315
319
{
316
320
BaseConvolutionLayerImpl::finalize (inputs_arr, outputs_arr);
317
321
318
- CV_Assert (!blobs. empty ()) ;
319
- const int outCn = blobs[ 0 ]. size [ 0 ] ;
322
+ std::vector<Mat> inputs ;
323
+ inputs_arr. getMatVector (inputs) ;
320
324
// prepare weightsMat where each row is aligned and has enough zero padding on the right to
321
325
// use vectorized (i.e. with intrinsics) loops without tail processing
322
- Mat wm = blobs[ 0 ].reshape (1 , outCn );
326
+ Mat wm = blobs. empty () ? inputs[ 1 ]. reshape ( 1 , numOutput) : blobs[ 0 ].reshape (1 , numOutput );
323
327
if ( wm.step1 () % VEC_ALIGN != 0 )
324
328
{
325
329
int newcols = (int )alignSize (wm.step1 (), VEC_ALIGN);
326
- Mat wm_buffer = Mat (outCn , newcols, wm.type ());
330
+ Mat wm_buffer = Mat (numOutput , newcols, wm.type ());
327
331
Mat wm_padding = wm_buffer.colRange (wm.cols , newcols);
328
332
wm_padding.setTo (Scalar::all (0 .));
329
333
Mat wm_aligned = wm_buffer.colRange (0 , wm.cols );
330
334
wm.copyTo (wm_aligned);
331
335
wm = wm_aligned;
332
336
}
333
337
weightsMat = wm;
334
- weightsMultipliers.assign (outCn , 1.0 );
338
+ weightsMultipliers.assign (numOutput , 1.0 );
335
339
336
- Mat biasMat = hasBias () ? blobs[1 ].reshape (1 , outCn ) : Mat ();
337
- biasvec.resize (outCn +2 );
340
+ Mat biasMat = hasBias () ? blobs[1 ].reshape (1 , numOutput ) : Mat ();
341
+ biasvec.resize (numOutput +2 );
338
342
if ( biasMat.empty () )
339
343
{
340
- for (int i = 0 ; i < outCn ; i++ )
344
+ for (int i = 0 ; i < numOutput ; i++ )
341
345
biasvec[i] = 0 .f ;
342
346
}
343
347
else
344
348
{
345
- for (int i = 0 ; i < outCn ; i++ )
349
+ for (int i = 0 ; i < numOutput ; i++ )
346
350
biasvec[i] = biasMat.at <float >(i);
347
351
}
348
352
#ifdef HAVE_OPENCL
@@ -352,7 +356,7 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
352
356
353
357
bool setActivation (const Ptr<ActivationLayer>& layer) CV_OVERRIDE
354
358
{
355
- if (!activ.empty () && !layer.empty ())
359
+ if (( !activ.empty () && !layer. empty ()) || blobs .empty ())
356
360
return false ;
357
361
358
362
activ = layer;
@@ -537,37 +541,48 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
537
541
virtual Ptr<BackendNode> initNgraph (const std::vector<Ptr<BackendWrapper> > &inputs,
538
542
const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
539
543
{
540
- CV_Assert_N (inputs.size () == 1 , nodes.size () = = 1 );
544
+ CV_Assert_N (inputs.size () >= 1 , nodes.size () > = 1 );
541
545
auto & ieInpNode = nodes[0 ].dynamicCast <InfEngineNgraphNode>()->node ;
542
546
std::vector<size_t > dims = ieInpNode->get_shape ();
543
547
CV_Assert (dims.size () == 4 || dims.size () == 5 );
548
+ std::shared_ptr<ngraph::Node> ieWeights = nodes.size () > 1 ? nodes[1 ].dynamicCast <InfEngineNgraphNode>()->node : nullptr ;
544
549
const int inpCn = dims[1 ];
545
- const int outCn = blobs[0 ].size [0 ];
546
- const int inpGroupCn = blobs[0 ].size [1 ];
550
+ const int inpGroupCn = nodes.size () > 1 ? ieWeights->get_shape ()[1 ] : blobs[0 ].size [1 ];
547
551
const int group = inpCn / inpGroupCn;
548
552
549
- std::vector<size_t > kernel_shape = getShape< size_t >(blobs[ 0 ]) ;
553
+ std::vector<size_t > kernel_shape;
550
554
if (group != 1 )
551
555
{
552
- kernel_shape[0 ] /= group;
553
- kernel_shape.insert (kernel_shape.begin (), group);
556
+ kernel_shape.push_back (group);
554
557
}
558
+ kernel_shape.push_back (numOutput / group);
559
+ kernel_shape.push_back (inpCn / group);
560
+ std::copy (kernel_size.begin (), kernel_size.end (), back_inserter (kernel_shape));
555
561
556
- auto ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32 , kernel_shape, blobs[0 ].data );
557
- if (fusedWeights)
562
+ if (nodes.size () == 1 )
558
563
{
559
- if (weightsMat.isContinuous ())
560
- {
561
- ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32 , kernel_shape, weightsMat.data );
562
- }
563
- else
564
+ ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32 , kernel_shape, blobs[0 ].data );
565
+ if (fusedWeights)
564
566
{
565
- Mat newWeights;
566
- Mat cvWeights = weightsMat.colRange (0 , blobs[0 ].total () / outCn);
567
- cvWeights.copyTo (newWeights);
568
- ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32 , kernel_shape, newWeights.data );
567
+ if (weightsMat.isContinuous ())
568
+ {
569
+ ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32 , kernel_shape, weightsMat.data );
570
+ }
571
+ else
572
+ {
573
+ Mat newWeights;
574
+ Mat cvWeights = weightsMat.colRange (0 , blobs[0 ].total () / numOutput);
575
+ cvWeights.copyTo (newWeights);
576
+ ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32 , kernel_shape, newWeights.data );
577
+ }
569
578
}
570
579
}
580
+ else
581
+ {
582
+ auto shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64 ,
583
+ ngraph::Shape{kernel_shape.size ()}, kernel_shape.data ());
584
+ ieWeights = std::make_shared<ngraph::op::v1::Reshape>(ieWeights, shape, true );
585
+ }
571
586
572
587
ngraph::op::PadType pad_type = ngraph::op::PadType::EXPLICIT;
573
588
if (!padMode.empty ())
@@ -592,11 +607,21 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
592
607
pad_type);
593
608
}
594
609
595
- if (hasBias () || fusedBias)
610
+ if (hasBias () || fusedBias || nodes. size () == 3 )
596
611
{
597
612
std::vector<size_t > shape (conv_node->get_shape ().size (), 1 );
598
- shape[1 ] = outCn;
599
- auto bias = std::make_shared<ngraph::op::Constant>(ngraph::element::f32 , ngraph::Shape (shape), biasvec.data ());
613
+ shape[1 ] = conv_node->get_shape ()[1 ];
614
+ std::shared_ptr<ngraph::Node> bias;
615
+ if (nodes.size () == 3 )
616
+ {
617
+ auto bias_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64 ,
618
+ ngraph::Shape{shape.size ()}, shape.data ());
619
+ bias = std::make_shared<ngraph::op::v1::Reshape>(nodes[2 ].dynamicCast <InfEngineNgraphNode>()->node , bias_shape, true );
620
+ }
621
+ else
622
+ {
623
+ bias = std::make_shared<ngraph::op::Constant>(ngraph::element::f32 , ngraph::Shape (shape), biasvec.data ());
624
+ }
600
625
auto conv_bias = std::make_shared<ngraph::op::v1::Add>(conv_node, bias, ngraph::op::AutoBroadcastType::NUMPY);
601
626
return Ptr<BackendNode>(new InfEngineNgraphNode (conv_bias));
602
627
}
@@ -1103,6 +1128,26 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
1103
1128
for (int i = 0 ; i < inputs.size (); ++i)
1104
1129
CV_Assert (inputs[i].u != outputs[0 ].u );
1105
1130
1131
+ if (blobs.empty ())
1132
+ {
1133
+ size_t n = inputs.size () - 1 ;
1134
+ umat_blobs.resize (n);
1135
+ for (size_t i = 0 ; i < n; i++)
1136
+ {
1137
+ if (use_half)
1138
+ {
1139
+ Mat matFP32;
1140
+ convertFp16 (inputs[i + 1 ], matFP32);
1141
+ matFP32.copyTo (umat_blobs[i]);
1142
+ }
1143
+ else
1144
+ {
1145
+ inputs[i + 1 ].copyTo (umat_blobs[i]);
1146
+ }
1147
+ }
1148
+ inputs.resize (1 );
1149
+ }
1150
+
1106
1151
if (umat_blobs.empty ())
1107
1152
{
1108
1153
size_t n = blobs.size ();
@@ -1113,7 +1158,7 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
1113
1158
}
1114
1159
}
1115
1160
1116
- if (convolutionOp.empty ())
1161
+ if (convolutionOp.empty () || blobs. empty () )
1117
1162
{
1118
1163
OCL4DNNConvConfig config;
1119
1164
config.in_shape = shape (inputs[0 ]);
@@ -1123,7 +1168,7 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
1123
1168
config.stride = stride;
1124
1169
config.dilation = dilation;
1125
1170
config.group = inputs[0 ].size [1 ] / umat_blobs[0 ].size [1 ];
1126
- config.bias_term = ( hasBias ()) ? true : false ;
1171
+ config.bias_term = umat_blobs. size () == 2 ;
1127
1172
config.use_half = use_half;
1128
1173
1129
1174
convolutionOp = Ptr<OCL4DNNConvSpatial<float > >(new OCL4DNNConvSpatial<float >(config));
@@ -1250,16 +1295,37 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
1250
1295
inputs_arr.getMatVector (inputs);
1251
1296
outputs_arr.getMatVector (outputs);
1252
1297
1298
+ int outCn = blobs.empty () ? inputs[1 ].size [0 ] : blobs[0 ].size [0 ];
1299
+ // Need to align non-const blobs
1300
+ if (blobs.empty ())
1301
+ {
1302
+ Mat wm = inputs[1 ].reshape (1 , outCn);
1303
+ if ( wm.step1 () % VEC_ALIGN != 0 )
1304
+ {
1305
+ wm.copyTo (weightsMat);
1306
+ if (inputs.size () > 2 )
1307
+ {
1308
+ Mat biasMat = inputs[2 ].reshape (1 , outCn);
1309
+ biasMat.col (0 ).copyTo (biasvec);
1310
+ biasvec.resize (outCn + 2 );
1311
+ }
1312
+ else
1313
+ {
1314
+ biasvec.resize (outCn + 2 , 0 );
1315
+ }
1316
+ }
1317
+ }
1318
+
1253
1319
/* printf("conv %s: input (%d x %d x %d x %d), kernel (%d x %d), pad (%d x %d), stride (%d x %d), dilation (%d x %d)\n",
1254
1320
name.c_str(), inputs[0].size[0], inputs[0].size[1], inputs[0].size[2], inputs[0].size[3],
1255
1321
kernel.width, kernel.height, pad.width, pad.height,
1256
1322
stride.width, stride.height, dilation.width, dilation.height);*/
1257
- CV_Assert_N (inputs.size () == (size_t )1 , inputs[0 ].size [1 ] % blobs[0 ].size [1 ] == 0 ,
1323
+ int inpGroupCn = blobs.empty () ? inputs[1 ].size [1 ] : blobs[0 ].size [1 ];
1324
+ CV_Assert_N (inputs.size () >= (size_t )1 , inputs[0 ].size [1 ] % inpGroupCn == 0 ,
1258
1325
outputs.size () == 1 , inputs[0 ].data != outputs[0 ].data );
1259
1326
1260
- int ngroups = inputs[0 ].size [1 ]/blobs[ 0 ]. size [ 1 ] ;
1327
+ int ngroups = inputs[0 ].size [1 ] / inpGroupCn ;
1261
1328
CV_Assert (outputs[0 ].size [1 ] % ngroups == 0 );
1262
- int outCn = blobs[0 ].size [0 ];
1263
1329
1264
1330
reluslope.clear ();
1265
1331
if ( activ )
@@ -1328,11 +1394,11 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl
1328
1394
virtual int64 getFLOPS (const std::vector<MatShape> &inputs,
1329
1395
const std::vector<MatShape> &outputs) const CV_OVERRIDE
1330
1396
{
1331
- CV_Assert (inputs.size () == outputs.size ());
1397
+ CV_Assert (inputs.size () == outputs.size () || inputs. size () == outputs. size () + blobs. size () );
1332
1398
1333
1399
int64 flops = 0 ;
1334
1400
int karea = std::accumulate (kernel_size.begin (), kernel_size.end (), 1 , std::multiplies<size_t >());
1335
- for (int i = 0 ; i < inputs .size (); i++)
1401
+ for (int i = 0 ; i < outputs .size (); i++)
1336
1402
{
1337
1403
flops += total (outputs[i])*(CV_BIG_INT (2 )*karea*inputs[i][1 ] + 1 );
1338
1404
}
0 commit comments