@@ -2525,8 +2525,7 @@ struct Net::Impl : public detail::NetImplBase
             // (and so we eliminate the concatenation layer, because the channels
             // are concatenated implicitly).
             Ptr<ConcatLayer> concatLayer = ld.layerInstance.dynamicCast<ConcatLayer>();
-            if( !concatLayer.empty() && concatLayer->axis == 1 && !concatLayer->padding &&
-                ld.outputBlobs.size() == 1 )
+            if( !concatLayer.empty() && !concatLayer->padding && ld.outputBlobs.size() == 1 )
             {
                 Mat& output = ld.outputBlobs[0];
                 UMat umat_output;
@@ -2563,7 +2562,8 @@ struct Net::Impl : public detail::NetImplBase
                 // the concatenation optimization is applied with batch_size > 1.
                 // so, for now, we only apply this optimization in the most popular
                 // case batch_size == 1.
-                if( output.dims == 4 && output.size[0] == 1 )
+                int axis = clamp(concatLayer->axis, output.dims);
+                if( output.total(0, axis) == 1 )
                 {
                     size_t i, ninputs = ld.inputBlobsId.size();
                     std::vector<LayerPin> realinputs(ninputs);
@@ -2602,14 +2602,14 @@ struct Net::Impl : public detail::NetImplBase
                         OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umats);
                     }
 #endif
-                    Range chrange[] = { Range::all(), Range::all(), Range::all(), Range::all() };
+                    std::vector<Range> chrange(output.dims, Range::all());
                     int ofs = 0;
                     for( i = 0; i < ninputs; i++ )
                     {
                         LayerPin pin = realinputs[i];
                         LayerData* inp_i_data = &layers[pin.lid];
-                        int channels_i = ld.inputBlobs[i]->size[1];
-                        chrange[1] = Range(ofs, ofs + channels_i);
+                        int channels_i = ld.inputBlobs[i]->size[axis];
+                        chrange[axis] = Range(ofs, ofs + channels_i);
                         printf_(("\toutput %s(%d) to channels (%d, %d)\n", inp_i_data->layerInstance->name.c_str(),
                                  pin.oid, ofs, ofs + channels_i));
                         ofs += channels_i;
0 commit comments