Skip to content

Commit ff99218

Browse files
committed
Merge pull request opencv#17791 from YashasSamaga:cuda4dnn-fix-concat-fusion
2 parents d0e6d24 + 37e2afb commit ff99218

File tree

1 file changed

+3 −4 lines changed

1 file changed

+3 −4 lines changed

modules/dnn/src/dnn.cpp

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2972,7 +2972,6 @@ struct Net::Impl : public detail::NetImplBase
29722972
ld.outputBlobsWrappers[0] = wrap(output);
29732973
#endif
29742974
std::vector<Range> chrange(output.dims, Range::all());
2975-
29762975
int ofs = 0;
29772976
for( i = 0; i < ninputs; i++ )
29782977
{
@@ -3000,9 +2999,9 @@ struct Net::Impl : public detail::NetImplBase
30002999
if (preferableBackend == DNN_BACKEND_CUDA)
30013000
{
30023001
auto cuda_wrapper = wrap(output).dynamicCast<CUDABackendWrapper>();
3003-
auto offset = chrange[1].start * (output.size[2] * output.size[3]);
3004-
auto shape = MatShape{1, chrange[1].size(), output.size[2], output.size[3]};
3005-
cuda_wrapper->update(shape, offset);
3002+
auto offset = chrange[axis].start * output_slice.total(axis + 1, output.dims);
3003+
auto new_shape = shape(output_slice);
3004+
cuda_wrapper->update(new_shape, offset);
30063005
inp_i_data->outputBlobsWrappers[pin.oid] = cuda_wrapper.staticCast<BackendWrapper>();
30073006
}
30083007
#endif

0 commit comments

Comments (0)