Skip to content

Commit aa52daf

Browse files
authored
Merge pull request opencv#26127 from alexlyulkov:al/blob-from-images
Faster implementation of blobFromImages for cpu nchw output opencv#26127

Faster implementation of blobFromImage and blobFromImages for the HWC cv::Mat images -> NCHW cv::Mat case.

Running time on my pc in ms:

**blobFromImage**
```
image size     old      new      speed-up
32x32x3        0.008    0.002    4.0x
64x64x3        0.021    0.009    2.3x
128x128x3      0.164    0.037    4.4x
256x256x3      0.728    0.158    4.6x
512x512x3      3.310    0.628    5.2x
1024x1024x3    14.503   3.124    4.6x
2048x2048x3    61.647   28.049   2.2x
```

**blobFromImages**
```
image size        old        new       speed-up
16x32x32x3        0.122      0.041     3.0x
16x64x64x3        0.790      0.165     4.8x
16x128x128x3      3.313      0.652     5.1x
16x256x256x3      13.495     3.127     4.3x
16x512x512x3      58.795     28.127    2.1x
16x1024x1024x3    251.135    121.955   2.1x
16x2048x2048x3    1023.570   487.188   2.1x
```

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable. Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
1 parent d9a139f commit aa52daf

File tree

2 files changed

+218
-22
lines changed

2 files changed

+218
-22
lines changed

modules/dnn/perf/perf_utils.cpp

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
// This file is part of OpenCV project.
2+
// It is subject to the license terms in the LICENSE file found in the top-level directory
3+
// of this distribution and at http://opencv.org/license.html.
4+
//
5+
// Copyright (C) 2017, Intel Corporation, all rights reserved.
6+
// Third party copyrights are property of their respective owners.
7+
8+
#include "perf_precomp.hpp"
9+
10+
namespace opencv_test {
11+
12+
using Utils_blobFromImage = TestBaseWithParam<std::vector<int>>;
13+
PERF_TEST_P_(Utils_blobFromImage, HWC_TO_NCHW) {
14+
std::vector<int> input_shape = GetParam();
15+
16+
Mat input(input_shape, CV_32FC3);
17+
randu(input, -10.0f, 10.f);
18+
19+
TEST_CYCLE() {
20+
Mat blob = blobFromImage(input);
21+
}
22+
23+
SANITY_CHECK_NOTHING();
24+
}
25+
26+
INSTANTIATE_TEST_CASE_P(/**/, Utils_blobFromImage,
27+
Values(std::vector<int>{ 32, 32},
28+
std::vector<int>{ 64, 64},
29+
std::vector<int>{ 128, 128},
30+
std::vector<int>{ 256, 256},
31+
std::vector<int>{ 512, 512},
32+
std::vector<int>{1024, 1024},
33+
std::vector<int>{2048, 2048})
34+
);
35+
36+
using Utils_blobFromImages = TestBaseWithParam<std::vector<int>>;
37+
PERF_TEST_P_(Utils_blobFromImages, HWC_TO_NCHW) {
38+
std::vector<int> input_shape = GetParam();
39+
int batch = input_shape.front();
40+
std::vector<int> input_shape_no_batch(input_shape.begin()+1, input_shape.end());
41+
42+
std::vector<Mat> inputs;
43+
for (int i = 0; i < batch; i++) {
44+
Mat input(input_shape_no_batch, CV_32FC3);
45+
randu(input, -10.0f, 10.f);
46+
inputs.push_back(input);
47+
}
48+
49+
TEST_CYCLE() {
50+
Mat blobs = blobFromImages(inputs);
51+
}
52+
53+
SANITY_CHECK_NOTHING();
54+
}
55+
56+
INSTANTIATE_TEST_CASE_P(/**/, Utils_blobFromImages,
57+
Values(std::vector<int>{16, 32, 32},
58+
std::vector<int>{16, 64, 64},
59+
std::vector<int>{16, 128, 128},
60+
std::vector<int>{16, 256, 256},
61+
std::vector<int>{16, 512, 512},
62+
std::vector<int>{16, 1024, 1024},
63+
std::vector<int>{16, 2048, 2048})
64+
);
65+
66+
}

modules/dnn/src/dnn_utils.cpp

Lines changed: 152 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,111 @@ Mat blobFromImagesWithParams(InputArrayOfArrays images, const Image2BlobParams&
126126
return blob;
127127
}
128128

129+
template<typename Tinp, typename Tout>
130+
void blobFromImagesNCHWImpl(const std::vector<Mat>& images, Mat& blob_, const Image2BlobParams& param)
131+
{
132+
int w = images[0].cols;
133+
int h = images[0].rows;
134+
int wh = w * h;
135+
int nch = images[0].channels();
136+
CV_Assert(nch == 1 || nch == 3 || nch == 4);
137+
int sz[] = { (int)images.size(), nch, h, w};
138+
blob_.create(4, sz, param.ddepth);
139+
140+
for (size_t k = 0; k < images.size(); ++k)
141+
{
142+
CV_Assert(images[k].depth() == images[0].depth());
143+
CV_Assert(images[k].channels() == images[0].channels());
144+
CV_Assert(images[k].size() == images[0].size());
145+
146+
Tout* p_blob = blob_.ptr<Tout>() + k * nch * wh;
147+
Tout* p_blob_r = p_blob;
148+
Tout* p_blob_g = p_blob + wh;
149+
Tout* p_blob_b = p_blob + 2 * wh;
150+
Tout* p_blob_a = p_blob + 3 * wh;
151+
152+
if (param.swapRB)
153+
std::swap(p_blob_r, p_blob_b);
154+
155+
for (size_t i = 0; i < h; ++i)
156+
{
157+
const Tinp* p_img_row = images[k].ptr<Tinp>(i);
158+
159+
if (nch == 1)
160+
{
161+
for (size_t j = 0; j < w; ++j)
162+
{
163+
p_blob[i * w + j] = p_img_row[j];
164+
}
165+
}
166+
else if (nch == 3)
167+
{
168+
for (size_t j = 0; j < w; ++j)
169+
{
170+
p_blob_r[i * w + j] = p_img_row[j * 3 ];
171+
p_blob_g[i * w + j] = p_img_row[j * 3 + 1];
172+
p_blob_b[i * w + j] = p_img_row[j * 3 + 2];
173+
}
174+
}
175+
else // if (nch == 4)
176+
{
177+
for (size_t j = 0; j < w; ++j)
178+
{
179+
p_blob_r[i * w + j] = p_img_row[j * 4 ];
180+
p_blob_g[i * w + j] = p_img_row[j * 4 + 1];
181+
p_blob_b[i * w + j] = p_img_row[j * 4 + 2];
182+
p_blob_a[i * w + j] = p_img_row[j * 4 + 3];
183+
}
184+
}
185+
}
186+
}
187+
188+
if (param.mean == Scalar() && param.scalefactor == Scalar::all(1.0))
189+
return;
190+
CV_CheckTypeEQ(param.ddepth, CV_32F, "Scaling and mean substraction is supported only for CV_32F blob depth");
191+
192+
for (size_t k = 0; k < images.size(); ++k)
193+
{
194+
for (size_t ch = 0; ch < nch; ++ch)
195+
{
196+
float cur_mean = param.mean[ch];
197+
float cur_scale = param.scalefactor[ch];
198+
Tout* p_blob = blob_.ptr<Tout>() + k * nch * wh + ch * wh;
199+
for (size_t i = 0; i < wh; ++i)
200+
{
201+
p_blob[i] = (p_blob[i] - cur_mean) * cur_scale;
202+
}
203+
}
204+
}
205+
}
206+
207+
template<typename Tout>
208+
void blobFromImagesNCHW(const std::vector<Mat>& images, Mat& blob_, const Image2BlobParams& param)
209+
{
210+
if (images[0].depth() == CV_8U)
211+
blobFromImagesNCHWImpl<uint8_t, Tout>(images, blob_, param);
212+
else if (images[0].depth() == CV_8S)
213+
blobFromImagesNCHWImpl<int8_t, Tout>(images, blob_, param);
214+
else if (images[0].depth() == CV_16U)
215+
blobFromImagesNCHWImpl<uint16_t, Tout>(images, blob_, param);
216+
else if (images[0].depth() == CV_16S)
217+
blobFromImagesNCHWImpl<int16_t, Tout>(images, blob_, param);
218+
else if (images[0].depth() == CV_32S)
219+
blobFromImagesNCHWImpl<int32_t, Tout>(images, blob_, param);
220+
else if (images[0].depth() == CV_32F)
221+
blobFromImagesNCHWImpl<float, Tout>(images, blob_, param);
222+
else if (images[0].depth() == CV_64F)
223+
blobFromImagesNCHWImpl<double, Tout>(images, blob_, param);
224+
else
225+
CV_Error(Error::BadDepth, "Unsupported input image depth for blobFromImagesNCHW");
226+
}
227+
228+
template<typename Tout>
229+
void blobFromImagesNCHW(const std::vector<UMat>& images, UMat& blob_, const Image2BlobParams& param)
230+
{
231+
CV_Error(Error::StsNotImplemented, "");
232+
}
233+
129234
template<class Tmat>
130235
void blobFromImagesWithParamsImpl(InputArrayOfArrays images_, Tmat& blob_, const Image2BlobParams& param)
131236
{
@@ -154,19 +259,6 @@ void blobFromImagesWithParamsImpl(InputArrayOfArrays images_, Tmat& blob_, const
154259
Scalar scalefactor = param.scalefactor;
155260
Scalar mean = param.mean;
156261

157-
if (param.swapRB)
158-
{
159-
if (nch > 2)
160-
{
161-
std::swap(mean[0], mean[2]);
162-
std::swap(scalefactor[0], scalefactor[2]);
163-
}
164-
else
165-
{
166-
CV_LOG_WARNING(NULL, "Red/blue color swapping requires at least three image channels.");
167-
}
168-
}
169-
170262
for (size_t i = 0; i < images.size(); i++)
171263
{
172264
Size imgSize = images[i].size();
@@ -203,18 +295,35 @@ void blobFromImagesWithParamsImpl(InputArrayOfArrays images_, Tmat& blob_, const
203295
resize(images[i], images[i], size, 0, 0, INTER_LINEAR);
204296
}
205297
}
206-
207-
if (images[i].depth() == CV_8U && param.ddepth == CV_32F)
208-
images[i].convertTo(images[i], CV_32F);
209-
210-
subtract(images[i], mean, images[i]);
211-
multiply(images[i], scalefactor, images[i]);
212298
}
213299

214300
size_t nimages = images.size();
215301
Tmat image0 = images[0];
216302
CV_Assert(image0.dims == 2);
217303

304+
if (std::is_same<Tmat, Mat>::value && param.datalayout == DNN_LAYOUT_NCHW)
305+
{
306+
// Fast implementation for HWC cv::Mat images -> NCHW cv::Mat blob
307+
if (param.ddepth == CV_8U)
308+
blobFromImagesNCHW<uint8_t>(images, blob_, param);
309+
else
310+
blobFromImagesNCHW<float>(images, blob_, param);
311+
return;
312+
}
313+
314+
if (param.swapRB)
315+
{
316+
if (nch > 2)
317+
{
318+
std::swap(mean[0], mean[2]);
319+
std::swap(scalefactor[0], scalefactor[2]);
320+
}
321+
else
322+
{
323+
CV_LOG_WARNING(NULL, "Red/blue color swapping requires at least three image channels.");
324+
}
325+
}
326+
218327
if (param.datalayout == DNN_LAYOUT_NCHW)
219328
{
220329
if (nch == 3 || nch == 4)
@@ -225,7 +334,14 @@ void blobFromImagesWithParamsImpl(InputArrayOfArrays images_, Tmat& blob_, const
225334

226335
for (size_t i = 0; i < nimages; i++)
227336
{
228-
const Tmat& image = images[i];
337+
Tmat& image = images[i];
338+
if (image.depth() == CV_8U && param.ddepth == CV_32F)
339+
image.convertTo(image, CV_32F);
340+
if (mean != Scalar())
341+
subtract(image, mean, image);
342+
if (scalefactor != Scalar::all(1.0))
343+
multiply(image, scalefactor, image);
344+
229345
CV_Assert(image.depth() == blob_.depth());
230346
nch = image.channels();
231347
CV_Assert(image.dims == 2 && (nch == 3 || nch == 4));
@@ -250,7 +366,14 @@ void blobFromImagesWithParamsImpl(InputArrayOfArrays images_, Tmat& blob_, const
250366

251367
for (size_t i = 0; i < nimages; i++)
252368
{
253-
const Tmat& image = images[i];
369+
Tmat& image = images[i];
370+
if (image.depth() == CV_8U && param.ddepth == CV_32F)
371+
image.convertTo(image, CV_32F);
372+
if (mean != Scalar())
373+
subtract(image, mean, image);
374+
if (scalefactor != Scalar::all(1.0))
375+
multiply(image, scalefactor, image);
376+
254377
CV_Assert(image.depth() == blob_.depth());
255378
nch = image.channels();
256379
CV_Assert(image.dims == 2 && (nch == 1));
@@ -269,7 +392,14 @@ void blobFromImagesWithParamsImpl(InputArrayOfArrays images_, Tmat& blob_, const
269392
int subMatType = CV_MAKETYPE(param.ddepth, nch);
270393
for (size_t i = 0; i < nimages; i++)
271394
{
272-
const Tmat& image = images[i];
395+
Tmat& image = images[i];
396+
if (image.depth() == CV_8U && param.ddepth == CV_32F)
397+
image.convertTo(image, CV_32F);
398+
if (mean != Scalar())
399+
subtract(image, mean, image);
400+
if (scalefactor != Scalar::all(1.0))
401+
multiply(image, scalefactor, image);
402+
273403
CV_Assert(image.depth() == blob_.depth());
274404
CV_Assert(image.channels() == image0.channels());
275405
CV_Assert(image.size() == image0.size());

0 commit comments

Comments
 (0)