Skip to content

Commit 9b7b22e

Browse files
committed
Merge remote-tracking branch 'upstream/3.4' into merge-3.4
2 parents d17ab27 + b2ebd37 commit 9b7b22e

15 files changed

+717
-127
lines changed

cmake/OpenCVDetectInferenceEngine.cmake

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -129,9 +129,9 @@ endif()
129129

130130
if(INF_ENGINE_TARGET)
131131
if(NOT INF_ENGINE_RELEASE)
132-
message(WARNING "InferenceEngine version has not been set, 2020.3 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.")
132+
message(WARNING "InferenceEngine version has not been set, 2020.4 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.")
133133
endif()
134-
set(INF_ENGINE_RELEASE "2020030000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2020.1.0.2 -> 2020010002)")
134+
set(INF_ENGINE_RELEASE "2020040000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2020.1.0.2 -> 2020010002)")
135135
set_target_properties(${INF_ENGINE_TARGET} PROPERTIES
136136
INTERFACE_COMPILE_DEFINITIONS "HAVE_INF_ENGINE=1;INF_ENGINE_RELEASE=${INF_ENGINE_RELEASE}"
137137
)

modules/dnn/perf/perf_layer.cpp

Lines changed: 95 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,95 @@
1+
// This file is part of OpenCV project.
2+
// It is subject to the license terms in the LICENSE file found in the top-level directory
3+
// of this distribution and at http://opencv.org/license.html.
4+
5+
#include "perf_precomp.hpp"
6+
#include <opencv2/dnn/shape_utils.hpp>
7+
8+
namespace opencv_test {
9+
10+
// Performance fixture for the DNN "Slice" layer, parameterized by a (Backend, Target) tuple.
// test_slice() builds a one-layer network, runs one warm-up forward pass and then times
// net.forward() inside TEST_CYCLE().
struct Layer_Slice : public TestBaseWithParam<tuple<Backend, Target> >
11+
{
12+
// DIMS       - number of dimensions; inputShape, begin and end must each hold DIMS ints.
// inputShape - shape of the CV_32FC1 input blob.
// begin/end  - per-axis values passed to the layer as its "begin" / "end" parameters.
template<int DIMS>
13+
void test_slice(const int* inputShape, const int* begin, const int* end)
14+
{
15+
int backendId = get<0>(GetParam());
16+
int targetId = get<1>(GetParam());
17+
18+
// Fill the input with a repeating 0..4095 ramp so the data is not constant
// (the warm-up below expects a non-zero output).
Mat input(DIMS, inputShape, CV_32FC1, Scalar::all(0));
19+
for (int i = 0; i < (int)input.total(); ++i)
20+
input.ptr<float>()[i] = (float)(i & 4095);
21+
22+
// Reference ranges; in this function they are only consumed by the disabled (#if 0) debug code.
std::vector<Range> range(DIMS);
23+
for (int i = 0; i < DIMS; ++i)
24+
range[i] = Range(begin[i], end[i]);
25+
26+
// Single-layer network: one "Slice" layer attached to the network input.
Net net;
27+
LayerParams lp;
28+
lp.type = "Slice";
29+
lp.name = "testLayer";
30+
// The casts drop const only to match the arrayInt<int*> instantiation; the arrays are not
// written to in this function.
lp.set("begin", DictValue::arrayInt<int*>((int*)&begin[0], DIMS));
31+
lp.set("end", DictValue::arrayInt<int*>((int*)&end[0], DIMS));
32+
net.addLayerToPrev(lp.name, lp.type, lp);
33+
34+
// warmup: first forward pass runs outside the timed loop
{
35+
net.setInput(input);
36+
net.setPreferableBackend(backendId);
37+
net.setPreferableTarget(targetId);
38+
Mat out = net.forward();
39+
40+
41+
// Cheap sanity check: the largest absolute output value must be non-zero.
EXPECT_GT(cv::norm(out, NORM_INF), 0);
42+
#if 0
43+
//normAssert(out, input(range));
44+
cout << input(range).clone().reshape(1, 1) << endl;
45+
cout << out.reshape(1, 1) << endl;
46+
#endif
47+
}
48+
49+
// Timed section: only net.forward() is measured.
TEST_CYCLE()
50+
{
51+
Mat res = net.forward();
52+
}
53+
54+
SANITY_CHECK_NOTHING();
55+
}
56+
};
57+
58+
59+
60+
// Slice benchmark on a 1x64x104x104 float blob with begin {0,32,0,0} / end {1,64,104,104},
// i.e. indices 32..63 of axis 1 if `end` is exclusive (TODO confirm against the Slice layer).
// Shape presumably mirrors a slice in YOLOv4-tiny, per the test name.
PERF_TEST_P_(Layer_Slice, YOLOv4_tiny_1)
61+
{
62+
const int inputShape[4] = {1, 64, 104, 104};
63+
const int begin[] = {0, 32, 0, 0};
64+
const int end[] = {1, 64, 104, 104};
65+
test_slice<4>(inputShape, begin, end);
66+
}
67+
68+
// Slice benchmark on a 1x128x52x52 float blob with begin {0,64,0,0} / end {1,128,52,52},
// i.e. indices 64..127 of axis 1 if `end` is exclusive (TODO confirm against the Slice layer).
// Shape presumably mirrors a slice in YOLOv4-tiny, per the test name.
PERF_TEST_P_(Layer_Slice, YOLOv4_tiny_2)
69+
{
70+
const int inputShape[4] = {1, 128, 52, 52};
71+
const int begin[] = {0, 64, 0, 0};
72+
const int end[] = {1, 128, 52, 52};
73+
test_slice<4>(inputShape, begin, end);
74+
}
75+
76+
// Slice benchmark on a 1x256x26x26 float blob with begin {0,128,0,0} / end {1,256,26,26},
// i.e. indices 128..255 of axis 1 if `end` is exclusive (TODO confirm against the Slice layer).
// Shape presumably mirrors a slice in YOLOv4-tiny, per the test name.
PERF_TEST_P_(Layer_Slice, YOLOv4_tiny_3)
77+
{
78+
const int inputShape[4] = {1, 256, 26, 26};
79+
const int begin[] = {0, 128, 0, 0};
80+
const int end[] = {1, 256, 26, 26};
81+
test_slice<4>(inputShape, begin, end);
82+
}
83+
84+
85+
// Slice benchmark on a 1x128x80x100 float blob with begin {0,0,2,2} / end {1,128,76,96}:
// axes 0 and 1 are kept whole, axes 2 and 3 are cropped (to [2,76) and [2,96) if `end` is
// exclusive - TODO confirm against the Slice layer).
PERF_TEST_P_(Layer_Slice, FastNeuralStyle_eccv16)
86+
{
87+
const int inputShape[4] = {1, 128, 80, 100};
88+
const int begin[] = {0, 0, 2, 2};
89+
const int end[] = {1, 128, 76, 96};
90+
test_slice<4>(inputShape, begin, end);
91+
}
92+
93+
// Instantiate every Layer_Slice case for each (Backend, Target) pair returned by
// dnnBackendsAndTargets(false, false); the meaning of the two flags is defined outside this file.
INSTANTIATE_TEST_CASE_P(/**/, Layer_Slice, dnnBackendsAndTargets(false, false));
94+
95+
} // namespace

modules/dnn/perf/perf_net.cpp

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -196,6 +196,13 @@ PERF_TEST_P_(DNNTestNetwork, YOLOv3)
196196
{
197197
if (backend == DNN_BACKEND_HALIDE)
198198
throw SkipTestException("");
199+
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // nGraph compilation failure
200+
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
201+
throw SkipTestException("Test is disabled in OpenVINO 2020.4");
202+
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
203+
throw SkipTestException("Test is disabled in OpenVINO 2020.4");
204+
#endif
205+
199206
Mat sample = imread(findDataFile("dnn/dog416.png"));
200207
cvtColor(sample, sample, COLOR_BGR2RGB);
201208
Mat inp;
@@ -209,6 +216,12 @@ PERF_TEST_P_(DNNTestNetwork, YOLOv4)
209216
throw SkipTestException("");
210217
if (target == DNN_TARGET_MYRIAD)
211218
throw SkipTestException("");
219+
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) // nGraph compilation failure
220+
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL)
221+
throw SkipTestException("Test is disabled in OpenVINO 2020.4");
222+
if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16)
223+
throw SkipTestException("Test is disabled in OpenVINO 2020.4");
224+
#endif
212225
Mat sample = imread(findDataFile("dnn/dog416.png"));
213226
cvtColor(sample, sample, COLOR_BGR2RGB);
214227
Mat inp;
@@ -220,8 +233,6 @@ PERF_TEST_P_(DNNTestNetwork, YOLOv4_tiny)
220233
{
221234
if (backend == DNN_BACKEND_HALIDE)
222235
throw SkipTestException("");
223-
if (target == DNN_TARGET_MYRIAD)
224-
throw SkipTestException("");
225236
Mat sample = imread(findDataFile("dnn/dog416.png"));
226237
cvtColor(sample, sample, COLOR_BGR2RGB);
227238
Mat inp;

modules/dnn/src/graph_simplifier.cpp

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -63,9 +63,6 @@ int Subgraph::getInputNodeId(const Ptr<ImportGraphWrapper>& net,
6363
{
6464
CV_Assert(inpId < node->getNumInputs());
6565
std::string name = node->getInputName(inpId);
66-
// If operation produces several tensors, they are specified by index
67-
// after ':' character. In example, "input:0".
68-
name = name.substr(0, name.rfind(':'));
6966
const int numNodes = net->getNumNodes();
7067
for (int i = 0; i < numNodes; ++i)
7168
{

modules/dnn/src/layers/slice_layer.cpp

Lines changed: 145 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,8 @@
4848
#include "layers_common.hpp"
4949
#include <opencv2/dnn/shape_utils.hpp>
5050

51+
#include <opencv2/core/utils/logger.hpp>
52+
5153
#ifdef HAVE_OPENCL
5254
#include "opencl_kernels_dnn.hpp"
5355
#endif
@@ -204,58 +206,168 @@ class SliceLayerImpl : public SliceLayer
204206
finalSliceRanges[i][j] = clamp(finalSliceRanges[i][j], inpShape[j]);
205207
}
206208
}
209+
210+
#if 0
211+
std::cout << "DEBUG: DNN/Slice: " << outputs.size() << " inpShape=" << inpShape << std::endl;
212+
for (int i = 0; i < outputs.size(); ++i)
213+
{
214+
for (int j = 0; j < finalSliceRanges[i].size(); ++j)
215+
{
216+
std::cout << finalSliceRanges[i][j];
217+
}
218+
std::cout << std::endl;
219+
}
220+
#endif
207221
}
208222

209223
#ifdef HAVE_OPENCL
210224
// OpenCL implementation of the Slice layer's forward pass.
// This hunk replaces the previous body (the lines marked "-", which returned false
// unconditionally) with a generic version: for every output it assembles kernel compile
// options describing the source/destination layout and launches kernel "slice_<dims>" from
// ocl::dnn::slice_oclsrc.
// Returns true when all outputs were processed. Returns false when the input has more than
// 5 dimensions (the log message calls this a CPU fallback), when the kernel object is empty,
// or when a kernel run fails. internals_ is not referenced by the new code.
bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
211225
{
212-
#if 1
213-
// TODO fix that (brokes YOLOv4-tiny)
214-
return false;
215-
#else
216226
std::vector<UMat> inputs;
217227
std::vector<UMat> outputs;
218228

219-
bool use_half = (inputs_.depth() == CV_16S);
220229
inputs_.getUMatVector(inputs);
221230
outputs_.getUMatVector(outputs);
222231

223-
if (inputs[0].dims < 4 || (total(shape(outputs[0]), 0, 2) % 4 != 0) ||
224-
(total(shape(outputs[0]), 2) % 4 != 0))
232+
// One slice range list is expected per output.
CV_Assert(outputs.size() == finalSliceRanges.size());
233+
234+
const UMat& input = inputs[0];
235+
if (input.dims > 5)
236+
{
237+
CV_LOG_INFO(NULL, "DNN/OpenCL/Slice: implementation doesn't support dims=" << input.dims << ". Fallback to CPU");
225238
return false;
239+
}
226240

227-
String opts;
228-
if (use_half)
229-
opts = "-DDtype=half -DDtype4=half4 -DDtype8=half8";
230-
else
231-
opts = "-DDtype=float -DDtype4=float4 -DDtype8=float8";
232-
const UMat& inpMat = inputs[0];
241+
// Work-group size along dimension 0 (used for local[0] / global[0] below).
// NOTE(review): declared outside the per-output loop, so a reduction made for one output
// stays in effect for the heuristics and launch of later outputs - confirm this is intended.
size_t WSZ = 128;
242+
243+
const int dims = input.dims;
244+
const int elemSize = (int)input.elemSize();
245+
// Options shared by all outputs: dimensionality, element size in bytes and source steps.
String opts0 = cv::format(
246+
"-DDIMS=%d -DELEMSIZE=%d",
247+
dims, elemSize
248+
);
249+
// Macro index d counts from the innermost dimension: SRC_STEP_0 is the step of the last axis.
for (int d = 0; d < dims; d++)
250+
{
251+
opts0 += cv::format(" -DSRC_STEP_%d=%d", d, (int)input.step[dims - 1 - d]);
252+
}
253+
// Kernel entry point is selected by dimensionality, e.g. "slice_4".
String kname = cv::format("slice_%d", dims);
233254
for (size_t i = 0; i < outputs.size(); i++)
234255
{
235-
int groups = outputs[i].size[0];
236-
int channels = outputs[i].size[1];
237-
int rows = outputs[i].size[2];
238-
int cols = outputs[i].size[3];
239-
240-
ocl::Kernel kernel("slice", ocl::dnn::slice_oclsrc, opts);
241-
size_t local[] = { 128 };
242-
size_t global[] = { (size_t)groups * channels / 4 * local[0] };
243-
int idx = 0;
244-
kernel.set(idx++, ocl::KernelArg::PtrReadOnly(inpMat));
245-
kernel.set(idx++, (int)(inpMat.size[2] * inpMat.size[3]));
246-
kernel.set(idx++, (int)(rows * cols));
247-
kernel.set(idx++, (int)inpMat.size[3]);
248-
kernel.set(idx++, (int)cols);
249-
kernel.set(idx++, (int)finalSliceRanges[i][2].start);
250-
kernel.set(idx++, (int)finalSliceRanges[i][3].start);
251-
kernel.set(idx++, ocl::KernelArg::PtrWriteOnly(outputs[i]));
252-
bool ret = kernel.run(1, global, local, false);
256+
UMat& output = outputs[i];
257+
const std::vector<Range>& range = finalSliceRanges[i];
258+
259+
String opts = opts0;
260+
261+
CV_CheckEQ(output.dims, dims, "");
262+
// Per-output layout: destination steps/sizes and the start offset of the slice in the
// source, using the same innermost-first macro numbering as SRC_STEP_*.
for (int d = 0; d < dims; d++)
263+
{
264+
opts += cv::format(" -DDST_STEP_%d=%d -DDST_SZ_%d=%d -DSRC_START_%d=%d",
265+
d, (int)output.step[dims - 1 - d],
266+
d, (int)output.size[dims - 1 - d],
267+
d, (int)range[dims - 1 - d].start
268+
);
269+
// The slice extent on every axis must equal the output extent on that axis.
CV_CheckEQ(range[d].size(), (int)output.size[d], "");
270+
}
271+
272+
// Count the trailing dimensions whose byte step is identical in input and output.
// block_size ends up as elemSize times the product of the output sizes of those dimensions
// (presumably the byte span that can be copied as one run - confirm against the kernel).
// The loop variable `i` below shadows the outer output index; `output` and `range` were
// bound before this point, so the outer index is not needed inside the loop.
int block_dims = 0;
273+
size_t block_size = elemSize;
274+
for (int i = dims - 1; i >= 0; --i)
275+
{
276+
if (input.step[i] != output.step[i])
277+
break;
278+
block_size *= output.size[i];
279+
block_dims++;
280+
}
281+
282+
// Output size in bytes, and how many blocks of block_size bytes it consists of.
const size_t total = output.total() * elemSize;
283+
size_t num_blocks = total / block_size;
284+
285+
// Heuristic: take the 1D path when there are at most 8 blocks of at least WSZ*4 bytes each,
// or when a block is at least WSZ*64 bytes regardless of the block count.
if ((num_blocks <= 8 && block_size >= WSZ * 4) || (block_size >= WSZ * 64))
286+
{
287+
// use 1D copy mode
288+
opts += cv::format(" -DUSE_COPY_1D=1");
289+
290+
opts += cv::format(" -DBLOCK_DIMS=%d", block_dims);
291+
opts += cv::format(" -DBLOCK_DIMS_CONTIGUOUS=%d", block_dims);
292+
opts += cv::format(" -DBLOCK_SIZE=%d", (int)block_size);
293+
294+
opts += cv::format(" -DBLOCK_COLS=%d", (int)block_size);
295+
}
296+
else
297+
{
298+
// use 2D copy mode
299+
int block_cols = block_size;
300+
int block_dims_contiguous = block_dims;
301+
// Steps of the first dimension that did not match in the loop above.
// NOTE(review): if every dimension matched (block_dims == dims) this index evaluates to -1;
// confirm that case always takes the 1D branch above or add a guard.
size_t input_base_step = input.step[dims - 1 - block_dims_contiguous];
302+
size_t output_base_step = output.step[dims - 1 - block_dims_contiguous];
303+
304+
// Extend the block over further outer dimensions for which the input and output steps
// keep the same ratio as the base steps (compared by cross-multiplication). The first
// iteration compares the base steps with themselves and therefore always passes.
size_t block_rows = 1;
305+
for (int i = dims - 1 - block_dims_contiguous; i >= 0; --i)
306+
{
307+
if (input.step[i] * output_base_step != output.step[i] * input_base_step)
308+
break;
309+
block_rows *= output.size[i];
310+
block_dims++;
311+
}
312+
313+
block_size *= block_rows;
314+
315+
num_blocks = total / block_size;
316+
317+
if (block_rows > 1)
318+
{
319+
// 2D defines: BLOCK_COLS is the byte size of the matching-step part, BLOCK_ROWS the number
// of such rows per block, BLOCK_SRC_STRIDE the input base step. USE_COPY_1D is not defined.
opts += cv::format(" -DBLOCK_DIMS=%d", block_dims);
320+
opts += cv::format(" -DBLOCK_DIMS_CONTIGUOUS=%d", block_dims_contiguous);
321+
opts += cv::format(" -DBLOCK_SIZE=%d", (int)block_size);
322+
323+
opts += cv::format(" -DBLOCK_COLS=%d", (int)block_cols);
324+
325+
opts += cv::format(" -DBLOCK_ROWS=%d", (int)block_rows);
326+
opts += cv::format(" -DBLOCK_SRC_STRIDE=%d", (int)input_base_step);
327+
}
328+
else
329+
{
330+
// use 1D copy mode
// (block_rows == 1 here, so block_size is unchanged by the multiplication above)
331+
opts += cv::format(" -DUSE_COPY_1D=1");
332+
333+
opts += cv::format(" -DBLOCK_DIMS=%d", block_dims_contiguous);
334+
opts += cv::format(" -DBLOCK_DIMS_CONTIGUOUS=%d", block_dims_contiguous);
335+
opts += cv::format(" -DBLOCK_SIZE=%d", (int)block_size);
336+
337+
opts += cv::format(" -DBLOCK_COLS=%d", (int)block_size);
338+
}
339+
}
340+
341+
// Shrink the work-group for small blocks: block sizes up to 64 / 128 / 256 / 512 / 1024 bytes
// select WSZ = 4 / 8 / 16 / 32 / 64; larger blocks keep the current WSZ.
const size_t MIN_WORK_ITEMS = 16;
342+
if (block_size <= 4 * MIN_WORK_ITEMS)
343+
WSZ = 4;
344+
else if (block_size <= 8 * MIN_WORK_ITEMS)
345+
WSZ = 8;
346+
else if (block_size <= 16 * MIN_WORK_ITEMS)
347+
WSZ = 16;
348+
else if (block_size <= 32 * MIN_WORK_ITEMS)
349+
WSZ = 32;
350+
else if (block_size <= 64 * MIN_WORK_ITEMS)
351+
WSZ = 64;
352+
353+
opts += cv::format(" -DWSZ=%d", (int)WSZ);
354+
355+
// 2D launch: global {WSZ, num_blocks} with local {WSZ, 1}, i.e. num_blocks work-groups of
// WSZ work-items each.
size_t local[] = { WSZ, 1 };
356+
size_t global[] = { WSZ, num_blocks };
357+
358+
// The kernel is built with this output's option string, so the layout reaches it as
// compile-time macros; only the two buffers are passed as runtime arguments.
ocl::Kernel kernel(kname.c_str(), ocl::dnn::slice_oclsrc, opts);
359+
if (kernel.empty())
360+
return false;
361+
bool ret = kernel.args(
362+
ocl::KernelArg::PtrReadOnly(input),
363+
ocl::KernelArg::PtrWriteOnly(output)
364+
)
365+
.run(2, global, local, false);
253366
if (!ret)
254367
return false;
255-
}
368+
} // for outputs.size()
256369

257370
return true;
258-
#endif
259371
}
260372
#endif
261373

modules/dnn/src/op_inf_engine.hpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -29,8 +29,8 @@
2929
#define INF_ENGINE_RELEASE_2020_4 2020040000
3030

3131
#ifndef INF_ENGINE_RELEASE
32-
#warning("IE version have not been provided via command-line. Using 2020.3 by default")
33-
#define INF_ENGINE_RELEASE INF_ENGINE_RELEASE_2020_3
32+
#warning("IE version have not been provided via command-line. Using 2020.4 by default")
33+
#define INF_ENGINE_RELEASE INF_ENGINE_RELEASE_2020_4
3434
#endif
3535

3636
#define INF_ENGINE_VER_MAJOR_GT(ver) (((INF_ENGINE_RELEASE) / 10000) > ((ver) / 10000))
@@ -44,7 +44,7 @@
4444
#pragma GCC diagnostic ignored "-Wsuggest-override"
4545
#endif
4646

47-
#ifdef HAVE_DNN_IE_NN_BUILDER_2019
47+
#if defined(HAVE_DNN_IE_NN_BUILDER_2019) || INF_ENGINE_VER_MAJOR_EQ(INF_ENGINE_RELEASE_2020_4)
4848
//#define INFERENCE_ENGINE_DEPRECATED // turn off deprecation warnings from IE
4949
//there is no way to suppress warnings from IE only at this moment, so we are forced to suppress warnings globally
5050
#if defined(__GNUC__)
@@ -53,7 +53,7 @@
5353
#ifdef _MSC_VER
5454
#pragma warning(disable: 4996) // was declared deprecated
5555
#endif
56-
#endif // HAVE_DNN_IE_NN_BUILDER_2019
56+
#endif
5757

5858
#if defined(__GNUC__) && INF_ENGINE_VER_MAJOR_LT(INF_ENGINE_RELEASE_2020_1)
5959
#pragma GCC visibility push(default)

0 commit comments

Comments
 (0)