Skip to content

Commit b3910cf

Browse files
authored
Merge pull request #2893 from Wovchena/demos/multi_channel-move-to-ov-2.0
OV2.0 c++ demos/multi_channel: move to new API, set PERFORMANCE_HINT THROUGHPUT, remove -nireq
2 parents ea4305d + 3461006 commit b3910cf

File tree

23 files changed, with 413 additions and 788 deletions.

demos/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -134,7 +134,7 @@ macro(add_demo)
134134
target_include_directories(${OMZ_DEMO_NAME} PRIVATE ${OMZ_DEMO_INCLUDE_DIRECTORIES})
135135
endif()
136136

137-
target_link_libraries(${OMZ_DEMO_NAME} PRIVATE ${OpenCV_LIBRARIES} ${InferenceEngine_LIBRARIES}
137+
target_link_libraries(${OMZ_DEMO_NAME} PRIVATE ${OpenCV_LIBRARIES} openvino::runtime ${InferenceEngine_LIBRARIES}
138138
${OMZ_DEMO_DEPENDENCIES} ngraph::ngraph utils gflags)
139139

140140
if(UNIX)
@@ -143,6 +143,8 @@ macro(add_demo)
143143
endmacro()
144144

145145
find_package(OpenCV REQUIRED COMPONENTS core highgui videoio imgproc imgcodecs gapi)
146+
find_package(OpenVINO REQUIRED COMPONENTS Runtime)
147+
# TODO: remove InferenceEngine and ngraph after 2022.1
146148
find_package(InferenceEngine REQUIRED)
147149
find_package(ngraph REQUIRED)
148150

demos/common/cpp/utils/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,4 +10,4 @@ source_group("include" FILES ${HEADERS})
1010

1111
add_library(utils STATIC ${HEADERS} ${SOURCES})
1212
target_include_directories(utils PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
13-
target_link_libraries(utils PRIVATE gflags ${InferenceEngine_LIBRARIES} opencv_core opencv_imgcodecs opencv_videoio)
13+
target_link_libraries(utils PRIVATE gflags openvino::runtime ${InferenceEngine_LIBRARIES} opencv_core opencv_imgcodecs opencv_videoio)

demos/common/cpp/utils/include/utils/common.hpp

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,12 @@ inline slog::LogStream& operator<<(slog::LogStream& os, const InferenceEngine::V
6868
return os;
6969
}
7070

71+
inline slog::LogStream& operator<<(slog::LogStream& os, const ov::Version& version) {
72+
return os << "OpenVINO" << slog::endl
73+
<< "\tversion: " << OPENVINO_VERSION_MAJOR << "." << OPENVINO_VERSION_MINOR << "." << OPENVINO_VERSION_PATCH << slog::endl
74+
<< "\tbuild: " << version.buildNumber;
75+
}
76+
7177
/**
7278
* @class Color
7379
* @brief A Color class stores channels of a given color

demos/common/cpp/utils/include/utils/ocv_common.hpp

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99

1010
#pragma once
1111

12+
#include <inference_engine.hpp>
1213
#include <opencv2/opencv.hpp>
1314

1415
#include "openvino/openvino.hpp"
@@ -21,7 +22,7 @@
2122
* @brief Get cv::Mat value in the correct format.
2223
*/
2324
template <typename T>
24-
static const T getMatValue(const cv::Mat& mat, size_t h, size_t w, size_t c) {
25+
const T getMatValue(const cv::Mat& mat, size_t h, size_t w, size_t c) {
2526
switch (mat.type()) {
2627
case CV_8UC1: return (T)mat.at<uchar>(h, w);
2728
case CV_8UC3: return (T)mat.at<cv::Vec3b>(h, w)[c];
@@ -37,7 +38,7 @@ static const T getMatValue(const cv::Mat& mat, size_t h, size_t w, size_t c) {
3738
* @param blob - Blob object which to be filled by an image data.
3839
* @param batchIndex - batch index of an image inside of the blob.
3940
*/
40-
static UNUSED void matToBlob(const cv::Mat& mat, const InferenceEngine::Blob::Ptr& blob, int batchIndex = 0) {
41+
inline void matToBlob(const cv::Mat& mat, const InferenceEngine::Blob::Ptr& blob, int batchIndex = 0) {
4142
InferenceEngine::SizeVector blobSize = blob->getTensorDesc().getDims();
4243
const size_t width = blobSize[3];
4344
const size_t height = blobSize[2];
@@ -140,8 +141,8 @@ static UNUSED InferenceEngine::Blob::Ptr wrapMat2Blob(const cv::Mat& mat) {
140141
bool isMatFloat = matType == CV_32F;
141142

142143
size_t channels = mat.channels();
143-
size_t height = mat.size().height;
144-
size_t width = mat.size().width;
144+
size_t height = mat.rows;
145+
size_t width = mat.cols;
145146

146147
size_t strideH = mat.step.buf[0];
147148
size_t strideW = mat.step.buf[1];
@@ -170,8 +171,8 @@ static UNUSED InferenceEngine::Blob::Ptr wrapMat2Blob(const cv::Mat& mat) {
170171

171172
static UNUSED ov::Tensor wrapMat2Tensor(const cv::Mat& mat) {
172173
const size_t channels = mat.channels();
173-
const size_t height = mat.size().height;
174-
const size_t width = mat.size().width;
174+
const size_t height = mat.rows;
175+
const size_t width = mat.cols;
175176

176177
const size_t strideH = mat.step.buf[0];
177178
const size_t strideW = mat.step.buf[1];
@@ -215,7 +216,6 @@ inline void putHighlightedText(const cv::Mat& frame,
215216
cv::putText(frame, message, position, fontFace, fontScale, color, thickness);
216217
}
217218

218-
219219
class OutputTransform {
220220
public:
221221
OutputTransform() : doResize(false), scaleFactor(1) {}

demos/common/cpp/utils/src/config_factory.cpp

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,6 @@
1919
#include <set>
2020
#include <string>
2121

22-
#include <gpu/gpu_config.hpp>
2322
#include <utils/args_helper.hpp>
2423
#include <utils/common.hpp>
2524

demos/multi_channel_common/cpp/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -99,5 +99,5 @@ target_include_directories(${TARGET_NAME} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
9999
find_package(Threads REQUIRED)
100100

101101
target_link_libraries(${TARGET_NAME}
102-
PRIVATE ${InferenceEngine_LIBRARIES} gflags ${OpenCV_LIBRARIES} Threads::Threads
102+
PRIVATE openvino::runtime gflags ${OpenCV_LIBRARIES} Threads::Threads
103103
PUBLIC utils)

demos/multi_channel_common/cpp/graph.cpp

Lines changed: 37 additions & 172 deletions
Original file line number | Diff line number | Diff line change
@@ -12,100 +12,33 @@
1212
#include "graph.hpp"
1313
#include "threading.hpp"
1414

15-
#ifdef USE_TBB
16-
#include <tbb/parallel_for.h>
17-
#endif
18-
1915
namespace {
20-
21-
void loadImgToIEGraph(const cv::Mat& img, size_t batch, void* ieBuffer) {
22-
const int channels = img.channels();
23-
const int height = img.rows;
24-
const int width = img.cols;
25-
26-
float* ieData = reinterpret_cast<float*>(ieBuffer);
27-
int bOffset = static_cast<int>(batch) * channels * width * height;
28-
for (int c = 0; c < channels; c++) {
29-
int cOffset = c * width * height;
30-
for (int w = 0; w < width; w++) {
31-
for (int h = 0; h < height; h++) {
32-
ieData[bOffset + cOffset + h * width + w] =
33-
static_cast<float>(img.at<cv::Vec3b>(h, w)[c]);
34-
}
35-
}
16+
void framesToTensor(const std::vector<std::shared_ptr<VideoFrame>>& frames, const ov::Tensor& tensor) {
17+
static const ov::Layout layout{"NHWC"};
18+
static const ov::Shape shape = tensor.get_shape();
19+
static const size_t batchSize = shape[ov::layout::batch_idx(layout)];
20+
static const cv::Size inSize{int(shape[ov::layout::width_idx(layout)]), int(shape[ov::layout::height_idx(layout)])};
21+
static const size_t channels = shape[ov::layout::channels_idx(layout)];
22+
static const size_t batchOffset = inSize.area() * channels;
23+
assert(batchSize == frames.size());
24+
assert(channels == 3);
25+
uint8_t* data = tensor.data<uint8_t>();
26+
for (size_t i = 0; i < batchSize; ++i) {
27+
assert(frames[i]->frame.channels() == channels);
28+
cv::resize(frames[i]->frame, cv::Mat{inSize, CV_8UC3, static_cast<void*>(data + batchOffset * i)}, inSize);
3629
}
3730
}
38-
3931
} // namespace
4032

41-
void IEGraph::initNetwork(const std::string& deviceName) {
42-
auto cnnNetwork = ie.ReadNetwork(modelPath);
43-
44-
if (deviceName.find("CPU") != std::string::npos) {
45-
ie.SetConfig({{InferenceEngine::PluginConfigParams::KEY_CPU_BIND_THREAD, "NO"}}, "CPU");
46-
ie.SetConfig({{InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::CPU_THROUGHPUT_AUTO}}, "CPU");
47-
}
48-
if (deviceName.find("GPU") != std::string::npos) {
49-
ie.SetConfig({{InferenceEngine::PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::GPU_THROUGHPUT_AUTO}}, "GPU");
50-
}
51-
if (!cpuExtensionPath.empty()) {
52-
auto extension_ptr = std::make_shared<InferenceEngine::Extension>(cpuExtensionPath);
53-
ie.AddExtension(extension_ptr, "CPU");
54-
}
55-
if (!cldnnConfigPath.empty()) {
56-
ie.SetConfig({{InferenceEngine::PluginConfigParams::KEY_CONFIG_FILE, cldnnConfigPath}}, "GPU");
57-
}
58-
59-
// Set batch size
60-
if (batchSize > 1) {
61-
auto inShapes = cnnNetwork.getInputShapes();
62-
for (auto& pair : inShapes) {
63-
auto& dims = pair.second;
64-
if (!dims.empty()) {
65-
dims[0] = batchSize;
66-
}
67-
}
68-
cnnNetwork.reshape(inShapes);
69-
}
70-
InferenceEngine::ExecutableNetwork executableNetwork;
71-
executableNetwork = ie.LoadNetwork(cnnNetwork, deviceName);
72-
logExecNetworkInfo(executableNetwork, modelPath, deviceName);
73-
slog::info << "\tNumber of network inference requests: " << maxRequests << slog::endl;
74-
slog::info << "\tBatch size is set to " << cnnNetwork.getBatchSize() << slog::endl;
75-
76-
InferenceEngine::InputsDataMap inputInfo(cnnNetwork.getInputsInfo());
77-
if (inputInfo.size() != 1) {
78-
throw std::logic_error("Face Detection network should have only one input");
79-
}
80-
inputDataBlobName = inputInfo.begin()->first;
81-
82-
InferenceEngine::OutputsDataMap outputInfo(cnnNetwork.getOutputsInfo());
83-
outputDataBlobNames.reserve(outputInfo.size());
84-
for (const auto& i : outputInfo) {
85-
outputDataBlobNames.push_back(i.first);
86-
}
87-
88-
for (size_t i = 0; i < maxRequests; ++i) {
89-
auto req = std::make_shared<InferenceEngine::InferRequest>(executableNetwork.CreateInferRequest());
90-
availableRequests.push(req);
91-
}
92-
93-
if (postLoad != nullptr)
94-
postLoad(outputDataBlobNames, cnnNetwork);
95-
96-
availableRequests.front()->StartAsync();
97-
availableRequests.front()->Wait(InferenceEngine::InferRequest::WaitMode::RESULT_READY);
98-
}
99-
100-
void IEGraph::start(GetterFunc getterFunc, PostprocessingFunc postprocessingFunc) {
33+
void IEGraph::start(size_t batchSize, GetterFunc getterFunc, PostprocessingFunc postprocessingFunc) {
34+
assert(batchSize > 0);
10135
assert(nullptr != getterFunc);
10236
assert(nullptr != postprocessingFunc);
10337
assert(nullptr == getter);
10438
getter = std::move(getterFunc);
10539
postprocessing = std::move(postprocessingFunc);
106-
getterThread = std::thread([&]() {
40+
getterThread = std::thread([&, batchSize]() {
10741
std::vector<std::shared_ptr<VideoFrame>> vframes;
108-
std::vector<cv::Mat> imgsToProc(batchSize);
10942
while (!terminate) {
11043
vframes.clear();
11144
size_t b = 0;
@@ -120,7 +53,7 @@ void IEGraph::start(GetterFunc getterFunc, PostprocessingFunc postprocessingFunc
12053
}
12154
}
12255

123-
InferenceEngine::InferRequest::Ptr req;
56+
ov::InferRequest req;
12457
{
12558
std::unique_lock<std::mutex> lock(mtxAvalableRequests);
12659
condVarAvailableRequests.wait(lock, [&]() {
@@ -133,88 +66,36 @@ void IEGraph::start(GetterFunc getterFunc, PostprocessingFunc postprocessingFunc
13366
availableRequests.pop();
13467
}
13568

136-
auto inputBlob = req->GetBlob(inputDataBlobName);
137-
imgsToProc.resize(batchSize);
138-
for (size_t i = 0; i < batchSize; i++) {
139-
if (imgsToProc[i].empty()) {
140-
auto& dims = inputBlob->getTensorDesc().getDims();
141-
assert(4 == dims.size());
142-
auto height = static_cast<int>(dims[2]);
143-
auto width = static_cast<int>(dims[3]);
144-
imgsToProc[i] = cv::Mat(height, width, CV_8UC3);
145-
}
146-
}
147-
148-
auto preprocess = [&]() {
149-
InferenceEngine::LockedMemory<void> buff = InferenceEngine::as<
150-
InferenceEngine::MemoryBlob>(inputBlob)->wmap();
151-
float* inputPtr = static_cast<float*>(buff);
152-
auto loopBody = [&](size_t i) {
153-
cv::resize(vframes[i]->frame,
154-
imgsToProc[i],
155-
imgsToProc[i].size());
156-
loadImgToIEGraph(imgsToProc[i], i, inputPtr);
157-
};
158-
#ifdef USE_TBB
159-
run_in_arena([&](){
160-
tbb::parallel_for<size_t>(0, batchSize, loopBody);
161-
});
162-
#else
163-
for (size_t i = 0; i < batchSize; i++) {
164-
loopBody(i);
165-
}
166-
#endif
167-
};
168-
16969
if (perfTimerInfer.enabled()) {
17070
{
17171
ScopedTimer st(perfTimerPreprocess);
172-
preprocess();
72+
framesToTensor(vframes, req.get_input_tensor());
17373
}
17474
auto startTime = std::chrono::high_resolution_clock::now();
175-
req->StartAsync();
75+
req.start_async();
17676
std::unique_lock<std::mutex> lock(mtxBusyRequests);
17777
busyBatchRequests.push({std::move(vframes), std::move(req), startTime});
17878
} else {
179-
preprocess();
180-
req->StartAsync();
79+
framesToTensor(vframes, req.get_input_tensor());
80+
req.start_async();
18181
std::unique_lock<std::mutex> lock(mtxBusyRequests);
18282
busyBatchRequests.push({std::move(vframes), std::move(req),
18383
std::chrono::high_resolution_clock::time_point()});
18484
}
18585
condVarBusyRequests.notify_one();
18686
}
187-
condVarBusyRequests.notify_one(); // notify that there will be no new InferRequests
87+
condVarBusyRequests.notify_one(); // notify that there will be no new InferRequests
18888
});
18989
}
19090

191-
IEGraph::IEGraph(const InitParams& p):
192-
perfTimerPreprocess(p.collectStats ? PerfTimer::DefaultIterationsCount : 0),
193-
perfTimerInfer(p.collectStats ? PerfTimer::DefaultIterationsCount : 0),
194-
confidenceThreshold(0.5f), batchSize(p.batchSize),
195-
modelPath(p.modelPath),
196-
cpuExtensionPath(p.cpuExtPath), cldnnConfigPath(p.cldnnConfigPath),
197-
maxRequests(p.maxRequests) {
198-
assert(p.maxRequests > 0);
199-
200-
postLoad = p.postLoadFunc;
201-
initNetwork(p.deviceName);
202-
}
203-
20491
bool IEGraph::isRunning() {
20592
std::lock_guard<std::mutex> lock(mtxBusyRequests);
20693
return !terminate || !busyBatchRequests.empty();
20794
}
20895

209-
InferenceEngine::SizeVector IEGraph::getInputDims() const {
210-
assert(!availableRequests.empty());
211-
auto inputBlob = availableRequests.front()->GetBlob(inputDataBlobName);
212-
return inputBlob->getTensorDesc().getDims();
213-
}
214-
21596
std::vector<std::shared_ptr<VideoFrame>> IEGraph::getBatchData(cv::Size frameSize) {
21697
std::vector<std::shared_ptr<VideoFrame>> vframes;
217-
InferenceEngine::InferRequest::Ptr req;
98+
ov::InferRequest req;
21899
std::chrono::high_resolution_clock::time_point startTime;
219100
{
220101
std::unique_lock<std::mutex> lock(mtxBusyRequests);
@@ -231,56 +112,40 @@ std::vector<std::shared_ptr<VideoFrame>> IEGraph::getBatchData(cv::Size frameSiz
231112
busyBatchRequests.pop();
232113
}
233114

234-
if (nullptr != req && InferenceEngine::OK == req->Wait(InferenceEngine::InferRequest::WaitMode::RESULT_READY)) {
235-
auto detections = postprocessing(req, outputDataBlobNames, frameSize);
236-
for (decltype(detections.size()) i = 0; i < detections.size(); i ++) {
237-
vframes[i]->detections = std::move(detections[i]);
238-
}
239-
if (perfTimerInfer.enabled()) {
240-
auto endTime = std::chrono::high_resolution_clock::now();
241-
perfTimerInfer.addValue(endTime - startTime);
242-
}
115+
req.wait();
116+
auto detections = postprocessing(req, frameSize);
117+
for (decltype(detections.size()) i = 0; i < detections.size(); i ++) {
118+
vframes[i]->detections = std::move(detections[i]);
119+
}
120+
if (perfTimerInfer.enabled()) {
121+
auto endTime = std::chrono::high_resolution_clock::now();
122+
perfTimerInfer.addValue(endTime - startTime);
243123
}
244124

245-
if (nullptr != req) {
125+
{
246126
std::unique_lock<std::mutex> lock(mtxAvalableRequests);
247127
availableRequests.push(std::move(req));
248-
lock.unlock();
249-
condVarAvailableRequests.notify_one();
250128
}
129+
condVarAvailableRequests.notify_one();
251130

252131
return vframes;
253132
}
254133

255-
unsigned int IEGraph::getBatchSize() const {
256-
return static_cast<unsigned int>(batchSize);
257-
}
258-
259-
void IEGraph::setDetectionConfidence(float conf) {
260-
confidenceThreshold = conf;
261-
}
262-
263134
IEGraph::~IEGraph() {
264135
terminate = true;
265136
{
266137
std::unique_lock<std::mutex> lock(mtxAvalableRequests);
267-
bool ready = false;
268-
while (!ready) {
138+
while (availableRequests.size() != maxRequests) {
269139
std::unique_lock<std::mutex> lock(mtxBusyRequests);
270140
if (!busyBatchRequests.empty()) {
271141
auto& req = busyBatchRequests.front().req;
272-
if (nullptr != req) {
273-
req->Wait(InferenceEngine::InferRequest::WaitMode::RESULT_READY);
274-
availableRequests.push(std::move(req));
275-
}
142+
req.cancel();
143+
availableRequests.push(std::move(req));
276144
busyBatchRequests.pop();
277145
}
278-
if (availableRequests.size() == maxRequests) {
279-
ready = true;
280-
}
281146
}
282-
condVarAvailableRequests.notify_one();
283147
}
148+
condVarAvailableRequests.notify_one();
284149
if (getterThread.joinable()) {
285150
getterThread.join();
286151
}

0 commit comments

Comments
 (0)