12
12
#include " graph.hpp"
13
13
#include " threading.hpp"
14
14
15
- #ifdef USE_TBB
16
- #include < tbb/parallel_for.h>
17
- #endif
18
-
19
15
namespace {
20
-
21
- void loadImgToIEGraph (const cv::Mat& img, size_t batch, void * ieBuffer) {
22
- const int channels = img.channels ();
23
- const int height = img.rows ;
24
- const int width = img.cols ;
25
-
26
- float * ieData = reinterpret_cast <float *>(ieBuffer);
27
- int bOffset = static_cast <int >(batch) * channels * width * height;
28
- for (int c = 0 ; c < channels; c++) {
29
- int cOffset = c * width * height;
30
- for (int w = 0 ; w < width; w++) {
31
- for (int h = 0 ; h < height; h++) {
32
- ieData[bOffset + cOffset + h * width + w] =
33
- static_cast <float >(img.at <cv::Vec3b>(h, w)[c]);
34
- }
35
- }
16
+ void framesToTensor (const std::vector<std::shared_ptr<VideoFrame>>& frames, const ov::Tensor& tensor) {
17
+ static const ov::Layout layout{" NHWC" };
18
+ static const ov::Shape shape = tensor.get_shape ();
19
+ static const size_t batchSize = shape[ov::layout::batch_idx (layout)];
20
+ static const cv::Size inSize{int (shape[ov::layout::width_idx (layout)]), int (shape[ov::layout::height_idx (layout)])};
21
+ static const size_t channels = shape[ov::layout::channels_idx (layout)];
22
+ static const size_t batchOffset = inSize.area () * channels;
23
+ assert (batchSize == frames.size ());
24
+ assert (channels == 3 );
25
+ uint8_t * data = tensor.data <uint8_t >();
26
+ for (size_t i = 0 ; i < batchSize; ++i) {
27
+ assert (frames[i]->frame .channels () == channels);
28
+ cv::resize (frames[i]->frame , cv::Mat{inSize, CV_8UC3, static_cast <void *>(data + batchOffset * i)}, inSize);
36
29
}
37
30
}
38
-
39
31
} // namespace
40
32
41
- void IEGraph::initNetwork (const std::string& deviceName) {
42
- auto cnnNetwork = ie.ReadNetwork (modelPath);
43
-
44
- if (deviceName.find (" CPU" ) != std::string::npos) {
45
- ie.SetConfig ({{InferenceEngine::PluginConfigParams::KEY_CPU_BIND_THREAD, " NO" }}, " CPU" );
46
- ie.SetConfig ({{InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::CPU_THROUGHPUT_AUTO}}, " CPU" );
47
- }
48
- if (deviceName.find (" GPU" ) != std::string::npos) {
49
- ie.SetConfig ({{InferenceEngine::PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::GPU_THROUGHPUT_AUTO}}, " GPU" );
50
- }
51
- if (!cpuExtensionPath.empty ()) {
52
- auto extension_ptr = std::make_shared<InferenceEngine::Extension>(cpuExtensionPath);
53
- ie.AddExtension (extension_ptr, " CPU" );
54
- }
55
- if (!cldnnConfigPath.empty ()) {
56
- ie.SetConfig ({{InferenceEngine::PluginConfigParams::KEY_CONFIG_FILE, cldnnConfigPath}}, " GPU" );
57
- }
58
-
59
- // Set batch size
60
- if (batchSize > 1 ) {
61
- auto inShapes = cnnNetwork.getInputShapes ();
62
- for (auto & pair : inShapes) {
63
- auto & dims = pair.second ;
64
- if (!dims.empty ()) {
65
- dims[0 ] = batchSize;
66
- }
67
- }
68
- cnnNetwork.reshape (inShapes);
69
- }
70
- InferenceEngine::ExecutableNetwork executableNetwork;
71
- executableNetwork = ie.LoadNetwork (cnnNetwork, deviceName);
72
- logExecNetworkInfo (executableNetwork, modelPath, deviceName);
73
- slog::info << " \t Number of network inference requests: " << maxRequests << slog::endl;
74
- slog::info << " \t Batch size is set to " << cnnNetwork.getBatchSize () << slog::endl;
75
-
76
- InferenceEngine::InputsDataMap inputInfo (cnnNetwork.getInputsInfo ());
77
- if (inputInfo.size () != 1 ) {
78
- throw std::logic_error (" Face Detection network should have only one input" );
79
- }
80
- inputDataBlobName = inputInfo.begin ()->first ;
81
-
82
- InferenceEngine::OutputsDataMap outputInfo (cnnNetwork.getOutputsInfo ());
83
- outputDataBlobNames.reserve (outputInfo.size ());
84
- for (const auto & i : outputInfo) {
85
- outputDataBlobNames.push_back (i.first );
86
- }
87
-
88
- for (size_t i = 0 ; i < maxRequests; ++i) {
89
- auto req = std::make_shared<InferenceEngine::InferRequest>(executableNetwork.CreateInferRequest ());
90
- availableRequests.push (req);
91
- }
92
-
93
- if (postLoad != nullptr )
94
- postLoad (outputDataBlobNames, cnnNetwork);
95
-
96
- availableRequests.front ()->StartAsync ();
97
- availableRequests.front ()->Wait (InferenceEngine::InferRequest::WaitMode::RESULT_READY);
98
- }
99
-
100
- void IEGraph::start (GetterFunc getterFunc, PostprocessingFunc postprocessingFunc) {
33
+ void IEGraph::start (size_t batchSize, GetterFunc getterFunc, PostprocessingFunc postprocessingFunc) {
34
+ assert (batchSize > 0 );
101
35
assert (nullptr != getterFunc);
102
36
assert (nullptr != postprocessingFunc);
103
37
assert (nullptr == getter);
104
38
getter = std::move (getterFunc);
105
39
postprocessing = std::move (postprocessingFunc);
106
- getterThread = std::thread ([&]() {
40
+ getterThread = std::thread ([&, batchSize ]() {
107
41
std::vector<std::shared_ptr<VideoFrame>> vframes;
108
- std::vector<cv::Mat> imgsToProc (batchSize);
109
42
while (!terminate) {
110
43
vframes.clear ();
111
44
size_t b = 0 ;
@@ -120,7 +53,7 @@ void IEGraph::start(GetterFunc getterFunc, PostprocessingFunc postprocessingFunc
120
53
}
121
54
}
122
55
123
- InferenceEngine ::InferRequest::Ptr req;
56
+ ov ::InferRequest req;
124
57
{
125
58
std::unique_lock<std::mutex> lock (mtxAvalableRequests);
126
59
condVarAvailableRequests.wait (lock, [&]() {
@@ -133,88 +66,36 @@ void IEGraph::start(GetterFunc getterFunc, PostprocessingFunc postprocessingFunc
133
66
availableRequests.pop ();
134
67
}
135
68
136
- auto inputBlob = req->GetBlob (inputDataBlobName);
137
- imgsToProc.resize (batchSize);
138
- for (size_t i = 0 ; i < batchSize; i++) {
139
- if (imgsToProc[i].empty ()) {
140
- auto & dims = inputBlob->getTensorDesc ().getDims ();
141
- assert (4 == dims.size ());
142
- auto height = static_cast <int >(dims[2 ]);
143
- auto width = static_cast <int >(dims[3 ]);
144
- imgsToProc[i] = cv::Mat (height, width, CV_8UC3);
145
- }
146
- }
147
-
148
- auto preprocess = [&]() {
149
- InferenceEngine::LockedMemory<void > buff = InferenceEngine::as<
150
- InferenceEngine::MemoryBlob>(inputBlob)->wmap ();
151
- float * inputPtr = static_cast <float *>(buff);
152
- auto loopBody = [&](size_t i) {
153
- cv::resize (vframes[i]->frame ,
154
- imgsToProc[i],
155
- imgsToProc[i].size ());
156
- loadImgToIEGraph (imgsToProc[i], i, inputPtr);
157
- };
158
- #ifdef USE_TBB
159
- run_in_arena ([&](){
160
- tbb::parallel_for<size_t >(0 , batchSize, loopBody);
161
- });
162
- #else
163
- for (size_t i = 0 ; i < batchSize; i++) {
164
- loopBody (i);
165
- }
166
- #endif
167
- };
168
-
169
69
if (perfTimerInfer.enabled ()) {
170
70
{
171
71
ScopedTimer st (perfTimerPreprocess);
172
- preprocess ( );
72
+ framesToTensor (vframes, req. get_input_tensor () );
173
73
}
174
74
auto startTime = std::chrono::high_resolution_clock::now ();
175
- req-> StartAsync ();
75
+ req. start_async ();
176
76
std::unique_lock<std::mutex> lock (mtxBusyRequests);
177
77
busyBatchRequests.push ({std::move (vframes), std::move (req), startTime});
178
78
} else {
179
- preprocess ( );
180
- req-> StartAsync ();
79
+ framesToTensor (vframes, req. get_input_tensor () );
80
+ req. start_async ();
181
81
std::unique_lock<std::mutex> lock (mtxBusyRequests);
182
82
busyBatchRequests.push ({std::move (vframes), std::move (req),
183
83
std::chrono::high_resolution_clock::time_point ()});
184
84
}
185
85
condVarBusyRequests.notify_one ();
186
86
}
187
- condVarBusyRequests.notify_one (); // notify that there will be no new InferRequests
87
+ condVarBusyRequests.notify_one (); // notify that there will be no new InferRequests
188
88
});
189
89
}
190
90
191
- IEGraph::IEGraph (const InitParams& p):
192
- perfTimerPreprocess(p.collectStats ? PerfTimer::DefaultIterationsCount : 0 ),
193
- perfTimerInfer(p.collectStats ? PerfTimer::DefaultIterationsCount : 0 ),
194
- confidenceThreshold(0 .5f ), batchSize(p.batchSize),
195
- modelPath(p.modelPath),
196
- cpuExtensionPath(p.cpuExtPath), cldnnConfigPath(p.cldnnConfigPath),
197
- maxRequests(p.maxRequests) {
198
- assert (p.maxRequests > 0 );
199
-
200
- postLoad = p.postLoadFunc ;
201
- initNetwork (p.deviceName );
202
- }
203
-
204
91
bool IEGraph::isRunning () {
205
92
std::lock_guard<std::mutex> lock (mtxBusyRequests);
206
93
return !terminate || !busyBatchRequests.empty ();
207
94
}
208
95
209
- InferenceEngine::SizeVector IEGraph::getInputDims () const {
210
- assert (!availableRequests.empty ());
211
- auto inputBlob = availableRequests.front ()->GetBlob (inputDataBlobName);
212
- return inputBlob->getTensorDesc ().getDims ();
213
- }
214
-
215
96
std::vector<std::shared_ptr<VideoFrame>> IEGraph::getBatchData (cv::Size frameSize) {
216
97
std::vector<std::shared_ptr<VideoFrame>> vframes;
217
- InferenceEngine ::InferRequest::Ptr req;
98
+ ov ::InferRequest req;
218
99
std::chrono::high_resolution_clock::time_point startTime;
219
100
{
220
101
std::unique_lock<std::mutex> lock (mtxBusyRequests);
@@ -231,56 +112,40 @@ std::vector<std::shared_ptr<VideoFrame>> IEGraph::getBatchData(cv::Size frameSiz
231
112
busyBatchRequests.pop ();
232
113
}
233
114
234
- if (nullptr != req && InferenceEngine::OK == req->Wait (InferenceEngine::InferRequest::WaitMode::RESULT_READY)) {
235
- auto detections = postprocessing (req, outputDataBlobNames, frameSize);
236
- for (decltype (detections.size ()) i = 0 ; i < detections.size (); i ++) {
237
- vframes[i]->detections = std::move (detections[i]);
238
- }
239
- if (perfTimerInfer.enabled ()) {
240
- auto endTime = std::chrono::high_resolution_clock::now ();
241
- perfTimerInfer.addValue (endTime - startTime);
242
- }
115
+ req.wait ();
116
+ auto detections = postprocessing (req, frameSize);
117
+ for (decltype (detections.size ()) i = 0 ; i < detections.size (); i ++) {
118
+ vframes[i]->detections = std::move (detections[i]);
119
+ }
120
+ if (perfTimerInfer.enabled ()) {
121
+ auto endTime = std::chrono::high_resolution_clock::now ();
122
+ perfTimerInfer.addValue (endTime - startTime);
243
123
}
244
124
245
- if ( nullptr != req) {
125
+ {
246
126
std::unique_lock<std::mutex> lock (mtxAvalableRequests);
247
127
availableRequests.push (std::move (req));
248
- lock.unlock ();
249
- condVarAvailableRequests.notify_one ();
250
128
}
129
+ condVarAvailableRequests.notify_one ();
251
130
252
131
return vframes;
253
132
}
254
133
255
- unsigned int IEGraph::getBatchSize () const {
256
- return static_cast <unsigned int >(batchSize);
257
- }
258
-
259
- void IEGraph::setDetectionConfidence (float conf) {
260
- confidenceThreshold = conf;
261
- }
262
-
263
134
/// Shuts the graph down: signals the getter thread to stop, reclaims every
/// outstanding inference request back into the available pool, then wakes
/// and joins the worker thread.
IEGraph::~IEGraph() {
    terminate = true;
    {
        std::unique_lock<std::mutex> lock(mtxAvalableRequests);
        // Loop until all maxRequests requests are back in the available pool.
        // NOTE(review): this is a busy-wait — if a request is momentarily held
        // by the getter thread (neither in busyBatchRequests nor in
        // availableRequests) the loop spins hot until it reappears; confirm
        // that is acceptable at shutdown.
        while (availableRequests.size() != maxRequests) {
            // NOTE(review): this inner `lock` shadows the outer `lock`
            // variable; both mutexes are intentionally held here.
            std::unique_lock<std::mutex> lock(mtxBusyRequests);
            if (!busyBatchRequests.empty()) {
                auto& req = busyBatchRequests.front().req;
                // Abort the outstanding inference instead of waiting for it
                // to complete, then return the request to the pool.
                req.cancel();
                availableRequests.push(std::move(req));
                busyBatchRequests.pop();
            }
        }
    }
    // Wake the getter thread in case it is blocked waiting for a free request,
    // so it can observe `terminate` and exit.
    condVarAvailableRequests.notify_one();
    if (getterThread.joinable()) {
        getterThread.join();
    }
}
0 commit comments