19
19
#include < utils/common.hpp>
20
20
#include < ngraph/ngraph.hpp>
21
21
22
- using namespace InferenceEngine ;
22
// Built-in anchor tables (flat list of width/height pairs), one entry per supported
// YOLO flavour. The table is indexed with the model's yoloVersion value, so the order
// of the entries must follow the version enumeration
// (presumably YOLO_V1V2, YOLO_V3, YOLO_V4, YOLO_V4_TINY - confirm against the header).
// Declared const: the table is only ever read, same as defaultMasks.
const std::vector<float> defaultAnchors[] = {
    // YOLOv1v2
    { 0.57273f, 0.677385f, 1.87446f, 2.06253f, 3.33843f, 5.47434f, 7.88282f, 3.52778f, 9.77052f, 9.16828f },
    // YOLOv3
    { 10.0f, 13.0f, 16.0f, 30.0f, 33.0f, 23.0f,
      30.0f, 61.0f, 62.0f, 45.0f, 59.0f, 119.0f,
      116.0f, 90.0f, 156.0f, 198.0f, 373.0f, 326.0f },
    // YOLOv4
    { 12.0f, 16.0f, 19.0f, 36.0f, 40.0f, 28.0f,
      36.0f, 75.0f, 76.0f, 55.0f, 72.0f, 146.0f,
      142.0f, 110.0f, 192.0f, 243.0f, 459.0f, 401.0f },
    // YOLOv4_Tiny
    { 10.0f, 14.0f, 23.0f, 27.0f, 37.0f, 58.0f,
      81.0f, 82.0f, 135.0f, 169.0f, 344.0f, 319.0f }
};
37
+
38
// Built-in anchor masks, one entry per supported YOLO flavour, indexed with the
// model's yoloVersion value. Each mask value selects one width/height pair from the
// anchor table; the flat list is consumed three values per output layer.
// The first two entries are intentionally empty: the defaults are only consulted
// when the network has no RegionYolo output, i.e. for the YOLOv4 flavours.
const std::vector<int64_t> defaultMasks[] = {
    {},                                  // YOLOv1v2
    {},                                  // YOLOv3
    { 0, 1, 2, 3, 4, 5, 6, 7, 8 },       // YOLOv4
    { 1, 2, 3, 3, 4, 5 }                 // YOLOv4_Tiny
};
48
+
49
// Logistic function 1 / (1 + e^-x), mapping a raw network output into (0, 1).
// Uses the float overload std::exp explicitly: an unqualified exp() may resolve to
// the C double version, which silently promotes the whole expression to double.
// For very negative x the float exponent overflows to +inf and the result is 0.
static inline float sigmoid(float x) {
    return 1.f / (1.f + std::exp(-x));
}
52
// Identity activation: hands the raw value back untouched. It has the same
// signature as sigmoid() so either one can be picked as the post-processing step.
static inline float linear(float x) { return x; }
55
+
23
56
24
57
// Builds a YOLO detection model wrapper.
// The model file name, confidence threshold, auto-resize flag and labels are
// forwarded to DetectionModel; the IOU threshold, the advanced-postprocessing flag
// and the optional user-supplied anchors/masks are stored for later use.
// yoloVersion starts out as YOLO_V3 and is re-evaluated in prepareInputsOutputs().
ModelYolo::ModelYolo(const std::string& modelFileName,
                     float confidenceThreshold,
                     bool useAutoResize,
                     bool useAdvancedPostprocessing,
                     float boxIOUThreshold,
                     const std::vector<std::string>& labels,
                     const std::vector<float>& anchors,
                     const std::vector<int64_t>& masks)
    : DetectionModel(modelFileName, confidenceThreshold, useAutoResize, labels),
      boxIOUThreshold(boxIOUThreshold),
      useAdvancedPostprocessing(useAdvancedPostprocessing),
      yoloVersion(YOLO_V3),
      presetAnchors(anchors),
      presetMasks(masks) {}
31
67
32
68
// Configures the network input/outputs and detects which YOLO flavour the model is.
//
// Input:   exactly one input is accepted (std::logic_error otherwise). It is set to U8
//          precision; with useAutoResize the layout is NHWC and bilinear resize is
//          enabled, otherwise the layout is NCHW. netInputHeight/netInputWidth are read
//          from the input tensor descriptor.
// Outputs: every output is set to FP32 (4-D outputs additionally to NCHW) and its name
//          is appended to outputsNames.
// Version: outputs produced by a RegionYolo operation are registered in `regions`
//          straight from that operation; if any of them has an empty mask the model is
//          treated as YOLO_V1V2, otherwise as YOLO_V3. When no output is a RegionYolo,
//          the model is treated as YOLO_V4_TINY (exactly two outputs) or YOLO_V4, and
//          regions are built from the preset (or default) anchors and masks, three
//          masks per output, with outputs ordered from the largest grid to the smallest.
//
// Throws std::runtime_error when the ngraph function is unavailable, when the masks
// array has the wrong size, or when a YOLOv4 output has an unexpected shape.
void ModelYolo::prepareInputsOutputs(InferenceEngine::CNNNetwork& cnnNetwork) {
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input blobs ------------------------------------------------------
    slog::info << "Checking that the inputs are as the demo expects" << slog::endl;
    InferenceEngine::InputsDataMap inputInfo(cnnNetwork.getInputsInfo());
    if (inputInfo.size() != 1) {
        throw std::logic_error("This demo accepts networks that have only one input");
    }

    InferenceEngine::InputInfo::Ptr& input = inputInfo.begin()->second;
    inputsNames.push_back(inputInfo.begin()->first);
    input->setPrecision(InferenceEngine::Precision::U8);
    if (useAutoResize) {
        input->getPreProcess().setResizeAlgorithm(InferenceEngine::ResizeAlgorithm::RESIZE_BILINEAR);
        input->getInputData()->setLayout(InferenceEngine::Layout::NHWC);
    }
    else {
        input->getInputData()->setLayout(InferenceEngine::Layout::NCHW);
    }

    // --- Reading image input parameters
    const InferenceEngine::TensorDesc& inputDesc = inputInfo.begin()->second->getTensorDesc();
    netInputHeight = getTensorHeight(inputDesc);
    netInputWidth = getTensorWidth(inputDesc);

    // --------------------------- Prepare output blobs -----------------------------------------------------
    slog::info << "Checking that the outputs are as the demo expects" << slog::endl;
    InferenceEngine::OutputsDataMap outputInfo(cnnNetwork.getOutputsInfo());
    for (auto& output : outputInfo) {
        output.second->setPrecision(InferenceEngine::Precision::FP32);
        if (output.second->getDims().size() == 4) {
            output.second->setLayout(InferenceEngine::Layout::NCHW);
        }
        outputsNames.push_back(output.first);
    }

    // --- Looking for RegionYolo operations among the network outputs
    yoloVersion = YOLO_V3;
    bool isRegionFound = false;
    if (auto ngraphFunction = (cnnNetwork).getFunction()) {
        // Iterate by reference: copying the shared_ptr per node is needless refcount traffic
        for (const auto& op : ngraphFunction->get_ops()) {
            auto outputLayer = outputInfo.find(op->get_friendly_name());
            if (outputLayer != outputInfo.end()) {
                auto regionYolo = std::dynamic_pointer_cast<ngraph::op::RegionYolo>(op);

                if (regionYolo) {
                    isRegionFound = true;

                    if (!regionYolo->get_mask().size()) {
                        yoloVersion = YOLO_V1V2;
                    }

                    regions.emplace(outputLayer->first, Region(regionYolo));
                }
            }
        }
    }
    else {
        throw std::runtime_error("Can't get ngraph::Function. Make sure the provided model is in IR version 10 or greater.");
    }

    if (!isRegionFound) {
        // No RegionYolo output at all: the regions have to be described explicitly
        yoloVersion = outputsNames.size() == 2 ? YOLO_V4_TINY : YOLO_V4;

        const size_t num = 3;  // number of masks (anchor boxes) consumed by each output

        // Bind by reference - neither candidate needs to be copied
        const auto& chosenMasks = presetMasks.size() ? presetMasks : defaultMasks[yoloVersion];
        if (chosenMasks.size() != num * outputInfo.size()) {
            // Report the size of the array that was actually checked, which may be the default one
            throw std::runtime_error(std::string("Invalid size of masks array, got ") + std::to_string(chosenMasks.size()) +
                ", should be " + std::to_string(num * outputInfo.size()));
        }

        // Dimensions 1..3 are indexed below, so make sure they exist before touching them
        for (const auto& name : outputsNames) {
            if (outputInfo[name]->getDims().size() != 4) {
                throw std::runtime_error(std::string("The output blob ") + name + " is expected to have 4 dimensions");
            }
        }

        // Largest grid first, so that the first mask triple goes to the finest output
        std::sort(outputsNames.begin(), outputsNames.end(),
            [&outputInfo](const std::string& x, const std::string& y) {
                return outputInfo[x]->getDims()[2] > outputInfo[y]->getDims()[2];
            });

        size_t outputIdx = 0;
        for (const auto& name : outputsNames) {
            const auto& shape = outputInfo[name]->getDims();
            // Channels must split evenly between the anchors and leave room for
            // 4 coordinates + objectness; otherwise the class count would underflow
            if (shape[1] % num != 0 || shape[1] / num < 5) {
                throw std::runtime_error(std::string("The output blob ") + name + " has wrong 2nd dimension");
            }
            const size_t classes = shape[1] / num - 5;

            const auto maskBegin = chosenMasks.begin() + static_cast<std::ptrdiff_t>(outputIdx * num);
            regions.emplace(name, Region(static_cast<int>(classes), 4,
                presetAnchors.size() ? presetAnchors : defaultAnchors[yoloVersion],
                std::vector<int64_t>(maskBegin, maskBegin + static_cast<std::ptrdiff_t>(num)),
                static_cast<int>(shape[3]), static_cast<int>(shape[2])));
            ++outputIdx;
        }
    }
    else {
        // Currently externally set anchors and masks are supported only for YoloV4
        if (presetAnchors.size() || presetMasks.size()) {
            slog::warn << "Preset anchors and mask can be set for YoloV4 model only. "
                "This model is not YoloV4, so these options will be ignored." << slog::endl;
        }
    }
}
91
166
92
167
std::unique_ptr<ResultBase> ModelYolo::postprocess (InferenceResult & infResult) {
@@ -151,24 +226,27 @@ void ModelYolo::parseYOLOOutput(const std::string& output_name,
151
226
int sideH = 0 ;
152
227
unsigned long scaleH;
153
228
unsigned long scaleW;
154
- if (isYoloV3) {
155
- auto & dims = blob->getTensorDesc ().getDims ();
156
- const int out_blob_h = static_cast <int >(dims[2 ]);
157
- const int out_blob_w = static_cast <int >(dims[3 ]);
158
- sideH = out_blob_h;
159
- sideW = out_blob_w;
160
- scaleW = resized_im_w;
161
- scaleH = resized_im_h;
162
- }
163
- else {
229
+ switch (yoloVersion) {
230
+ case YOLO_V1V2:
164
231
sideH = region.outputHeight ;
165
232
sideW = region.outputWidth ;
166
233
scaleW = region.outputWidth ;
167
234
scaleH = region.outputHeight ;
235
+ break ;
236
+ case YOLO_V3:
237
+ case YOLO_V4:
238
+ case YOLO_V4_TINY:
239
+ sideH = static_cast <int >(blob->getTensorDesc ().getDims ()[2 ]);
240
+ sideW = static_cast <int >(blob->getTensorDesc ().getDims ()[3 ]);
241
+ scaleW = resized_im_w;
242
+ scaleH = resized_im_h;
243
+ break ;
168
244
}
169
245
170
246
auto entriesNum = sideW * sideH;
171
- const float * output_blob = blob->buffer ().as <PrecisionTrait<Precision::FP32>::value_type*>();
247
+ const float * output_blob = blob->buffer ().as <InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type*>();
248
+
249
+ auto postprocessRawData = (yoloVersion == YOLO_V4 || yoloVersion == YOLO_V4_TINY) ? sigmoid : linear;
172
250
173
251
// --------------------------- Parsing YOLO Region output -------------------------------------
174
252
for (int i = 0 ; i < entriesNum; ++i) {
@@ -178,13 +256,13 @@ void ModelYolo::parseYOLOOutput(const std::string& output_name,
178
256
// --- Getting region data from blob
179
257
int obj_index = calculateEntryIndex (entriesNum, region.coords , region.classes , n * entriesNum + i, region.coords );
180
258
int box_index = calculateEntryIndex (entriesNum, region.coords , region.classes , n * entriesNum + i, 0 );
181
- float scale = output_blob[obj_index];
259
+ float scale = postprocessRawData ( output_blob[obj_index]) ;
182
260
183
261
// --- Preliminary check for confidence threshold conformance
184
262
if (scale >= confidenceThreshold){
185
263
// --- Calculating scaled region's coordinates
186
- double x = (col + output_blob[box_index + 0 * entriesNum]) / sideW * original_im_w;
187
- double y = (row + output_blob[box_index + 1 * entriesNum]) / sideH * original_im_h;
264
+ double x = (col + postprocessRawData ( output_blob[box_index + 0 * entriesNum]) ) / sideW * original_im_w;
265
+ double y = (row + postprocessRawData ( output_blob[box_index + 1 * entriesNum]) ) / sideH * original_im_h;
188
266
double height = std::exp (output_blob[box_index + 3 * entriesNum]) * region.anchors [2 * n + 1 ] * original_im_h / scaleH;
189
267
double width = std::exp (output_blob[box_index + 2 * entriesNum]) * region.anchors [2 * n] * original_im_w / scaleW;
190
268
@@ -196,7 +274,7 @@ void ModelYolo::parseYOLOOutput(const std::string& output_name,
196
274
197
275
for (int j = 0 ; j < region.classes ; ++j) {
198
276
int class_index = calculateEntryIndex (entriesNum, region.coords , region.classes , n * entriesNum + i, region.coords + 1 + j);
199
- float prob = scale * output_blob[class_index];
277
+ float prob = scale * postprocessRawData ( output_blob[class_index]) ;
200
278
201
279
// --- Checking confidence threshold conformance and adding region to the list
202
280
if (prob >= confidenceThreshold) {
@@ -250,9 +328,31 @@ ModelYolo::Region::Region(const std::shared_ptr<ngraph::op::RegionYolo>& regionY
250
328
num = regionYolo->get_num_regions ();
251
329
anchors = regionYolo->get_anchors ();
252
330
if (anchors.empty ()) {
253
- anchors.insert (anchors.end (),
254
- { 0 .57273f , 0 .677385f , 1 .87446f , 2 .06253f , 3 .33843f , 5 .47434f , 7 .88282f , 3 .52778f , 9 .77052f , 9 .16828f });
331
+ anchors = defaultAnchors[YOLO_V1V2];
255
332
num = 5 ;
256
333
}
257
334
}
258
335
}
336
+
337
+ ModelYolo::Region::Region (int classes, int coords, const std::vector<float >& anchors, const std::vector<int64_t >& masks, int outputWidth, int outputHeight) :
338
+ classes(classes), coords(coords),
339
+ outputWidth(outputWidth), outputHeight(outputHeight) {
340
+ num = masks.size ();
341
+
342
+ if (anchors.size () == 0 || anchors.size () % 2 != 0 ) {
343
+ throw std::runtime_error (" Explicitly initialized region should have non-empty even-sized regions vector" );
344
+ }
345
+
346
+ if (num) {
347
+ this ->anchors .resize (num * 2 );
348
+
349
+ for (int i = 0 ; i < num; ++i) {
350
+ this ->anchors [i * 2 ] = anchors[masks[i] * 2 ];
351
+ this ->anchors [i * 2 + 1 ] = anchors[masks[i] * 2 + 1 ];
352
+ }
353
+ }
354
+ else {
355
+ this ->anchors = anchors;
356
+ num = anchors.size () / 2 ;
357
+ }
358
+ }
0 commit comments