Skip to content

Commit a3d8f3a

Browse files
committed
add yolov3-onnx and yolox wrappers
1 parent 451b9b9 commit a3d8f3a

File tree

5 files changed

+533
-1
lines changed

5 files changed

+533
-1
lines changed
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/*
2+
// Copyright (C) 2022 Intel Corporation
3+
//
4+
// Licensed under the Apache License, Version 2.0 (the "License");
5+
// you may not use this file except in compliance with the License.
6+
// You may obtain a copy of the License at
7+
//
8+
// http://www.apache.org/licenses/LICENSE-2.0
9+
//
10+
// Unless required by applicable law or agreed to in writing, software
11+
// distributed under the License is distributed on an "AS IS" BASIS,
12+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
// See the License for the specific language governing permissions and
14+
// limitations under the License.
15+
*/
16+
17+
#pragma once
18+
19+
#include <string>
20+
#include <vector>
21+
22+
#include <openvino/openvino.hpp>
23+
24+
#include "models/detection_model.h"
25+
#include "utils/image_utils.h"
26+
27+
28+
29+
class ModelYoloV3ONNX: public DetectionModel {
30+
public:
31+
/// Constructor.
32+
/// @param modelFileName name of model to load
33+
/// @param confidenceThreshold - threshold to eliminate low-confidence detections.
34+
/// Any detected object with confidence lower than this threshold will be ignored.
35+
/// @param labels - array of labels for every class. If this array is empty or contains less elements
36+
/// than actual classes number, default "Label #N" will be shown for missing items.
37+
/// @param layout - model input layout
38+
ModelYoloV3ONNX(const std::string& modelFileName,
39+
float confidenceThreshold,
40+
const std::vector<std::string>& labels = std::vector<std::string>(),
41+
const std::string& layout = "");
42+
43+
std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
44+
std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;
45+
46+
protected:
47+
void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
48+
float getScore(const ov::Tensor& scoresTensor, size_t classInd, size_t boxInd);
49+
50+
std::string boxesOutputName;
51+
std::string scoresOutputName;
52+
std::string indicesOuputName;
53+
static const int numberOfClasses = 80;
54+
};
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
/*
2+
// Copyright (C) 2022 Intel Corporation
3+
//
4+
// Licensed under the Apache License, Version 2.0 (the "License");
5+
// you may not use this file except in compliance with the License.
6+
// You may obtain a copy of the License at
7+
//
8+
// http://www.apache.org/licenses/LICENSE-2.0
9+
//
10+
// Unless required by applicable law or agreed to in writing, software
11+
// distributed under the License is distributed on an "AS IS" BASIS,
12+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
// See the License for the specific language governing permissions and
14+
// limitations under the License.
15+
*/
16+
17+
#pragma once
18+
#include <stddef.h>
19+
#include <stdint.h>
20+
21+
#include <map>
22+
#include <memory>
23+
#include <string>
24+
25+
#include <openvino/op/region_yolo.hpp>
26+
#include <openvino/openvino.hpp>
27+
28+
#include "models/detection_model.h"
29+
#include "utils/image_utils.h"
30+
#include "utils/nms.hpp"
31+
32+
33+
34+
class ModelYoloX: public DetectionModel {
35+
public:
36+
/// Constructor.
37+
/// @param modelFileName name of model to load
38+
/// @param confidenceThreshold - threshold to eliminate low-confidence detections.
39+
/// Any detected object with confidence lower than this threshold will be ignored.
40+
/// @param boxIOUThreshold - threshold to treat separate output regions as one object for filtering
41+
/// during postprocessing (only one of them should stay). The default value is 0.5
42+
/// @param labels - array of labels for every class. If this array is empty or contains less elements
43+
/// than actual classes number, default "Label #N" will be shown for missing items.
44+
/// @param layout - model input layout
45+
ModelYoloX(const std::string& modelFileName,
46+
float confidenceThreshold,
47+
float boxIOUThreshold = 0.5,
48+
const std::vector<std::string>& labels = std::vector<std::string>(),
49+
const std::string& layout = "");
50+
51+
std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) override;
52+
std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;
53+
54+
protected:
55+
void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
56+
void prepareGridsAndStrides();
57+
Anchor xywh2xyxy(float x, float y, float width, float height);
58+
59+
double boxIOUThreshold;
60+
std::vector<std::pair<int, int>> grids;
61+
std::vector<int> expandedStrides;
62+
static const int numberOfClasses = 80;
63+
};
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
/*
2+
// Copyright (C) 2022 Intel Corporation
3+
//
4+
// Licensed under the Apache License, Version 2.0 (the "License");
5+
// you may not use this file except in compliance with the License.
6+
// You may obtain a copy of the License at
7+
//
8+
// http://www.apache.org/licenses/LICENSE-2.0
9+
//
10+
// Unless required by applicable law or agreed to in writing, software
11+
// distributed under the License is distributed on an "AS IS" BASIS,
12+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
// See the License for the specific language governing permissions and
14+
// limitations under the License.
15+
*/
16+
17+
#include "models/detection_model_yolov3_onnx.h"
18+
19+
#include <algorithm>
20+
#include <cmath>
21+
#include <cstdint>
22+
#include <stdexcept>
23+
#include <string>
24+
#include <utility>
25+
#include <vector>
26+
27+
#include <openvino/openvino.hpp>
28+
29+
#include <utils/common.hpp>
30+
#include <utils/slog.hpp>
31+
32+
#include "models/input_data.h"
33+
#include "models/internal_model_data.h"
34+
#include "models/results.h"
35+
#include "utils/image_utils.h"
36+
37+
38+
/// Builds the YOLOv3-ONNX wrapper.
///
/// All arguments are forwarded to DetectionModel; the third base-constructor
/// argument is fixed to `false` (presumably the auto-resize flag - confirm
/// against DetectionModel). The image is then configured to be resized with
/// aspect-ratio-preserving letterboxing and cubic interpolation.
ModelYoloV3ONNX::ModelYoloV3ONNX(const std::string& modelFileName,
                                 float confidenceThreshold,
                                 const std::vector<std::string>& labels,
                                 const std::string& layout)
    : DetectionModel(modelFileName, confidenceThreshold, false, labels, layout) {
    resizeMode = RESIZE_KEEP_ASPECT_LETTERBOX;
    interpolationMode = CUBIC;
}
46+
47+
48+
/// Validates the model topology and embeds tensor type/layout conversions into it.
///
/// Expected topology:
///  - input 0: 4D image with 3 channels (fed as u8 NHWC),
///  - input 1: 2D info tensor with 2 channels (fed as i32),
///  - 3 outputs told apart by shape: last dim == 3 -> indices (i32),
///    third dim == 4 -> boxes (f32), second dim == numberOfClasses -> scores (f32).
///
/// Side effects: appends to inputsNames / outputsNames, sets netInputWidth /
/// netInputHeight and the three *OutputName members, and replaces `model` with
/// the result of PrePostProcessor::build().
///
/// @param model model to validate; rebuilt in place
/// @throws std::logic_error if the number or shapes of inputs/outputs do not match the expectations
void ModelYoloV3ONNX::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
    // --------------------------- Configure input & output -------------------------------------------------
    // --------------------------- Prepare input ------------------------------------------------------
    const ov::OutputVector inputs = model->inputs();
    if (inputs.size() != 2) {
        throw std::logic_error("YoloV3ONNX model wrapper expects models that have 2 inputs");
    }

    // Check first image input
    const auto& imageInput = inputs[0];
    const std::string imageInputName = imageInput.get_any_name();
    inputsNames.push_back(imageInputName);

    const ov::Shape imageShape = imageInput.get_shape();
    const ov::Layout imageLayout = getInputLayout(imageInput);

    // Either condition alone makes the input unusable; the rank test goes first so the
    // channel lookup never indexes a shape of the wrong rank.
    if (imageShape.size() != 4 || imageShape[ov::layout::channels_idx(imageLayout)] != 3) {
        throw std::logic_error("Expected 4D image input with 3 channels");
    }

    ov::preprocess::PrePostProcessor ppp(model);
    ppp.input(imageInputName).tensor().set_element_type(ov::element::u8).set_layout({"NHWC"});

    ppp.input(imageInputName).model().set_layout(imageLayout);

    // Check second info input
    const auto& infoInput = inputs[1];
    const std::string infoInputName = infoInput.get_any_name();
    inputsNames.push_back(infoInputName);

    const ov::Shape infoShape = infoInput.get_shape();
    const ov::Layout infoLayout = getInputLayout(infoInput);

    if (infoShape.size() != 2 || infoShape[ov::layout::channels_idx(infoLayout)] != 2) {
        throw std::logic_error("Expected 2D info input with 2 channels");
    }

    ppp.input(infoInputName).tensor().set_element_type(ov::element::i32);

    ppp.input(infoInputName).model().set_layout(infoLayout);

    // --------------------------- Reading image input parameters -------------------------------------------
    netInputWidth = imageShape[ov::layout::width_idx(imageLayout)];
    netInputHeight = imageShape[ov::layout::height_idx(imageLayout)];

    // --------------------------- Prepare output -----------------------------------------------------
    const ov::OutputVector outputs = model->outputs();
    if (outputs.size() != 3) {
        throw std::logic_error("YoloV3ONNX model wrapper expects models that have 3 outputs");
    }

    // Start from a clean state so the completeness check below is meaningful.
    indicesOuputName.clear();
    boxesOutputName.clear();
    scoresOutputName.clear();

    for (const auto& output : outputs) {
        const ov::Shape currentShape = output.get_partial_shape().get_max_shape();
        const std::string currentName = output.get_any_name();
        const size_t rank = currentShape.size();

        // Every dimension access is guarded by a rank check to avoid out-of-range indexing.
        if (rank >= 1 && currentShape[rank - 1] == 3) {
            indicesOuputName = currentName;
            ppp.output(currentName).tensor().set_element_type(ov::element::i32);
        } else if (rank >= 3 && currentShape[2] == 4) {
            boxesOutputName = currentName;
            ppp.output(currentName).tensor().set_element_type(ov::element::f32);
        } else if (rank >= 2 && currentShape[1] == static_cast<size_t>(numberOfClasses)) {
            scoresOutputName = currentName;
            ppp.output(currentName).tensor().set_element_type(ov::element::f32);
        } else {
            throw std::logic_error("Expected shapes [:,:,4], [:,numClasses,:] and [:,3] for outputs");
        }
        outputsNames.push_back(currentName);
    }

    // Two outputs matching the same pattern would leave one role unassigned; fail early
    // instead of looking up an empty output name during postprocessing.
    if (indicesOuputName.empty() || boxesOutputName.empty() || scoresOutputName.empty()) {
        throw std::logic_error("Expected shapes [:,:,4], [:,numClasses,:] and [:,3] for outputs");
    }

    model = ppp.build();
}
115+
116+
std::shared_ptr<InternalModelData> ModelYoloV3ONNX::preprocess(const InputData& inputData,
117+
ov::InferRequest& request) {
118+
const auto& origImg = inputData.asRef<ImageInputData>().inputImage;
119+
int* img_size = new int[2];
120+
img_size[0] = origImg.rows;
121+
img_size[1] = origImg.cols;
122+
ov::Tensor infoInput = ov::Tensor(ov::element::i32, ov::Shape({1, 2}), img_size);
123+
124+
request.set_tensor(inputsNames[1], infoInput);
125+
126+
return ImageModel::preprocess(inputData, request);
127+
}
128+
129+
/// Fetches a single score from the scores tensor.
///
/// The element is read at flat offset classInd * shape[2] + boxInd, i.e. the tensor is
/// treated as rows of shape[2] values per class (assumes a single batch with layout
/// [1, classes, boxes] - TODO confirm). No bounds checking is performed.
///
/// @param scoresTensor f32 tensor with at least 3 dimensions
/// @param classInd class row to read
/// @param boxInd box column to read
/// @return the score stored at that position
float ModelYoloV3ONNX::getScore(const ov::Tensor& scoresTensor, size_t classInd, size_t boxInd) {
    const int boxesPerClass = scoresTensor.get_shape()[2];
    const float* const scoreValues = scoresTensor.data<float>();

    const size_t offset = classInd * boxesPerClass + boxInd;
    return scoreValues[offset];
}
136+
137+
/// Converts the boxes / scores / indices outputs of one inference into detections.
///
/// Each row of the indices output is read as {batch, class, box}. Rows are processed
/// until a row with batch == -1 is met. For every row whose score (see getScore) exceeds
/// confidenceThreshold a DetectedObject is produced from the 4 box values, which are read
/// as {y1, x1, y2, x2}; the resulting x/width are clamped to [0, image width] and
/// y/height to [0, image height].
///
/// @param infResult inference outputs plus the internal data saved by preprocess
/// @return a DetectionResult (as ResultBase) for the frame
/// @throws std::logic_error if the indices output has fewer than 2 dimensions or rows shorter than 3
std::unique_ptr<ResultBase> ModelYoloV3ONNX::postprocess(InferenceResult& infResult) {
    // Get info about input image
    const auto& imageData = infResult.internalModelData->asRef<InternalImageModelData>();
    const auto imgWidth = imageData.inputImgWidth;
    const auto imgHeight = imageData.inputImgHeight;

    // Get outputs tensors
    const ov::Tensor& boxes = infResult.outputsData[boxesOutputName];
    const float* boxesPtr = boxes.data<float>();

    const ov::Tensor& scores = infResult.outputsData[scoresOutputName];
    const ov::Tensor& indices = infResult.outputsData[indicesOuputName];

    const int* indicesData = indices.data<int>();
    const auto indicesShape = indices.get_shape();
    const auto boxShape = boxes.get_shape();

    if (indicesShape.size() < 2) {
        throw std::logic_error("Expected indices output with at least 2 dimensions");
    }

    // The indices output is handled both as [batch, N, 3] and as [N, 3].
    const bool hasBatchDim = indicesShape.size() == 3;
    const size_t numberOfBoxes = hasBatchDim ? indicesShape[1] : indicesShape[0];
    const size_t indicesStride = hasBatchDim ? indicesShape[2] : indicesShape[1];

    if (indicesStride < 3) {
        throw std::logic_error("Expected indices output rows to hold 3 values");
    }

    // Generate detection results.
    // Owned by a smart pointer from the start so nothing leaks if the loop below throws.
    std::unique_ptr<DetectionResult> result(new DetectionResult(infResult.frameId, infResult.metaData));

    for (size_t i = 0; i < numberOfBoxes; ++i) {
        const int* row = indicesData + i * indicesStride;
        const int batchInd = row[0];
        const int classInd = row[1];
        const int boxInd = row[2];

        // A batch index of -1 marks the end of the valid rows.
        if (batchInd == -1) {
            break;
        }

        // Negative indices would turn into huge unsigned offsets; skip such rows.
        if (classInd < 0 || boxInd < 0) {
            continue;
        }

        const float score = getScore(scores, classInd, boxInd);

        if (score > confidenceThreshold) {
            DetectedObject obj;
            const size_t startPos = boxShape[2] * boxInd;

            const auto x = boxesPtr[startPos + 1];
            const auto y = boxesPtr[startPos];
            const auto width = boxesPtr[startPos + 3] - x;
            const auto height = boxesPtr[startPos + 2] - y;

            // Create new detected box
            obj.x = clamp(x, 0.f, static_cast<float>(imgWidth));
            obj.y = clamp(y, 0.f, static_cast<float>(imgHeight));
            obj.height = clamp(height, 0.f, static_cast<float>(imgHeight));
            obj.width = clamp(width, 0.f, static_cast<float>(imgWidth));
            obj.confidence = score;
            obj.labelID = classInd;
            obj.label = getLabelName(classInd);

            result->objects.push_back(obj);
        }
    }

    return std::unique_ptr<ResultBase>(std::move(result));
}

0 commit comments

Comments
 (0)