|
| 1 | +/* |
| 2 | +// Copyright (C) 2022 Intel Corporation |
| 3 | +// |
| 4 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | +// you may not use this file except in compliance with the License. |
| 6 | +// You may obtain a copy of the License at |
| 7 | +// |
| 8 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | +// |
| 10 | +// Unless required by applicable law or agreed to in writing, software |
| 11 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | +// See the License for the specific language governing permissions and |
| 14 | +// limitations under the License. |
| 15 | +*/ |
| 16 | + |
| 17 | +#include "models/detection_model_yolov3_onnx.h" |
| 18 | + |
| 19 | +#include <algorithm> |
| 20 | +#include <cmath> |
| 21 | +#include <cstdint> |
| 22 | +#include <stdexcept> |
| 23 | +#include <string> |
| 24 | +#include <utility> |
| 25 | +#include <vector> |
| 26 | + |
| 27 | +#include <openvino/openvino.hpp> |
| 28 | + |
| 29 | +#include <utils/common.hpp> |
| 30 | +#include <utils/slog.hpp> |
| 31 | + |
| 32 | +#include "models/input_data.h" |
| 33 | +#include "models/internal_model_data.h" |
| 34 | +#include "models/results.h" |
| 35 | +#include "utils/image_utils.h" |
| 36 | + |
| 37 | + |
| 38 | +ModelYoloV3ONNX::ModelYoloV3ONNX(const std::string& modelFileName, |
| 39 | + float confidenceThreshold, |
| 40 | + const std::vector<std::string>& labels, |
| 41 | + const std::string& layout) |
| 42 | + : DetectionModel(modelFileName, confidenceThreshold, false, labels, layout) { |
| 43 | + interpolationMode = CUBIC; |
| 44 | + resizeMode = RESIZE_KEEP_ASPECT_LETTERBOX; |
| 45 | + } |
| 46 | + |
| 47 | + |
| 48 | +void ModelYoloV3ONNX::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) { |
| 49 | + // --------------------------- Configure input & output ------------------------------------------------- |
| 50 | + // --------------------------- Prepare input ------------------------------------------------------ |
| 51 | + const ov::OutputVector& inputs = model->inputs(); |
| 52 | + if (inputs.size() != 2) { |
| 53 | + throw std::logic_error("YoloV3ONNX model wrapper expects models that have 2 inputs"); |
| 54 | + } |
| 55 | + |
| 56 | + // Check first image input |
| 57 | + std::string imageInputName = inputs.begin()->get_any_name(); |
| 58 | + inputsNames.push_back(imageInputName); |
| 59 | + |
| 60 | + const ov::Shape& imageShape = inputs.begin()->get_shape(); |
| 61 | + const ov::Layout& imageLayout = getInputLayout(inputs.front()); |
| 62 | + |
| 63 | + if (imageShape.size() != 4 && imageShape[ov::layout::channels_idx(imageLayout)] != 3) { |
| 64 | + throw std::logic_error("Expected 4D image input with 3 channels"); |
| 65 | + } |
| 66 | + |
| 67 | + ov::preprocess::PrePostProcessor ppp(model); |
| 68 | + ppp.input(imageInputName).tensor().set_element_type(ov::element::u8).set_layout({"NHWC"}); |
| 69 | + |
| 70 | + ppp.input(imageInputName).model().set_layout(imageLayout); |
| 71 | + |
| 72 | + // Check second info input |
| 73 | + std::string infoInputName = (++inputs.begin())->get_any_name(); |
| 74 | + inputsNames.push_back(infoInputName); |
| 75 | + |
| 76 | + const ov::Shape infoShape = (++inputs.begin())->get_shape(); |
| 77 | + const ov::Layout& infoLayout = getInputLayout(inputs.at(1)); |
| 78 | + |
| 79 | + if (infoShape.size() != 2 && infoShape[ov::layout::channels_idx(infoLayout)] != 2) { |
| 80 | + throw std::logic_error("Expected 2D info input with 2 channels"); |
| 81 | + } |
| 82 | + |
| 83 | + ppp.input(infoInputName).tensor().set_element_type(ov::element::i32); |
| 84 | + |
| 85 | + ppp.input(infoInputName).model().set_layout(infoLayout); |
| 86 | + |
| 87 | + // --------------------------- Reading image input parameters ------------------------------------------- |
| 88 | + netInputWidth = imageShape[ov::layout::width_idx(imageLayout)]; |
| 89 | + netInputHeight = imageShape[ov::layout::height_idx(imageLayout)]; |
| 90 | + |
| 91 | + // --------------------------- Prepare output ----------------------------------------------------- |
| 92 | + if (model->outputs().size() != 3) { |
| 93 | + throw std::logic_error("YoloV3ONNX model wrapper expects models that have 3 outputs"); |
| 94 | + } |
| 95 | + |
| 96 | + const ov::OutputVector& outputs = model->outputs(); |
| 97 | + for (auto& output : outputs) { |
| 98 | + const ov::Shape& currentShape = output.get_partial_shape().get_max_shape(); |
| 99 | + std::string currentName = output.get_any_name(); |
| 100 | + if (currentShape[currentShape.size() - 1] == 3) { |
| 101 | + indicesOuputName = currentName; |
| 102 | + ppp.output(currentName).tensor().set_element_type(ov::element::i32); |
| 103 | + } else if (currentShape[2] == 4) { |
| 104 | + boxesOutputName = currentName; |
| 105 | + ppp.output(currentName).tensor().set_element_type(ov::element::f32); |
| 106 | + } else if (currentShape[1] == numberOfClasses) { |
| 107 | + scoresOutputName = currentName; |
| 108 | + ppp.output(currentName).tensor().set_element_type(ov::element::f32); |
| 109 | + } else |
| 110 | + throw std::logic_error("Expected shapes [:,:,4], [:,numClasses,:] and [:,3] for outputs"); |
| 111 | + outputsNames.push_back(currentName); |
| 112 | + } |
| 113 | + model = ppp.build(); |
| 114 | +} |
| 115 | + |
| 116 | +std::shared_ptr<InternalModelData> ModelYoloV3ONNX::preprocess(const InputData& inputData, |
| 117 | + ov::InferRequest& request) { |
| 118 | + const auto& origImg = inputData.asRef<ImageInputData>().inputImage; |
| 119 | + int* img_size = new int[2]; |
| 120 | + img_size[0] = origImg.rows; |
| 121 | + img_size[1] = origImg.cols; |
| 122 | + ov::Tensor infoInput = ov::Tensor(ov::element::i32, ov::Shape({1, 2}), img_size); |
| 123 | + |
| 124 | + request.set_tensor(inputsNames[1], infoInput); |
| 125 | + |
| 126 | + return ImageModel::preprocess(inputData, request); |
| 127 | +} |
| 128 | + |
| 129 | +float ModelYoloV3ONNX::getScore(const ov::Tensor& scoresTensor, size_t classInd, size_t boxInd) { |
| 130 | + float* scoresPtr = scoresTensor.data<float>(); |
| 131 | + const auto shape = scoresTensor.get_shape(); |
| 132 | + int N = shape[2]; |
| 133 | + |
| 134 | + return scoresPtr[classInd * N + boxInd]; |
| 135 | +} |
| 136 | + |
| 137 | +std::unique_ptr<ResultBase> ModelYoloV3ONNX::postprocess(InferenceResult& infResult) { |
| 138 | + // Get info about input image |
| 139 | + const auto imgWidth = infResult.internalModelData->asRef<InternalImageModelData>().inputImgWidth; |
| 140 | + const auto imgHeight = infResult.internalModelData->asRef<InternalImageModelData>().inputImgHeight; |
| 141 | + |
| 142 | + // Get outputs tensors |
| 143 | + const ov::Tensor& boxes = infResult.outputsData[boxesOutputName]; |
| 144 | + const float* boxesPtr = boxes.data<float>(); |
| 145 | + |
| 146 | + const ov::Tensor& scores = infResult.outputsData[scoresOutputName]; |
| 147 | + const ov::Tensor& indices = infResult.outputsData[indicesOuputName]; |
| 148 | + |
| 149 | + const int* indicesData = indices.data<int>(); |
| 150 | + const auto indicesShape = indices.get_shape(); |
| 151 | + const auto boxShape = boxes.get_shape(); |
| 152 | + |
| 153 | + // Generate detection results |
| 154 | + DetectionResult* result = new DetectionResult(infResult.frameId, infResult.metaData); |
| 155 | + size_t numberOfBoxes = indicesShape.size() == 3 ? indicesShape[1] : indicesShape[0]; |
| 156 | + int indicesStride = indicesShape.size() == 3 ? indicesShape[2] : indicesShape[1]; |
| 157 | + |
| 158 | + for (size_t i = 0; i < numberOfBoxes; ++i) { |
| 159 | + int batchInd = indicesData[i * indicesStride]; |
| 160 | + int classInd = indicesData[i * indicesStride + 1]; |
| 161 | + int boxInd = indicesData[i * indicesStride + 2]; |
| 162 | + |
| 163 | + if (batchInd == -1) |
| 164 | + break; |
| 165 | + |
| 166 | + float score = getScore(scores, classInd, boxInd); |
| 167 | + |
| 168 | + if (score > confidenceThreshold) { |
| 169 | + DetectedObject obj; |
| 170 | + size_t startPos = boxShape[2] * boxInd; |
| 171 | + |
| 172 | + auto x = boxesPtr[startPos + 1]; |
| 173 | + auto y = boxesPtr[startPos]; |
| 174 | + auto width = boxesPtr[startPos + 3] - x; |
| 175 | + auto height = boxesPtr[startPos + 2] - y; |
| 176 | + |
| 177 | + // Create new detected box |
| 178 | + obj.x = clamp(x, 0.f, static_cast<float>(imgWidth)); |
| 179 | + obj.y = clamp(y, 0.f, static_cast<float>(imgHeight)); |
| 180 | + obj.height = clamp(height, 0.f, static_cast<float>(imgHeight)); |
| 181 | + obj.width = clamp(width, 0.f, static_cast<float>(imgWidth)); |
| 182 | + obj.confidence = score; |
| 183 | + obj.labelID = classInd; |
| 184 | + obj.label = getLabelName(classInd); |
| 185 | + |
| 186 | + |
| 187 | + result->objects.push_back(obj); |
| 188 | + |
| 189 | + } |
| 190 | + } |
| 191 | + |
| 192 | + return std::unique_ptr<ResultBase>(result); |
| 193 | +} |
0 commit comments