|
#include <iostream>
#include <string>
#include <utility>
#include <vector>

#include <opencv2/opencv.hpp>
#include "labelsimagenet1k.h"
| 7 | + |
| 8 | +using namespace std; |
| 9 | +using namespace cv; |
| 10 | +using namespace dnn; |
| 11 | + |
| 12 | +vector< pair<dnn::Backend, dnn::Target> > backendTargetPairs = { |
| 13 | + std::make_pair<dnn::Backend, dnn::Target>(dnn::DNN_BACKEND_OPENCV, dnn::DNN_TARGET_CPU), |
| 14 | + std::make_pair<dnn::Backend, dnn::Target>(dnn::DNN_BACKEND_CUDA, dnn::DNN_TARGET_CUDA), |
| 15 | + std::make_pair<dnn::Backend, dnn::Target>(dnn::DNN_BACKEND_CUDA, dnn::DNN_TARGET_CUDA_FP16), |
| 16 | + std::make_pair<dnn::Backend, dnn::Target>(dnn::DNN_BACKEND_TIMVX, dnn::DNN_TARGET_NPU), |
| 17 | + std::make_pair<dnn::Backend, dnn::Target>(dnn::DNN_BACKEND_CANN, dnn::DNN_TARGET_NPU) }; |
| 18 | + |
| 19 | + |
// Option grammar for cv::CommandLineParser: "{ name alias | default | help text }".
std::string keys =
"{ help h | | Print help message. }"
"{ model m | image_classification_mobilenetv1_2022apr.onnx | Usage: Set model type, defaults to image_classification_mobilenetv1_2022apr.onnx (v1) }"
"{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera.}"
"{ initial_width | 0 | Preprocess input image by initial resizing to a specific width.}"
"{ initial_height | 0 | Preprocess input image by initial resizing to a specific height.}"
"{ rgb | true | swap R and B plane.}"
"{ crop | false | Preprocess input image by center cropping.}"
"{ vis v | true | Usage: Specify to open a new window to show results.}"
"{ backend bt | 0 | Choose one of computation backends: "
"0: (default) OpenCV implementation + CPU, "
"1: CUDA + GPU (CUDA), "
"2: CUDA + GPU (CUDA FP16), "
"3: TIM-VX + NPU, "
"4: CANN + NPU}";
| 35 | + |
| 36 | + |
| 37 | +int main(int argc, char** argv) |
| 38 | +{ |
| 39 | + CommandLineParser parser(argc, argv, keys); |
| 40 | + |
| 41 | + parser.about("Use this script to run classification deep learning networks in opencv Zoo using OpenCV."); |
| 42 | + if (parser.has("help")) |
| 43 | + { |
| 44 | + parser.printMessage(); |
| 45 | + return 0; |
| 46 | + } |
| 47 | + |
| 48 | + int rszWidth = parser.get<int>("initial_width"); |
| 49 | + int rszHeight = parser.get<int>("initial_height"); |
| 50 | + bool swapRB = parser.get<bool>("rgb"); |
| 51 | + bool crop = parser.get<bool>("crop"); |
| 52 | + bool vis = parser.get<bool>("vis"); |
| 53 | + String model = parser.get<String>("model"); |
| 54 | + int backendTargetid = parser.get<int>("backend"); |
| 55 | + |
| 56 | + if (model.empty()) |
| 57 | + { |
| 58 | + CV_Error(Error::StsError, "Model file " + model + " not found"); |
| 59 | + } |
| 60 | + vector<string> labels = getLabelsImagenet1k(); |
| 61 | + |
| 62 | + Net net = readNet(samples::findFile(model)); |
| 63 | + net.setPreferableBackend(backendTargetPairs[backendTargetid].first); |
| 64 | + net.setPreferableTarget(backendTargetPairs[backendTargetid].second); |
| 65 | + //! [Open a video file or an image file or a camera stream] |
| 66 | + VideoCapture cap; |
| 67 | + if (parser.has("input")) |
| 68 | + cap.open(samples::findFile(parser.get<String>("input"))); |
| 69 | + else |
| 70 | + cap.open(0); |
| 71 | + if (!cap.isOpened()) |
| 72 | + CV_Error(Error::StsError, "Cannot opend video or file"); |
| 73 | + Mat frame, blob; |
| 74 | + static const std::string kWinName = model; |
| 75 | + int nbInference = 0; |
| 76 | + while (waitKey(1) < 0) |
| 77 | + { |
| 78 | + cap >> frame; |
| 79 | + if (frame.empty()) |
| 80 | + { |
| 81 | + cout << "Frame is empty" << endl; |
| 82 | + waitKey(); |
| 83 | + break; |
| 84 | + } |
| 85 | + |
| 86 | + if (rszWidth != 0 && rszHeight != 0) |
| 87 | + { |
| 88 | + resize(frame, frame, Size(rszWidth, rszHeight)); |
| 89 | + } |
| 90 | + Image2BlobParams paramMobilenet; |
| 91 | + paramMobilenet.datalayout = DNN_LAYOUT_NCHW; |
| 92 | + paramMobilenet.ddepth = CV_32F; |
| 93 | + paramMobilenet.mean = Scalar(123.675, 116.28, 103.53); |
| 94 | + paramMobilenet.scalefactor = Scalar(1 / (255. * 0.229), 1 / (255. * 0.224), 1 / (255. * 0.225)); |
| 95 | + paramMobilenet.size = Size(224, 224); |
| 96 | + paramMobilenet.swapRB = swapRB; |
| 97 | + if (crop) |
| 98 | + paramMobilenet.paddingmode = DNN_PMODE_CROP_CENTER; |
| 99 | + else |
| 100 | + paramMobilenet.paddingmode = DNN_PMODE_NULL; |
| 101 | + //! [Create a 4D blob from a frame] |
| 102 | + blobFromImageWithParams(frame, blob, paramMobilenet); |
| 103 | + |
| 104 | + //! [Set input blob] |
| 105 | + net.setInput(blob); |
| 106 | + Mat prob = net.forward(); |
| 107 | + |
| 108 | + //! [Get a class with a highest score] |
| 109 | + Point classIdPoint; |
| 110 | + double confidence; |
| 111 | + minMaxLoc(prob.reshape(1, 1), 0, &confidence, 0, &classIdPoint); |
| 112 | + int classId = classIdPoint.x; |
| 113 | + std::string label = format("%s: %.4f", (labels.empty() ? format("Class #%d", classId).c_str() : |
| 114 | + labels[classId].c_str()), |
| 115 | + confidence); |
| 116 | + if (vis) |
| 117 | + { |
| 118 | + putText(frame, label, Point(0, 55), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0)); |
| 119 | + imshow(kWinName, frame); |
| 120 | + } |
| 121 | + else |
| 122 | + { |
| 123 | + cout << label << endl; |
| 124 | + nbInference++; |
| 125 | + if (nbInference > 100) |
| 126 | + { |
| 127 | + cout << nbInference << " inference made. Demo existing" << endl; |
| 128 | + break; |
| 129 | + } |
| 130 | + } |
| 131 | + } |
| 132 | + return 0; |
| 133 | +} |
0 commit comments